#!/usr/bin/env python3
"""Download MBP-10 data from Databento using batch API for large data."""

import databento as db
import os
import sys
import time

# Read the Databento API key from the environment rather than hard-coding it.
# NOTE(security): a previous revision embedded a live key in this file; that
# key has been committed to history and should be rotated.
API_KEY = os.environ.get("DATABENTO_API_KEY", "")
if not API_KEY:
    sys.exit("ERROR: set the DATABENTO_API_KEY environment variable")

# Destination for the batch download; create it up front so later
# download/rename steps cannot fail on a missing directory.
OUTPUT_DIR = os.path.expanduser("~/.openclaw/workspace/data/mbp10")
OUTPUT_PATH = os.path.join(OUTPUT_DIR, "ES_mbp10_20251024_20260228.dbn.zst")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Historical client used for cost estimation, batch submission, and download.
client = db.Historical(key=API_KEY)

# Step 1: Estimate the dollar cost of the requested range before committing.
print("=== Checking cost ===")
cost_params = {
    "dataset": "GLBX.MDP3",
    "symbols": ["ES.c.0"],
    "stype_in": "continuous",
    "schema": "mbp-10",
    "start": "2025-10-24",
    "end": "2026-02-28",
}
cost = client.metadata.get_cost(**cost_params)
print(f"Estimated cost: ${cost:.2f}")

# Step 2: Submit the batch job (zstd-compressed DBN, delivered via download).
print("\n=== Submitting batch job ===")
submit_kwargs = {
    "dataset": "GLBX.MDP3",
    "symbols": ["ES.c.0"],
    "stype_in": "continuous",
    "schema": "mbp-10",
    "start": "2025-10-24",
    "end": "2026-02-28",
    "encoding": "dbn",
    "compression": "zstd",
    "delivery": "download",
}
job = client.batch.submit_job(**submit_kwargs)
print(f"Job ID: {job.id}")
print(f"Job state: {job.state}")

# Step 3: Poll until the batch job completes (or fails/expires).
print("\n=== Polling for completion ===")
POLL_INTERVAL_S = 30  # seconds between job-state checks
while True:
    # list_jobs() returns a window of recent jobs; our job may be missing if
    # the listing window rolls over or the job expires. Guard against an empty
    # match instead of indexing blindly (the original `[...][0]` raised
    # IndexError in that case).
    current = next((j for j in client.batch.list_jobs() if j.id == job.id), None)
    if current is None:
        print(f"ERROR: Job {job.id} not found in job listing")
        sys.exit(1)
    print(f"  State: {current.state} (checked at {time.strftime('%H:%M:%S')})")
    if current.state == "done":
        break
    if current.state in ("expired", "failed"):
        print(f"ERROR: Job {current.state}")
        sys.exit(1)
    time.sleep(POLL_INTERVAL_S)

# Step 4: Download the job's files. A Databento batch download typically
# includes metadata files (manifest.json, symbology.json, condition.json)
# alongside the data, so locate the .dbn.zst data file explicitly rather than
# assuming files[0] is it (the original did, and could rename a JSON file or
# skip the rename entirely depending on ordering).
print("\n=== Downloading files ===")
files = client.batch.download(job.id, output_dir=OUTPUT_DIR)
print(f"Downloaded {len(files)} file(s):")
for f in files:
    print(f"  {f}")

# Move the data file to the canonical OUTPUT_PATH if it landed elsewhere.
data_file = next((f for f in files if str(f).endswith(".dbn.zst")), None)
if data_file is not None and str(data_file) != OUTPUT_PATH:
    os.rename(str(data_file), OUTPUT_PATH)
    print(f"Renamed to {OUTPUT_PATH}")

# Step 5: Verify the downloaded file and sanity-check its date range.
if not os.path.exists(OUTPUT_PATH):
    # The batch download may have landed in a job-id subfolder; walk the
    # output directory so the user can locate the files manually.
    print(f"\nLooking for downloaded files in {OUTPUT_DIR}...")
    for root, dirs, fnames in os.walk(OUTPUT_DIR):
        for fname in fnames:
            fpath = os.path.join(root, fname)
            fsize = os.path.getsize(fpath)
            print(f"  {fpath} ({fsize / (1024**2):.0f} MB)")
else:
    size_bytes = os.path.getsize(OUTPUT_PATH)
    print(f"\n=== Verification ===")
    print(f"File: {OUTPUT_PATH}")
    print(f"File size: {size_bytes / (1024**3):.2f} GB ({size_bytes / (1024**2):.0f} MB)")

    print("\nReading first/last records to verify date range...")
    store = db.DBNStore.from_file(OUTPUT_PATH)
    head = store.to_df(count=5)
    print(f"\nFirst records timestamp: {head.index[0]}")

    # NOTE(review): to_df() appears to materialize the whole file in memory;
    # for a multi-GB download this may be expensive — confirm against the
    # databento docs and consider a streaming read of the tail instead.
    full = store.to_df()
    print(f"Last records timestamp: {full.index[-1]}")
    print(f"Total records: {len(full):,}")

    print("\n=== Download complete! ===")
