#!/usr/bin/env python3
"""Download MBP-10 data from Databento for ES.c.0, Oct 24 2025 - Feb 28 2026."""

import databento as db
import os

# NOTE(security): this API key was previously hardcoded in source. Prefer the
# DATABENTO_API_KEY environment variable; the inline fallback keeps the script
# runnable as before, but the embedded key should be rotated and removed.
API_KEY = os.environ.get("DATABENTO_API_KEY", "db-EvJpB5Wi3xUMPvE4gLhwXBQa5J7B9")
OUTPUT_PATH = os.path.expanduser(
    "~/.openclaw/workspace/data/mbp10/ES_mbp10_20251024_20260228.dbn.zst"
)

# Create the output directory up front so a multi-hour download cannot
# fail at save time because the directory does not exist.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

client = db.Historical(key=API_KEY)

# Step 1: estimate the bill for this pull before committing to the download.
print("=== Checking cost ===")
cost_params = {
    "dataset": "GLBX.MDP3",
    "symbols": ["ES.c.0"],
    "stype_in": "continuous",
    "schema": "mbp-10",
    "start": "2025-10-24",
    "end": "2026-02-28",
}
cost = client.metadata.get_cost(**cost_params)
print(f"Estimated cost: ${cost:.2f}")

# Step 2: Download data, streaming straight to disk.
# Passing ``path=`` to get_range makes the client write the DBN stream to the
# file as it arrives, instead of buffering the full response in memory and
# then calling to_file() — months of MBP-10 data can be far larger than RAM.
print("\n=== Starting download ===")
print("This may take a while for MBP-10 data...")

# Idempotent: ensures the target directory exists even if run standalone.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

print(f"Saving to {OUTPUT_PATH}...")
data = client.timeseries.get_range(
    dataset="GLBX.MDP3",
    symbols=["ES.c.0"],
    stype_in="continuous",
    schema="mbp-10",
    start="2025-10-24",
    end="2026-02-28",
    path=OUTPUT_PATH,  # stream to file; returned DBNStore is file-backed
)

# Step 3: Verify the file on disk and sanity-check its contents.
file_size = os.path.getsize(OUTPUT_PATH)
print(f"\n=== Verification ===")
print(f"File size: {file_size / (1024**3):.2f} GB ({file_size / (1024**2):.0f} MB)")

# Read back and check the date range. Decode the file exactly once: the
# previous version decoded it twice and selected a 'ts_event' column, but
# DBNStore.to_df() places ts_event on the DataFrame index (this script
# already relied on that for the date-range line), which raises KeyError.
# NOTE(review): to_df() materializes every record; for a very large pull
# this step itself may need substantial memory — confirm before running.
print("Reading back file to verify date range...")
stored = db.DBNStore.from_file(OUTPUT_PATH)
df = stored.to_df()

print(f"\nTotal records: {len(df):,}")
# ts_event is the index, so the date range comes from index min/max.
print(f"Date range: {df.index.min()} to {df.index.max()}")

print(f"\nFirst 5 records:")
print(df.head())
print(f"\nLast 5 records:")
print(df.tail())

print("\n=== Download complete! ===")
