#!/usr/bin/env python3
"""Poll and download batch job GLBX-20260314-B3WC6RDDD4."""

import databento as db
import os
import time
import sys

# Configuration ---------------------------------------------------------------
# SECURITY: a live API key was committed here. Prefer the DATABENTO_API_KEY
# environment variable; the hard-coded value remains only as a backward-
# compatible fallback and should be rotated.
API_KEY = os.environ.get("DATABENTO_API_KEY", "db-EvJpB5Wi3xUMPvE4gLhwXBQa5J7B9")
OUTPUT_DIR = os.path.expanduser("~/.openclaw/workspace/data/mbp10")
OUTPUT_PATH = os.path.join(OUTPUT_DIR, "ES_mbp10_20251024_20260228.dbn.zst")
JOB_ID = "GLBX-20260314-B3WC6RDDD4"

# Historical client used for polling and downloading the batch job.
client = db.Historical(key=API_KEY)

# Cost was $139.08
print(f"Job ID: {JOB_ID}")
print("Estimated cost: $139.08")

# Poll for completion ---------------------------------------------------------
# The batch API may return job records either as dicts or as objects with
# attributes; normalize field access in one place instead of duck-typing at
# every use site.
def _job_field(job, field):
    """Return *field* from a batch-job record (dict key or attribute)."""
    return job[field] if isinstance(job, dict) else getattr(job, field)

print("\n=== Polling for completion ===")
while True:
    jobs = client.batch.list_jobs()
    if isinstance(jobs, list) and jobs:
        current = [j for j in jobs if _job_field(j, 'id') == JOB_ID]

        if not current:
            print(f"Job {JOB_ID} not found in list!")
            sys.exit(1)

        state = _job_field(current[0], 'state')
        print(f"  State: {state} (checked at {time.strftime('%H:%M:%S')})")

        if state == "done":
            break
        elif state in ("expired", "failed"):
            print(f"ERROR: Job {state}")
            sys.exit(1)
    else:
        # Empty or unexpected response: keep polling, but don't spin silently.
        print(f"  No jobs returned (checked at {time.strftime('%H:%M:%S')})")

    time.sleep(30)

# Download --------------------------------------------------------------------
# Make sure the target directory exists before downloading into it
# (client.batch.download may not create intermediate directories — TODO confirm).
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("\n=== Downloading ===")
files = client.batch.download(JOB_ID, output_dir=OUTPUT_DIR)
print(f"Downloaded {len(files)} file(s):")
for f in files:
    fsize = os.path.getsize(str(f))
    print(f"  {f} ({fsize / (1024**2):.0f} MB)")

# Find the main data file and rename it to the canonical output path.
dbn_files = [f for f in files if str(f).endswith('.dbn.zst')]
if dbn_files:
    src = str(dbn_files[0])
    if src != OUTPUT_PATH:
        # os.replace is atomic and overwrites an existing destination;
        # os.rename raises on Windows when OUTPUT_PATH already exists.
        os.replace(src, OUTPUT_PATH)
        print(f"Renamed to {OUTPUT_PATH}")

# Verify ----------------------------------------------------------------------
# Sanity-check the downloaded file: report its size and decode the first few
# records to confirm the DBN payload is readable.
if os.path.exists(OUTPUT_PATH):
    size_bytes = os.path.getsize(OUTPUT_PATH)
    size_gb = size_bytes / (1024 ** 3)
    size_mb = size_bytes / (1024 ** 2)
    print("\n=== Verification ===")
    print(f"File size: {size_gb:.2f} GB ({size_mb:.0f} MB)")

    store = db.DBNStore.from_file(OUTPUT_PATH)
    head = store.to_df(count=5)
    print(f"First record: {head.index[0]}")

    # Decoding the entire file just to inspect the tail would be expensive
    # for a multi-GB download, so only the leading records are checked.
    print("\n=== Download complete! ===")
    print("Note: Full verification of last record skipped for large file.")
    print("First 5 timestamps:")
    for stamp in head.index[:5]:
        print(f"  {stamp}")
