#!/usr/bin/env python3
"""Download completed batch job and verify."""

import databento as db
import os

# Credentials must not live in source control: the key is taken from the
# environment instead of being embedded in this file.
# NOTE(review): a key that was previously committed here should be treated as
# leaked and rotated.
API_KEY = os.environ.get("DATABENTO_API_KEY")
if not API_KEY:
    raise SystemExit(
        "DATABENTO_API_KEY is not set; export it before running this script"
    )

# Directory handed to the batch download, and the final path the downloaded
# .dbn.zst file is moved to.
OUTPUT_DIR = os.path.expanduser("~/.openclaw/workspace/data/mbp10")
OUTPUT_PATH = os.path.join(OUTPUT_DIR, "ES_mbp10_20251024_20260228.dbn.zst")
# Identifier of the batch job that is listed and downloaded below.
JOB_ID = "GLBX-20260314-B3WC6RDDD4"

client = db.Historical(key=API_KEY)

# Job summary: the ID comes from the configuration above; the other figures
# are fixed values written into this script.
for summary_line in (
    f"Job: {JOB_ID}",
    "Records: 995,169,389",
    "Cost: $139.08",
    "Uncompressed: ~366 GB",
):
    print(summary_line)

# Best-effort listing of the job's files: a failure is reported on stdout and
# the script carries on.
print("\n=== Listing files for job ===")
try:
    files_list = client.batch.list_files(JOB_ID)
except Exception as err:
    print(f"list_files error: {err}")
else:
    print(f"Files available: {files_list}")

# Download the job with OUTPUT_DIR as the output directory, then report the
# size of every returned path that is present on disk.
print("\n=== Downloading (this will take a while) ===")
files = client.batch.download(JOB_ID, output_dir=OUTPUT_DIR)
print(f"\nDownloaded {len(files)} file(s):")
for downloaded in map(str, files):
    if not os.path.exists(downloaded):
        continue  # paths missing on disk are left out of the report
    size_gb = os.path.getsize(downloaded) / (1024**3)
    print(f"  {downloaded} ({size_gb:.2f} GB)")

# Find the downloaded .dbn.zst file and move it to OUTPUT_PATH.
dbn_files = [str(f) for f in files if str(f).endswith('.dbn.zst')]
if len(dbn_files) > 1:
    # Only the first match is moved; say so instead of silently ignoring the
    # remaining files.
    print(
        f"\nWARNING: {len(dbn_files)} .dbn.zst files downloaded; "
        f"only {dbn_files[0]} will be renamed"
    )
if dbn_files:
    src = dbn_files[0]
    if src != OUTPUT_PATH and os.path.exists(src):
        # os.replace overwrites an existing destination in one step. The
        # previous remove-then-rename sequence could delete the old file and
        # then fail to put the new one in its place.
        os.replace(src, OUTPUT_PATH)
        print(f"\nRenamed to {OUTPUT_PATH}")

# Verify: prefer the renamed file, fall back to the first downloaded .dbn.zst,
# then decode a small first chunk to confirm the file is readable.
target = OUTPUT_PATH if os.path.exists(OUTPUT_PATH) else (dbn_files[0] if dbn_files else None)
if target and os.path.exists(target):
    file_size = os.path.getsize(target)
    print("\n=== Verification ===")
    print(f"File: {target}")
    print(f"File size: {file_size / (1024**3):.2f} GB")

    print("\nReading first few records...")
    stored = db.DBNStore.from_file(target)
    # With `count` set, to_df() hands back an iterator of DataFrames (each of
    # at most `count` rows) rather than a single DataFrame, so only the first
    # chunk is pulled here. The default of None covers an exhausted iterator.
    df = next(iter(stored.to_df(count=10)), None)
    if df is None or df.empty:
        # Without this guard, df.index[0] below would raise on a file that
        # holds no records.
        print("\nERROR: File contains no records")
        raise SystemExit(1)
    print(f"First record timestamp: {df.index[0]}")
    print("Schema: mbp-10")  # fixed label; not read back from the file
    print(f"Columns: {list(df.columns)}")

    print("\n=== SUCCESS ===")
else:
    print("\nERROR: Could not find downloaded file")
    print("Files in output dir:")
    # Dump whatever is under OUTPUT_DIR to help locate the download by hand.
    for root, _dirs, fnames in os.walk(OUTPUT_DIR):
        for fn in fnames:
            fp = os.path.join(root, fn)
            print(f"  {fp} ({os.path.getsize(fp) / (1024**2):.0f} MB)")
    # Exit non-zero so shells and schedulers can tell the run failed.
    raise SystemExit(1)
