#!/usr/bin/env python3
"""Download historical options OI data from Theta Data - v2 with incremental saves."""

import json
import os
import sys
import time
import urllib.parse
import urllib.request
from collections import defaultdict
from datetime import datetime, timedelta, date

# Local Theta Data terminal REST endpoint (v3 API).
BASE_URL = "http://localhost:25503/v3"
# Directory where one JSON history file per symbol is written.
OUTPUT_DIR = "/Users/lutherbot/.openclaw/workspace/data/stock_gamma_history"
# Tickers whose option open-interest history is downloaded.
SYMBOLS = ["TSLA", "PLTR", "ARM", "MSTR", "COIN", "SOFI", "CRWD", "AMD", "SMCI", "AAPL"]
# Inclusive calendar range scanned for weekday trading dates.
START_DATE = date(2025, 3, 1)
END_DATE = date(2026, 3, 17)
# Seconds slept before each API request (client-side rate limiting).
REQUEST_DELAY = 0.6

def api_get(endpoint, params):
    """GET {BASE_URL}/{endpoint} with *params* as the query string and return the parsed JSON body.

    A ``format=json`` parameter is appended to every request. Returns
    ``None`` (after logging the URL and the exception) on any network,
    HTTP, or JSON-decoding error.
    """
    # urlencode() percent-escapes keys and values; the previous manual
    # "&".join built malformed URLs for values containing spaces or
    # reserved characters.
    query = urllib.parse.urlencode({**params, "format": "json"})
    url = f"{BASE_URL}/{endpoint}?{query}"
    try:
        req = urllib.request.Request(url, headers={'Accept': 'application/json'})
        with urllib.request.urlopen(req, timeout=120) as resp:
            return json.loads(resp.read().decode())
    except Exception as e:
        # Deliberately broad: callers treat None as "no data", and one bad
        # request must not abort a multi-hour download run.
        print(f"  ERROR: {url} -> {e}", flush=True)
        return None

def get_expirations(symbol):
    """Return the sorted list of option expiration dates for *symbol*.

    Returns ``[]`` when the API call fails or the payload has no
    ``"response"`` key. Entries missing an ``"expiration"`` field or not
    formatted as YYYY-MM-DD are skipped.
    """
    data = api_get("option/list/expirations", {"symbol": symbol})
    if not data or "response" not in data:
        return []
    exps = []
    for item in data["response"]:
        try:
            exps.append(datetime.strptime(item["expiration"], "%Y-%m-%d").date())
        except (KeyError, TypeError, ValueError):
            # Skip malformed entries only; the previous bare `except:`
            # also swallowed KeyboardInterrupt/SystemExit.
            continue
    return sorted(exps)

def get_trading_dates(start, end):
    """Return every weekday (Mon-Fri) in the inclusive range [start, end]."""
    total_days = (end - start).days + 1
    candidates = (start + timedelta(days=offset) for offset in range(total_days))
    return [day for day in candidates if day.weekday() < 5]

def get_nearest_expirations(trading_date, all_expirations, n=3):
    """Return the first *n* expirations in *all_expirations* that fall on or after *trading_date*."""
    selected = []
    for candidate in all_expirations:
        if candidate >= trading_date:
            selected.append(candidate)
            if len(selected) == n:
                break
    return selected

def _month_chunks(range_start, range_end):
    """Yield (chunk_start, chunk_end) date pairs splitting the inclusive range on calendar-month boundaries."""
    current = range_start.replace(day=1)
    while current <= range_end:
        if current.month == 12:
            next_month = current.replace(year=current.year + 1, month=1)
        else:
            next_month = current.replace(month=current.month + 1)
        chunk_start = max(current, range_start)
        chunk_end = min(next_month - timedelta(days=1), range_end)
        current = next_month
        if chunk_start <= chunk_end:
            yield chunk_start, chunk_end

def download_symbol(symbol):
    """Download the open-interest history for one symbol into OUTPUT_DIR.

    For every weekday in [START_DATE, END_DATE] the 3 nearest future
    expirations are determined; each needed expiration is then queried once
    over the date window that references it (in monthly chunks to bound
    response size), and per-contract OI is folded into a date-keyed
    structure written as ``{symbol}_oi_history.json``.

    Returns the number of dates that received at least one contract, or the
    previously saved count when a complete file (> 200 dates) already exists.
    """
    output_path = os.path.join(OUTPUT_DIR, f"{symbol}_oi_history.json")

    # Resume support: skip symbols a previous run already finished.
    if os.path.exists(output_path):
        try:
            with open(output_path) as f:
                existing = json.load(f)
            if existing.get("complete") and existing.get("dates_covered", 0) > 200:
                print(f"  {symbol} already complete ({existing['dates_covered']} dates), skipping", flush=True)
                return existing['dates_covered']
        except (OSError, ValueError, KeyError):
            # Corrupt or partial file: fall through and re-download.
            # (Previously a bare `except:` that also hid KeyboardInterrupt.)
            pass

    print(f"\n{'='*60}", flush=True)
    print(f"DOWNLOADING: {symbol}", flush=True)
    print(f"{'='*60}", flush=True)

    all_exps = get_expirations(symbol)
    if not all_exps:
        print(f"  No expirations found for {symbol}", flush=True)
        return 0

    relevant_exps = [e for e in all_exps if e >= START_DATE]
    print(f"  {len(relevant_exps)} relevant expirations", flush=True)

    trading_dates = get_trading_dates(START_DATE, END_DATE)

    # Map each trading day to its 3 nearest future expirations, and collect
    # the union so each expiration is queried once instead of per-day.
    date_to_exps = {}
    needed_exps = set()
    for td in trading_dates:
        nearest = get_nearest_expirations(td, relevant_exps, n=3)
        date_to_exps[td] = nearest
        needed_exps.update(nearest)

    print(f"  {len(needed_exps)} unique expirations to query", flush=True)

    # For each expiration, the span of trading dates that reference it —
    # only that window of OI history needs to be fetched.
    exp_ranges = {}
    for exp in sorted(needed_exps):
        dates_using = [td for td, exps in date_to_exps.items() if exp in exps]
        if dates_using:
            exp_ranges[exp] = (min(dates_using), max(dates_using))

    # daily_data[date_str] = {"expirations_used": [...], "contracts": [...]}
    daily_data = defaultdict(lambda: {"expirations_used": [], "contracts": []})

    query_count = 0
    total_exps = len(exp_ranges)

    for idx, exp in enumerate(sorted(exp_ranges.keys())):
        exp_start, exp_end = exp_ranges[exp]
        exp_str = exp.strftime("%Y-%m-%d")

        # Query in monthly chunks to keep individual responses small.
        for q_start, q_end in _month_chunks(exp_start, exp_end):
            time.sleep(REQUEST_DELAY)  # crude client-side rate limit
            result = api_get("option/history/open_interest", {
                "symbol": symbol,
                "expiration": exp_str,
                "start_date": q_start.strftime("%Y-%m-%d"),
                "end_date": q_end.strftime("%Y-%m-%d")
            })
            query_count += 1

            if not result or "response" not in result:
                continue

            # Fold into daily_data immediately; never retain raw responses.
            for item in result["response"]:
                contract = item.get("contract", {})
                for dp in item.get("data", []):
                    ts = dp.get("timestamp", "")
                    dp_date = ts[:10]
                    oi = dp.get("open_interest", 0)
                    if oi > 0:  # zero-OI rows carry no information worth storing
                        daily_data[dp_date]["contracts"].append({
                            "strike": contract.get("strike", 0),
                            "right": contract.get("right", ""),
                            "expiration": contract.get("expiration", ""),
                            "open_interest": oi
                        })

            # Free the raw response before fetching the next chunk.
            del result

        if (idx + 1) % 5 == 0 or idx == total_exps - 1:
            print(f"  [{symbol}] {idx+1}/{total_exps} expirations done, {query_count} queries, {len(daily_data)} dates", flush=True)

    # Record which expirations were considered for each trading date.
    for td in trading_dates:
        td_str = td.strftime("%Y-%m-%d")
        if td_str in daily_data:
            daily_data[td_str]["expirations_used"] = [e.strftime("%Y-%m-%d") for e in date_to_exps[td]]

    # Persist. download_date was previously the hard-coded literal
    # "2026-03-18", which goes stale after one day; stamp the actual date.
    dates_with_data = len([d for d in daily_data if daily_data[d]["contracts"]])
    output = {
        "symbol": symbol,
        "download_date": date.today().isoformat(),
        "dates_covered": dates_with_data,
        "complete": True,
        "data": dict(daily_data)
    }

    with open(output_path, 'w') as f:
        json.dump(output, f)

    file_size = os.path.getsize(output_path) / (1024 * 1024)
    print(f"  ✓ {symbol}: {dates_with_data} dates, {file_size:.1f} MB, {query_count} queries", flush=True)
    return dates_with_data

def main():
    """Entry point: download OI history for every configured symbol, then print a pass/fail summary."""
    print("Theta Data Options OI Download v2", flush=True)
    print(f"Symbols: {', '.join(SYMBOLS)}", flush=True)
    print(f"Range: {START_DATE} to {END_DATE}", flush=True)

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    results = {}
    for ticker in SYMBOLS:
        try:
            results[ticker] = download_symbol(ticker)
        except Exception as exc:
            # One symbol failing must not stop the rest of the run.
            print(f"  FAILED: {ticker} -> {exc}", flush=True)
            import traceback
            traceback.print_exc()
            results[ticker] = 0

    print("\nSUMMARY:", flush=True)
    for ticker, day_count in results.items():
        marker = "✓" if day_count > 200 else "⚠"
        print(f"  {marker} {ticker}: {day_count} days", flush=True)

if __name__ == "__main__":
    main()
