#!/usr/bin/env python3
"""
Gamma Tilt by Participant Type Backtest
========================================
Tests whether specific participant types (MM, Customer, Firm, ProCustomer, BrokerDealer)
produce more predictive gamma tilt signals than the combined total.

TILT DEFINITION (matching gamma shift v5):
  Tilt = |negative_gamma above spot| / total |negative_gamma|
  Higher tilt → more short gamma above spot → bullish (dealers hedge by buying)
"""

import json
import os
import warnings
import numpy as np
import pandas as pd
from datetime import datetime, time as dtime, timedelta
from scipy import stats

warnings.filterwarnings('ignore')

WORKSPACE = '/Users/lutherbot/.openclaw/workspace'
TRACE_DIR = os.path.join(WORKSPACE, 'data/trace_uncorrupted')
ES_FILE = os.path.join(WORKSPACE, 'data/es_1min_delta_bars.csv')
FOMC_FILE = os.path.join(WORKSPACE, 'data/fomc_dates.json')
OUTPUT_FILE = os.path.join(WORKSPACE, 'data/gamma_tilt_participant_results.json')

CORRUPT_START = pd.Timestamp('2025-10-27')
CORRUPT_END = pd.Timestamp('2026-02-17')

# Participant gamma columns (non-0DTE + 0DTE)
PARTICIPANTS = ['mm', 'cust', 'firm', 'procust', 'bd']
PARTICIPANT_LABELS = {'mm': 'MM', 'cust': 'Customer', 'firm': 'Firm', 'procust': 'ProCust', 'bd': 'BrokerDealer'}


def load_fomc_dates():
    """Read the FOMC calendar JSON and return dates as 'YYYY-MM-DD' strings.

    Tolerates three file layouts: a dict with a 'dates' key, a bare list,
    or a dict whose list-valued entries are concatenated together.
    """
    with open(FOMC_FILE) as fh:
        payload = json.load(fh)

    if isinstance(payload, dict) and 'dates' in payload:
        raw_dates = payload['dates']
    elif isinstance(payload, list):
        raw_dates = payload
    else:
        # Fallback: gather every list value found in the mapping.
        raw_dates = []
        for value in payload.values():
            if isinstance(value, list):
                raw_dates.extend(value)

    return {str(pd.Timestamp(d).date()) for d in raw_dates}


def load_es_bars():
    """Load 1-minute ES bars, time-sorted, with a calendar 'date' column added."""
    bars = pd.read_csv(ES_FILE, parse_dates=['timestamp'])
    bars = bars.sort_values('timestamp')
    bars = bars.reset_index(drop=True)
    bars['date'] = bars['timestamp'].dt.date
    return bars


def compute_neg_gamma_tilt(snap, spot_price, participant=None):
    """
    Negative-gamma tilt for one participant, or for all combined.

    Tilt = |negative gamma at strikes above spot| / |total negative gamma|.
    A higher tilt means more short gamma sits above spot (bullish per the
    module docstring). Each participant contributes two columns: its
    regular gamma and its 0DTE gamma.

    Returns NaN when there is no negative gamma anywhere in the snapshot.
    """
    gamma_cols = [f'{p}_gamma' for p in PARTICIPANTS]
    gamma0_cols = [f'{p}_gamma_0' for p in PARTICIPANTS]

    if participant is None:
        # Aggregate every participant's regular + 0DTE gamma per strike.
        gamma = sum(snap[c] for c in gamma_cols) + sum(snap[c] for c in gamma0_cols)
    else:
        gamma = snap[f'{participant}_gamma'] + snap[f'{participant}_gamma_0']

    # Keep only the short-gamma magnitude: zero where gamma >= 0, |gamma| elsewhere.
    short_gamma = gamma.clip(upper=0).abs()

    denominator = short_gamma.sum()
    if denominator > 0:
        numerator = short_gamma[snap['strike_price'] > spot_price].sum()
        return float(numerator / denominator)
    return np.nan


def process_all_days():
    """Process all TRACE files and compute tilts + forward returns.

    For each trading day: pick the morning snapshot closest to 10:00 ET,
    compute the negative-gamma tilt (total and per participant), then
    measure 1H / 3H / rest-of-day forward ES returns from the signal bar.
    Days in the corrupted window, FOMC days, and low-volume sessions are
    skipped; skip counts are tallied and printed.

    Returns a DataFrame with one row per retained day.
    """
    print("Loading ES bars...")
    es_df = load_es_bars()
    
    print("Loading FOMC dates...")
    fomc_dates = load_fomc_dates()
    print(f"  {len(fomc_dates)} FOMC dates")
    
    files = sorted([f for f in os.listdir(TRACE_DIR) if f.endswith('.parquet')])
    print(f"  {len(files)} TRACE files")
    
    results = []
    skipped = {'corrupt': 0, 'fomc': 0, 'no_morning': 0, 'no_es': 0, 'low_vol': 0}
    
    for fname in files:
        # Filename pattern: intradayStrikeGEX_<YYYY-MM-DD>.parquet
        date_str = fname.replace('intradayStrikeGEX_', '').replace('.parquet', '')
        date = pd.Timestamp(date_str)
        
        if CORRUPT_START <= date <= CORRUPT_END:
            skipped['corrupt'] += 1
            continue
        
        if date_str in fomc_dates:
            skipped['fomc'] += 1
            continue
        
        # Load TRACE
        df = pd.read_parquet(os.path.join(TRACE_DIR, fname))
        
        # Morning snapshots (9:30-11:00 ET)
        morning = df[
            ((df['timestamp'].dt.hour == 9) & (df['timestamp'].dt.minute >= 30)) |
            ((df['timestamp'].dt.hour == 10)) |
            ((df['timestamp'].dt.hour == 11) & (df['timestamp'].dt.minute == 0))
        ]
        
        if len(morning) == 0:
            skipped['no_morning'] += 1
            continue
        
        # Find snapshot closest to 10:00 ET
        # NOTE(review): the min() below subtracts a tz-aware target from the
        # parquet timestamps — this assumes the TRACE 'timestamp' column is
        # also tz-aware (ET); confirm, otherwise the comparison raises.
        target = pd.Timestamp(f'{date_str} 10:00:00', tz='America/New_York')
        timestamps = morning['timestamp'].unique()
        best_ts = min(timestamps, key=lambda x: abs(x - target))
        snap = morning[morning['timestamp'] == best_ts].copy()
        
        if len(snap) == 0:
            skipped['no_morning'] += 1
            continue
        
        # Get ES spot price
        date_es = es_df[es_df['date'] == date.date()]
        if len(date_es) == 0:
            skipped['no_es'] += 1
            continue
        
        # Low-volume filter: treat thin sessions (holidays/half-days) as unusable.
        day_volume = date_es['volume'].sum()
        if day_volume < 100000:
            skipped['low_vol'] += 1
            continue
        
        # Convert signal time to UTC for matching ES bars
        # ET→UTC offset is +4h (EDT) or +5h (EST); DST is not tracked here,
        # so both offsets are tried in order.
        signal_hour_et = best_ts.hour
        signal_min_et = best_ts.minute
        
        spot = None
        for offset in [4, 5]:
            utc_hour = signal_hour_et + offset
            target_bar = pd.Timestamp(f'{date_str} {utc_hour:02d}:{signal_min_et:02d}:00')
            nearby = date_es[(date_es['timestamp'] >= target_bar - pd.Timedelta(minutes=10)) &
                           (date_es['timestamp'] <= target_bar + pd.Timedelta(minutes=10))]
            if len(nearby) > 0:
                # Middle bar of the ±10-minute window approximates the signal-time close.
                spot = nearby.iloc[len(nearby)//2]['close']
                break
        
        if spot is None:
            # Fallback: use median strike
            spot = snap['strike_price'].median()
        
        # Compute tilt for each participant and total
        row = {'date': date_str, 'spot': float(spot), 'signal_time': str(best_ts)}
        
        row['total_tilt'] = compute_neg_gamma_tilt(snap, spot, participant=None)
        for p in PARTICIPANTS:
            row[f'{p}_tilt'] = compute_neg_gamma_tilt(snap, spot, participant=p)
        
        # Forward returns from signal time
        # Find ES bar at signal time
        for offset in [4, 5]:
            utc_hour = signal_hour_et + offset
            target_bar = pd.Timestamp(f'{date_str} {utc_hour:02d}:{signal_min_et:02d}:00')
            mask = (date_es['timestamp'] >= target_bar - pd.Timedelta(minutes=2)) & \
                   (date_es['timestamp'] <= target_bar + pd.Timedelta(minutes=2))
            if len(date_es[mask]) > 0:
                break
        
        if len(date_es[mask]) == 0:
            skipped['no_es'] += 1
            continue
        
        signal_idx = date_es[mask].index[0]
        signal_price = date_es.loc[signal_idx, 'close']
        
        # 1H forward
        # NOTE(review): index offsets (+60/+180/+300) stand in for minutes;
        # this assumes a contiguous 1-bar-per-minute RangeIndex with no gaps
        # within the day — TODO confirm against the ES bar file.
        future_1h = date_es[date_es.index >= signal_idx + 60]
        row['ret_1h'] = float((future_1h.iloc[0]['close'] - signal_price) / signal_price * 10000) if len(future_1h) > 0 else np.nan
        
        # 3H forward
        future_3h = date_es[date_es.index >= signal_idx + 180]
        row['ret_3h'] = float((future_3h.iloc[0]['close'] - signal_price) / signal_price * 10000) if len(future_3h) > 0 else np.nan
        
        # Close return (rest of day)
        day_close_bars = date_es[date_es.index >= signal_idx + 300]
        if len(day_close_bars) > 0:
            row['ret_close'] = float((day_close_bars.iloc[-1]['close'] - signal_price) / signal_price * 10000)
        else:
            # Fewer than ~5h of bars remain: fall back to the last bar of the day.
            last_bar = date_es.iloc[-1]
            row['ret_close'] = float((last_bar['close'] - signal_price) / signal_price * 10000)
        
        results.append(row)
    
    print(f"\nProcessed: {len(results)} days")
    print(f"Skipped: {skipped}")
    return pd.DataFrame(results)


def ic(tilts, rets):
    """Spearman rank IC between tilt values and forward returns.

    NaNs in either array are dropped pairwise. Requires at least 20 valid
    pairs; otherwise returns (nan, nan). Returns (rho, p_value).
    """
    valid = ~np.isnan(tilts) & ~np.isnan(rets)
    if valid.sum() < 20:
        return np.nan, np.nan
    rho, p_value = stats.spearmanr(tilts[valid], rets[valid])
    return rho, p_value


def wr(tilts, rets, thresh):
    """Win rate of forward returns on days where tilt exceeds thresh.

    NaN returns are excluded from both the rate and the count. Returns
    (win_rate, n); win_rate is NaN when fewer than 5 observations remain.
    """
    selected = rets[tilts > thresh]
    selected = selected[~np.isnan(selected)]
    n = len(selected)
    if n < 5:
        return np.nan, n
    return float(np.mean(selected > 0)), n


def run_analysis(df):
    """Run the full tilt-vs-return study, print it, and return a results dict.

    `df` is the per-day frame from process_all_days() (must contain the
    *_tilt, ret_1h, ret_3h, ret_close columns and be date-sorted). Ten
    numbered sections are printed; all but section 10 also write into the
    returned JSON-serializable dict.
    """
    results = {}
    tilt_cols = ['total_tilt'] + [f'{p}_tilt' for p in PARTICIPANTS]
    ret_cols = [('ret_1h', '1H'), ('ret_3h', '3H'), ('ret_close', 'Close')]
    
    # Chronological 60/40 in-sample / out-of-sample split (df is date-sorted).
    n = len(df)
    split = int(n * 0.6)
    is_df = df.iloc[:split]
    oos_df = df.iloc[split:]
    
    print(f"\n{'='*80}")
    print(f"GAMMA TILT BY PARTICIPANT TYPE — NEG GAMMA TILT")
    print(f"{'='*80}")
    print(f"Total days: {n}, IS: {len(is_df)} ({is_df['date'].iloc[0]} → {is_df['date'].iloc[-1]})")
    print(f"OOS: {len(oos_df)} ({oos_df['date'].iloc[0]} → {oos_df['date'].iloc[-1]})")
    
    # ==========================================
    # 1. TILT STATISTICS
    # ==========================================
    print(f"\n{'='*80}")
    print("1. TILT DISTRIBUTION BY PARTICIPANT")
    print(f"{'='*80}")
    
    tilt_stats = {}
    for col in tilt_cols:
        v = df[col].dropna()
        s = {
            'mean': round(float(v.mean()), 3),
            'std': round(float(v.std()), 3),
            'q25': round(float(v.quantile(0.25)), 3),
            'median': round(float(v.median()), 3),
            'q75': round(float(v.quantile(0.75)), 3),
            'pct_gt_75': round(float((v > 0.75).mean()) * 100, 1),
            'pct_gt_85': round(float((v > 0.85).mean()) * 100, 1),
        }
        tilt_stats[col] = s
        label = PARTICIPANT_LABELS.get(col.replace('_tilt', ''), col)
        print(f"  {label:<12} mean={s['mean']:.3f} std={s['std']:.3f} "
              f"Q25={s['q25']:.3f} med={s['median']:.3f} Q75={s['q75']:.3f} "
              f">75%:{s['pct_gt_75']:.1f}% >85%:{s['pct_gt_85']:.1f}%")
    
    results['tilt_stats'] = tilt_stats
    
    # ==========================================
    # 2. IC COMPARISON
    # ==========================================
    print(f"\n{'='*80}")
    print("2. INFORMATION COEFFICIENT (IC) — TILT vs FORWARD RETURN")
    print(f"{'='*80}")
    
    ic_results = {}
    for rc, rl in ret_cols:
        print(f"\n--- {rl} Forward ---")
        print(f"  {'Participant':<14} {'IS IC':>8} {'IS p':>8} {'OOS IC':>8} {'OOS p':>8} {'Full IC':>8} {'Consistent':>10}")
        print(f"  {'-'*70}")
        
        for col in tilt_cols:
            is_ic, is_p = ic(is_df[col].values, is_df[rc].values)
            oos_ic, oos_p = ic(oos_df[col].values, oos_df[rc].values)
            full_ic, full_p = ic(df[col].values, df[rc].values)
            # "Consistent" = IS and OOS ICs share a sign (both computable).
            consistent = (not np.isnan(is_ic)) and (not np.isnan(oos_ic)) and (np.sign(is_ic) == np.sign(oos_ic))
            
            label = PARTICIPANT_LABELS.get(col.replace('_tilt', ''), col)
            print(f"  {label:<14} {is_ic:>8.4f} {is_p:>8.4f} {oos_ic:>8.4f} {oos_p:>8.4f} {full_ic:>8.4f} {'YES' if consistent else 'NO':>10}")
            
            key = f'{col}_{rl}'
            ic_results[key] = {
                'is_ic': round(float(is_ic), 4) if not np.isnan(is_ic) else None,
                'is_p': round(float(is_p), 4) if not np.isnan(is_p) else None,
                'oos_ic': round(float(oos_ic), 4) if not np.isnan(oos_ic) else None,
                'oos_p': round(float(oos_p), 4) if not np.isnan(oos_p) else None,
                'full_ic': round(float(full_ic), 4) if not np.isnan(full_ic) else None,
                'consistent': consistent,
            }
    
    results['ic_comparison'] = ic_results
    
    # ==========================================
    # 3. WIN RATE AT THRESHOLDS
    # ==========================================
    print(f"\n{'='*80}")
    print("3. WIN RATE AT TILT THRESHOLDS")
    print(f"{'='*80}")
    
    wr_results = {}
    for rc, rl in ret_cols:
        print(f"\n--- {rl} Forward ---")
        for thresh in [0.60, 0.65, 0.70, 0.75, 0.80, 0.85]:
            print(f"\n  Tilt > {thresh*100:.0f}%:")
            print(f"  {'Participant':<14} {'Full WR':>8} {'N':>5} {'IS WR':>8} {'IS N':>5} {'OOS WR':>8} {'OOS N':>5}")
            print(f"  {'-'*60}")
            
            for col in tilt_cols:
                f_wr, f_n = wr(df[col].values, df[rc].values, thresh)
                i_wr, i_n = wr(is_df[col].values, is_df[rc].values, thresh)
                o_wr, o_n = wr(oos_df[col].values, oos_df[rc].values, thresh)
                
                label = PARTICIPANT_LABELS.get(col.replace('_tilt', ''), col)
                f_s = f"{f_wr*100:.1f}%" if not np.isnan(f_wr) else "N/A"
                i_s = f"{i_wr*100:.1f}%" if not np.isnan(i_wr) else "N/A"
                o_s = f"{o_wr*100:.1f}%" if not np.isnan(o_wr) else "N/A"
                print(f"  {label:<14} {f_s:>8} {f_n:>5} {i_s:>8} {i_n:>5} {o_s:>8} {o_n:>5}")
                
                wr_results[f'{col}_{rl}_{thresh}'] = {
                    'full_wr': round(float(f_wr), 4) if not np.isnan(f_wr) else None,
                    'full_n': f_n,
                    'is_wr': round(float(i_wr), 4) if not np.isnan(i_wr) else None,
                    'is_n': i_n,
                    'oos_wr': round(float(o_wr), 4) if not np.isnan(o_wr) else None,
                    'oos_n': o_n,
                }
    
    results['wr_thresholds'] = wr_results
    
    # ==========================================
    # 4. CORRELATION MATRIX
    # ==========================================
    print(f"\n{'='*80}")
    print("4. PARTICIPANT TILT CORRELATION MATRIX")
    print(f"{'='*80}")
    
    corr = df[tilt_cols].corr()
    # Rename for display
    labels = {c: PARTICIPANT_LABELS.get(c.replace('_tilt', ''), c) for c in tilt_cols}
    display_corr = corr.rename(index=labels, columns=labels)
    print(display_corr.round(3).to_string())
    results['correlation_matrix'] = corr.round(4).to_dict()
    
    # ==========================================
    # 5. AGREEMENT/DISAGREEMENT
    # ==========================================
    print(f"\n{'='*80}")
    print("5. AGREEMENT/DISAGREEMENT ANALYSIS")
    print(f"{'='*80}")
    
    agree_results = {}
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H')]:
        print(f"\n--- {rl} Forward ---")
        
        # Hand-picked boolean masks: participant tilts agreeing or diverging.
        scenarios = {
            'mm_AND_cust_both>75': (df['mm_tilt'] > 0.75) & (df['cust_tilt'] > 0.75),
            'mm>75_cust<50': (df['mm_tilt'] > 0.75) & (df['cust_tilt'] < 0.50),
            'total>75_AND_mm>75': (df['total_tilt'] > 0.75) & (df['mm_tilt'] > 0.75),
            'total>75_mm<60': (df['total_tilt'] > 0.75) & (df['mm_tilt'] < 0.60),
            'mm>60_cust>60_firm>60': (df['mm_tilt'] > 0.60) & (df['cust_tilt'] > 0.60) & (df['firm_tilt'] > 0.60),
            'mm>70_AND_total>70': (df['mm_tilt'] > 0.70) & (df['total_tilt'] > 0.70),
            'mm>65': df['mm_tilt'] > 0.65,
            'mm>70': df['mm_tilt'] > 0.70,
            'total>65': df['total_tilt'] > 0.65,
            'total>70': df['total_tilt'] > 0.70,
        }
        
        for name, mask in scenarios.items():
            rets = df.loc[mask, rc].dropna()
            w = (rets > 0).mean() if len(rets) >= 3 else np.nan
            m = rets.mean() if len(rets) >= 3 else np.nan
            w_str = f"{w*100:.1f}%" if not np.isnan(w) else "N/A"
            m_str = f"{m:.1f}" if not np.isnan(m) else "N/A"
            print(f"  {name:<30} WR={w_str:>7} Mean={m_str:>7}bps N={len(rets)}")
            
            agree_results[f'{name}_{rl}'] = {
                'wr': round(float(w), 4) if not np.isnan(w) else None,
                'mean_bps': round(float(m), 2) if not np.isnan(m) else None,
                'n': len(rets),
            }
    
    results['agreement_disagreement'] = agree_results
    
    # ==========================================
    # 6. QUINTILE ANALYSIS
    # ==========================================
    print(f"\n{'='*80}")
    print("6. QUINTILE ANALYSIS")
    print(f"{'='*80}")
    
    quint_results = {}
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H')]:
        print(f"\n--- {rl} Forward ---")
        for col in tilt_cols:
            valid = df[[col, rc]].dropna()
            if len(valid) < 50:
                continue
            try:
                # qcut may still raise when explicit labels don't match the
                # surviving bin count after duplicates='drop'; skip that column.
                valid['q'] = pd.qcut(valid[col], 5, labels=[1,2,3,4,5], duplicates='drop')
            except ValueError:
                continue
            if valid['q'].nunique() < 4:
                continue
            
            qs = valid.groupby('q')[rc].agg(['mean', 'count', lambda x: (x > 0).mean()])
            qs.columns = ['mean_ret', 'n', 'wr']
            
            # Monotonic = quintile mean returns are non-decreasing Q1→Q5.
            means = qs['mean_ret'].values
            mono = all(means[i] <= means[i+1] for i in range(len(means)-1))
            spread = float(qs.iloc[-1]['mean_ret'] - qs.iloc[0]['mean_ret'])
            
            label = PARTICIPANT_LABELS.get(col.replace('_tilt', ''), col)
            print(f"\n  {label}:")
            for q_idx in qs.index:
                r = qs.loc[q_idx]
                print(f"    Q{q_idx}: Mean={r['mean_ret']:>7.1f}bps WR={r['wr']*100:>5.1f}% N={int(r['n'])}")
            print(f"    Q5-Q1: {spread:>+.1f}bps  Monotonic: {'YES' if mono else 'NO'}")
            
            quint_results[f'{col}_{rl}'] = {
                'spread_bps': round(spread, 2),
                'monotonic': mono,
                'quintiles': {str(int(k)): {'mean': round(float(v['mean_ret']), 2), 'wr': round(float(v['wr']), 4), 'n': int(v['n'])} for k, v in qs.iterrows()},
            }
    
    results['quintile_analysis'] = quint_results
    
    # ==========================================
    # 7. CUSTOMER/FIRM CONTRARIAN
    # ==========================================
    print(f"\n{'='*80}")
    print("7. CONTRARIAN ANALYSIS (CUSTOMER & FIRM)")
    print(f"{'='*80}")
    
    contrarian = {}
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H')]:
        print(f"\n--- {rl} Forward ---")
        for p in ['cust', 'firm', 'procust']:
            col = f'{p}_tilt'
            label = PARTICIPANT_LABELS.get(p, p)
            
            for thresh_label, mask in [('High >75%', df[col] > 0.75), ('High >70%', df[col] > 0.70),
                                        ('Low <30%', df[col] < 0.30), ('Low <25%', df[col] < 0.25)]:
                rets = df.loc[mask, rc].dropna()
                w = (rets > 0).mean() if len(rets) >= 5 else np.nan
                m = rets.mean() if len(rets) >= 5 else np.nan
                w_str = f"{w*100:.1f}%" if not np.isnan(w) else "N/A"
                m_str = f"{m:.1f}" if not np.isnan(m) else "N/A"
                print(f"  {label} {thresh_label:<12} WR={w_str:>7} Mean={m_str:>7}bps N={len(rets)}")
                
                contrarian[f'{p}_{thresh_label}_{rl}'] = {
                    'wr': round(float(w), 4) if not np.isnan(w) else None,
                    'mean_bps': round(float(m), 2) if not np.isnan(m) else None,
                    'n': len(rets),
                }
    
    results['contrarian_analysis'] = contrarian
    
    # ==========================================
    # 8. WEIGHTED COMBINATIONS
    # ==========================================
    print(f"\n{'='*80}")
    print("8. WEIGHTED PARTICIPANT COMBINATIONS vs TOTAL BASELINE")
    print(f"{'='*80}")
    
    combo_results = {}
    schemes = {
        'total_baseline': {'total_tilt': 1.0},
        'equal_5': {'mm_tilt': 0.2, 'cust_tilt': 0.2, 'firm_tilt': 0.2, 'procust_tilt': 0.2, 'bd_tilt': 0.2},
        'mm_heavy': {'mm_tilt': 0.5, 'cust_tilt': 0.15, 'firm_tilt': 0.15, 'procust_tilt': 0.1, 'bd_tilt': 0.1},
        'mm_only': {'mm_tilt': 1.0},
        'mm+procust': {'mm_tilt': 0.6, 'procust_tilt': 0.4},
        'no_customer': {'mm_tilt': 0.4, 'firm_tilt': 0.25, 'procust_tilt': 0.25, 'bd_tilt': 0.1},
        'cust_contrarian': {'mm_tilt': 0.5, 'cust_tilt': -0.2, 'firm_tilt': 0.3, 'procust_tilt': 0.2, 'bd_tilt': 0.2},
        'mm+bd': {'mm_tilt': 0.6, 'bd_tilt': 0.4},
    }
    
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H')]:
        print(f"\n--- {rl} Forward ---")
        print(f"  {'Scheme':<20} {'IS IC':>8} {'OOS IC':>8} {'Full IC':>8} {'WR>70':>8} {'N>70':>5} {'WR>65':>8} {'N>65':>5}")
        print(f"  {'-'*75}")
        
        for name, weights in schemes.items():
            # Missing tilts are imputed at the 0.5 midpoint so the weighted
            # sum stays defined for every day.
            weighted = sum(w * df[col].fillna(0.5) for col, w in weights.items())
            
            is_w = weighted.iloc[:split]
            oos_w = weighted.iloc[split:]
            
            i_ic, _ = ic(is_w.values, is_df[rc].values)
            o_ic, _ = ic(oos_w.values, oos_df[rc].values)
            f_ic, _ = ic(weighted.values, df[rc].values)
            
            w70, n70 = wr(weighted.values, df[rc].values, 0.70)
            w65, n65 = wr(weighted.values, df[rc].values, 0.65)
            
            w70s = f"{w70*100:.1f}%" if not np.isnan(w70) else "N/A"
            w65s = f"{w65*100:.1f}%" if not np.isnan(w65) else "N/A"
            
            print(f"  {name:<20} {i_ic:>8.4f} {o_ic:>8.4f} {f_ic:>8.4f} {w70s:>8} {n70:>5} {w65s:>8} {n65:>5}")
            
            combo_results[f'{name}_{rl}'] = {
                'is_ic': round(float(i_ic), 4) if not np.isnan(i_ic) else None,
                'oos_ic': round(float(o_ic), 4) if not np.isnan(o_ic) else None,
                'full_ic': round(float(f_ic), 4) if not np.isnan(f_ic) else None,
                'wr_70': round(float(w70), 4) if not np.isnan(w70) else None,
                'n_70': n70,
                'wr_65': round(float(w65), 4) if not np.isnan(w65) else None,
                'n_65': n65,
            }
    
    results['weighted_combinations'] = combo_results
    
    # ==========================================
    # 9. CONDITIONAL: TOTAL >70%, WHO DRIVES?
    # ==========================================
    print(f"\n{'='*80}")
    print("9. WHEN TOTAL TILT HIGH: WHICH PARTICIPANT PREDICTS SUCCESS?")
    print(f"{'='*80}")
    
    cond_results = {}
    for total_thresh in [0.65, 0.70, 0.75]:
        high = df[df['total_tilt'] > total_thresh]
        print(f"\n  Total tilt > {total_thresh*100:.0f}%: {len(high)} days")
        
        if len(high) < 10:
            continue
        
        for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H')]:
            rets_all = high[rc].dropna()
            base_wr = (rets_all > 0).mean() if len(rets_all) > 0 else np.nan
            base_mean = rets_all.mean() if len(rets_all) > 0 else np.nan
            print(f"\n    {rl}: Base WR={base_wr*100:.1f}%, Mean={base_mean:.1f}bps, N={len(rets_all)}")
            
            for p in PARTICIPANTS:
                col = f'{p}_tilt'
                label = PARTICIPANT_LABELS.get(p, p)
                
                # Split: participant also high vs not
                p_high = high[high[col] > total_thresh]
                p_low = high[high[col] <= total_thresh]
                
                r_ph = p_high[rc].dropna()
                r_pl = p_low[rc].dropna()
                
                wr_ph = (r_ph > 0).mean() if len(r_ph) >= 3 else np.nan
                wr_pl = (r_pl > 0).mean() if len(r_pl) >= 3 else np.nan
                
                wr_ph_s = f"{wr_ph*100:.1f}%" if not np.isnan(wr_ph) else "N/A"
                wr_pl_s = f"{wr_pl*100:.1f}%" if not np.isnan(wr_pl) else "N/A"
                
                print(f"      {label:<12} also>{total_thresh*100:.0f}%: WR={wr_ph_s:>7} N={len(r_ph):>3}  |  not>{total_thresh*100:.0f}%: WR={wr_pl_s:>7} N={len(r_pl):>3}")
                
                cond_results[f'{p}_{total_thresh}_{rl}'] = {
                    'high_wr': round(float(wr_ph), 4) if not np.isnan(wr_ph) else None,
                    'high_n': len(r_ph),
                    'low_wr': round(float(wr_pl), 4) if not np.isnan(wr_pl) else None,
                    'low_n': len(r_pl),
                }
    
    results['conditional_breakdown'] = cond_results
    
    # ==========================================
    # 10. MM TILT TOP DECILE DEEP DIVE
    # ==========================================
    # NOTE: this section is print-only; its numbers are not stored in results.
    print(f"\n{'='*80}")
    print("10. MM TILT TOP DECILE DEEP DIVE")
    print(f"{'='*80}")
    
    mm_thresh = df['mm_tilt'].quantile(0.90)
    print(f"  MM tilt 90th percentile: {mm_thresh:.3f}")
    
    mm_top = df[df['mm_tilt'] >= mm_thresh]
    print(f"  Days in top decile: {len(mm_top)}")
    
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H'), ('ret_close', 'Close')]:
        rets = mm_top[rc].dropna()
        w = (rets > 0).mean() if len(rets) >= 3 else np.nan
        m = rets.mean() if len(rets) > 0 else np.nan
        print(f"  {rl}: WR={w*100:.1f}%, Mean={m:.1f}bps, N={len(rets)}")
    
    # Same for total tilt top decile
    total_thresh = df['total_tilt'].quantile(0.90)
    print(f"\n  Total tilt 90th percentile: {total_thresh:.3f}")
    total_top = df[df['total_tilt'] >= total_thresh]
    print(f"  Days in top decile: {len(total_top)}")
    
    for rc, rl in [('ret_1h', '1H'), ('ret_3h', '3H'), ('ret_close', 'Close')]:
        rets = total_top[rc].dropna()
        w = (rets > 0).mean() if len(rets) >= 3 else np.nan
        m = rets.mean() if len(rets) > 0 else np.nan
        print(f"  {rl}: WR={w*100:.1f}%, Mean={m:.1f}bps, N={len(rets)}")
    
    return results


if __name__ == '__main__':
    # Build the per-day dataset, run the study, and persist results as JSON.
    df = process_all_days()
    df = df.sort_values('date').reset_index(drop=True)
    
    # Require a minimum sample before running statistics.
    if len(df) < 50:
        print(f"ERROR: Only {len(df)} days. Need 50+.")
    else:
        results = run_analysis(df)
        
        # Record run parameters alongside the stats for reproducibility.
        results['metadata'] = {
            'n_days': len(df),
            'date_range': [df['date'].min(), df['date'].max()],
            'tilt_definition': 'neg_gamma: |negative_gamma above spot| / total |negative_gamma|',
            'is_oos_split': '60/40 chronological',
            'signal_time': 'closest to 10:00 ET (9:30-11:00)',
            'excluded': 'corrupted 2025-10-27 to 2026-02-17, FOMC days, holidays (<100K volume)',
        }
        
        # default=str coerces non-JSON-native values (numpy scalars, dates).
        with open(OUTPUT_FILE, 'w') as f:
            json.dump(results, f, indent=2, default=str)
        
        print(f"\n\nResults saved to {OUTPUT_FILE}")