#!/usr/bin/env python3
"""
Deep dive on the 18 OOS signals from trace_mining.py.
Focus on actionable trading signals with quintile breakdowns, 
conditional analysis, and timing optimization.
"""

import pandas as pd
import numpy as np
from pathlib import Path
from scipy import stats
import json
import warnings
warnings.filterwarnings('ignore')

# Workspace locations: raw inputs live under DATA_DIR; normalized TRACE
# parquet snapshots (one file per session) live under TRACE_DIR.
DATA_DIR = Path('/Users/lutherbot/.openclaw/workspace/data')
TRACE_DIR = DATA_DIR / 'trace_normalized'

# FOMC meeting dates, excluded from the study below.  The strings must match
# the date suffix of the trace filenames (see the loader loop) — TODO confirm
# both use the same date format.
with open(DATA_DIR / 'fomc_dates.json') as f:
    FOMC_DATES = set(json.load(f)['dates'])

# Net-MM-gamma buckets.  Each tier is a half-open interval [lo, hi); together
# they cover the whole real line, so every finite value maps to exactly one tier.
GEX_TIERS = {
    'DEEP_NEG': (-np.inf, -100e6),
    'NEG': (-100e6, 0),
    'LOW_POS': (0, 100e6),
    'MID_POS': (100e6, 250e6),
    'HIGH_POS': (250e6, 500e6),
    'EXTREME_POS': (500e6, np.inf),
}

def get_gex_tier(val):
    """Return the GEX_TIERS name whose [lo, hi) interval contains `val`.

    Values that match no interval (NaN, or +inf since intervals are
    half-open) fall back to 'EXTREME_POS', mirroring the original behavior.
    """
    tier = next(
        (name for name, (lo, hi) in GEX_TIERS.items() if lo <= val < hi),
        None,
    )
    return 'EXTREME_POS' if tier is None else tier

# Load SPX 5-minute bars and normalize timestamps to the exchange timezone.
print("Loading SPX...")
spx = pd.read_csv(DATA_DIR / 'spx_5min_polygon.csv', parse_dates=['datetime'])
spx['datetime'] = pd.to_datetime(spx['datetime'], utc=True).dt.tz_convert('America/New_York')
spx = spx.sort_values('datetime').reset_index(drop=True)
spx['date'] = spx['datetime'].dt.date

# Load all TRACE and build comprehensive per-timestamp + daily features
print("Loading TRACE files...")
files = sorted(TRACE_DIR.glob('intradayStrikeGEX_*.parquet'))

all_daily = []
for i, f in enumerate(files):
    # The trace filename ends in the session date; skip FOMC days entirely.
    date_str = f.stem.split('_')[-1]
    if date_str in FOMC_DATES:
        continue

    try:
        df = pd.read_parquet(f)
    except Exception:
        # Skip unreadable/corrupt files.  Narrowed from a bare `except:` so
        # KeyboardInterrupt/SystemExit still propagate.
        continue

    # Per-timestamp aggregates: net gamma per participant class across all strikes.
    ts_agg = df.groupby('timestamp').agg(
        net_mm=('mm_gamma', 'sum'),
        net_cust=('cust_gamma', 'sum'),
        net_firm=('firm_gamma', 'sum'),
        net_bd=('bd_gamma', 'sum'),
        net_procust=('procust_gamma', 'sum'),
        net_mm_0=('mm_gamma_0', 'sum'),
    ).reset_index()

    ts_agg['hour'] = ts_agg['timestamp'].dt.hour
    ts_agg['minute'] = ts_agg['timestamp'].dt.minute

    # Filter to RTH (9:30 through the 16:00 print, inclusive).
    rth = ts_agg[((ts_agg['hour'] == 9) & (ts_agg['minute'] >= 30)) | 
                 ((ts_agg['hour'] >= 10) & (ts_agg['hour'] <= 15)) |
                 ((ts_agg['hour'] == 16) & (ts_agg['minute'] == 0))].sort_values('timestamp')

    if len(rth) < 5:  # too few RTH snapshots to build a usable day
        continue

    row = {'date_str': date_str, 'date': pd.Timestamp(date_str)}

    # Net gamma snapshots at fixed intraday checkpoints (columns like mm_1030).
    for target_h, target_m, label in [
        (9, 30, '0930'), (9, 40, '0940'), (10, 0, '1000'),
        (10, 30, '1030'), (11, 0, '1100'), (11, 30, '1130'),
        (12, 0, '1200'), (12, 30, '1230'), (13, 0, '1300'),
        (13, 30, '1330'), (14, 0, '1400'), (14, 30, '1430'),
        (15, 0, '1500'), (15, 30, '1530'),
    ]:
        snap = rth[(rth['hour'] == target_h) & (rth['minute'] == target_m)]
        if len(snap) > 0:
            s = snap.iloc[0]
            row[f'mm_{label}'] = s['net_mm']
            row[f'cust_{label}'] = s['net_cust']
            row[f'firm_{label}'] = s['net_firm']
            row[f'procust_{label}'] = s['net_procust']
            row[f'bd_{label}'] = s['net_bd']
            row[f'mm0_{label}'] = s['net_mm_0']

    # GEX velocity: change in net MM gamma over fixed windows (only when both
    # endpoint snapshots exist).
    if 'mm_0930' in row and 'mm_1030' in row:
        row['mm_velocity_1h'] = row['mm_1030'] - row['mm_0930']
    if 'mm_1130' in row and 'mm_1200' in row:
        row['mm_chg_1130_1200'] = row['mm_1200'] - row['mm_1130']

    # Compute tilt at key times from raw strike data: the share of |gamma|
    # sitting at/above the median strike (0.5 = balanced).
    for target_h, target_m, label in [
        (9, 30, '0930'), (10, 0, '1000'), (10, 30, '1030'),
        (11, 0, '1100'), (12, 0, '1200'), (13, 0, '1300'), (14, 0, '1400'),
    ]:
        snap = df[df['timestamp'].dt.hour == target_h]
        snap = snap[snap['timestamp'].dt.minute == target_m]
        if len(snap) > 0:
            center = snap['strike_price'].median()
            above = snap[snap['strike_price'] >= center]['mm_gamma'].sum()
            below = snap[snap['strike_price'] < center]['mm_gamma'].sum()
            total = abs(above) + abs(below)
            if total > 0:
                row[f'tilt_{label}'] = above / total

            # Also compute absolute asymmetry (positive gamma above vs below)
            pos_above = snap[snap['strike_price'] >= center]['mm_gamma'].clip(lower=0).sum()
            pos_below = snap[snap['strike_price'] < center]['mm_gamma'].clip(lower=0).sum()
            neg_above = snap[snap['strike_price'] >= center]['mm_gamma'].clip(upper=0).sum()
            neg_below = snap[snap['strike_price'] < center]['mm_gamma'].clip(upper=0).sum()

            total_pos = pos_above + pos_below
            if total_pos > 0:
                row[f'call_wall_ratio_{label}'] = pos_above / total_pos  # >0.5 = more positive gamma above

    # GEX tier at open: prefer the 9:30 snapshot, fall back to 9:40.
    open_gex = row.get('mm_0930', row.get('mm_0940', np.nan))
    # Proceed unless the value is a float NaN.  Equivalent to the original
    # nested conditional expression, rewritten as an explicit guard for clarity.
    if not (isinstance(open_gex, float) and np.isnan(open_gex)):
        row['gex_tier'] = get_gex_tier(open_gex)
        row['mm_open'] = open_gex

    # MM-Cust divergence
    if 'mm_0930' in row and 'cust_0930' in row:
        row['mm_cust_div'] = row['mm_0930'] - row['cust_0930']

    # Procust vs MM: do the two classes sit on opposite sides of zero at the open?
    if 'procust_0930' in row and 'mm_0930' in row:
        row['procust_mm_opposite'] = np.sign(row.get('procust_0930', 0)) != np.sign(row.get('mm_0930', 0))

    # Gamma concentration (HHI) and strike-weighted dispersion at the open,
    # from the 9:30 (or fallback 9:40) strike-level snapshot.
    snap_open = df[(df['timestamp'].dt.hour == 9) & (df['timestamp'].dt.minute == 30)]
    if len(snap_open) == 0:
        snap_open = df[(df['timestamp'].dt.hour == 9) & (df['timestamp'].dt.minute == 40)]
    if len(snap_open) > 0:
        mm_abs = snap_open['mm_gamma'].abs()
        total = mm_abs.sum()
        if total > 0:
            shares = mm_abs / total
            row['hhi_open'] = (shares ** 2).sum()
            weighted_mean = np.average(snap_open['strike_price'], weights=mm_abs)
            row['spread_open'] = np.sqrt(np.average((snap_open['strike_price'] - weighted_mean)**2, weights=mm_abs))

    all_daily.append(row)
    if (i+1) % 100 == 0:
        print(f"  {i+1} files...")

daily = pd.DataFrame(all_daily)
print(f"  Built {len(daily)} daily rows")

# Merge SPX returns
# Collapse 5-minute bars to daily OHLC so day-level returns can be joined on.
spx_daily = spx.groupby('date').agg(
    spx_open=('open', 'first'), spx_high=('high', 'max'),
    spx_low=('low', 'min'), spx_close=('close', 'last')
).reset_index()
spx_daily['date'] = pd.to_datetime(spx_daily['date'])
daily['date'] = pd.to_datetime(daily['date'])
# Inner join: keep only sessions present in both the TRACE and SPX datasets.
daily = daily.merge(spx_daily, on='date', how='inner')
daily['ret_oc'] = daily['spx_close'] / daily['spx_open'] - 1  # open→close simple return
daily['range_pct'] = (daily['spx_high'] - daily['spx_low']) / daily['spx_open']  # intraday range as % of open
daily['dow'] = daily['date'].dt.dayofweek  # 0=Mon ... 4=Fri

# Build intraday returns from SPX
print("Building intraday returns...")
for idx, row in daily.iterrows():
    date_val = row['date'].date() if hasattr(row['date'], 'date') else row['date']
    day_spx = spx[spx['date'] == date_val].sort_values('datetime')
    if len(day_spx) < 5:  # not enough bars to compute intraday legs
        continue
    close_price = day_spx.iloc[-1]['close']
    
    # Return from each intraday checkpoint to the close (columns like ret_1030_close).
    for th, tm, label in [
        (10, 0, '1000'), (10, 30, '1030'), (11, 0, '1100'),
        (11, 30, '1130'), (12, 0, '1200'), (13, 0, '1300'), (14, 0, '1400'),
    ]:
        snap = day_spx[(day_spx['datetime'].dt.hour == th) & (day_spx['datetime'].dt.minute == tm)]
        if len(snap) > 0:
            daily.loc[idx, f'ret_{label}_close'] = close_price / snap.iloc[0]['close'] - 1
    
    # First 15 and 30 min returns
    t930 = day_spx[(day_spx['datetime'].dt.hour == 9) & (day_spx['datetime'].dt.minute == 30)]
    t945 = day_spx[(day_spx['datetime'].dt.hour == 9) & (day_spx['datetime'].dt.minute == 45)]
    t1000 = day_spx[(day_spx['datetime'].dt.hour == 10) & (day_spx['datetime'].dt.minute == 0)]
    
    if len(t930) > 0 and len(t945) > 0:
        daily.loc[idx, 'ret_first_15'] = t945.iloc[0]['close'] / t930.iloc[0]['open'] - 1
    if len(t930) > 0 and len(t1000) > 0:
        daily.loc[idx, 'ret_first_30'] = t1000.iloc[0]['close'] / t930.iloc[0]['open'] - 1
    
    # 2H returns (for timing): checkpoint close vs the bar exactly two hours later.
    for th, tm, label in [(10, 0, '1000'), (10, 30, '1030'), (11, 0, '1100'), (12, 0, '1200'), (13, 0, '1300')]:
        snap = day_spx[(day_spx['datetime'].dt.hour == th) & (day_spx['datetime'].dt.minute == tm)]
        # Find price 2 hours later
        target_h = th + 2
        target_m = tm
        snap2 = day_spx[(day_spx['datetime'].dt.hour == target_h) & (day_spx['datetime'].dt.minute == target_m)]
        if len(snap) > 0 and len(snap2) > 0:
            daily.loc[idx, f'ret_{label}_2h'] = snap2.iloc[0]['close'] / snap.iloc[0]['close'] - 1

print(f"  Done. {len(daily)} days with returns")

# Previous day features (frame must be date-sorted for shift(1) to mean "previous session").
daily = daily.sort_values('date').reset_index(drop=True)
daily['prev_mm_open'] = daily['mm_open'].shift(1)
daily['gex_dod'] = daily['mm_open'] - daily['prev_mm_open']  # day-over-day GEX change
daily['prev_close'] = daily['spx_close'].shift(1)
daily['overnight_gap'] = daily['spx_open'] / daily['prev_close'] - 1

# IS/OOS split: chronological 60/40 — first 60% of sessions are in-sample.
all_dates = sorted(daily['date_str'].unique())
n_is = int(len(all_dates) * 0.6)
is_dates = set(all_dates[:n_is])
oos_dates = set(all_dates[n_is:])
daily['is_oos'] = daily['date_str'].isin(oos_dates)
daily['is_is'] = daily['date_str'].isin(is_dates)

print(f"\nIS: {len(is_dates)} days, OOS: {len(oos_dates)} days")
print(f"OOS range: {all_dates[n_is]} to {all_dates[-1]}")

# ============================================================
# DEEP DIVE ON TOP SIGNALS
# ============================================================

findings = []  # collector for notable results (declared but not appended to in this file)

def print_quintile_analysis(name, signal_col, target_col, df_subset, label=""):
    """Print a quintile breakdown of `target_col` conditioned on `signal_col`.

    Drops rows with NaN in either column, buckets the signal into (up to) five
    quantile bins, and prints per-bucket win rate (share of positive targets),
    average target in bps, count, and signal range, followed by the Spearman IC.

    Returns a dict keyed by quintile label with per-bucket stats, or None when
    fewer than 30 usable rows exist.
    """
    sub = df_subset.dropna(subset=[signal_col, target_col]).copy()
    if len(sub) < 30:
        print(f"  {name}: N={len(sub)} insufficient")
        return None
    
    # FIX: pd.qcut raises ValueError when explicit `labels` are combined with
    # duplicates='drop' and bins actually collapse (heavily tied signals).
    # Bucket with integer codes instead, then map codes onto the fixed label
    # list, so tied signals degrade to fewer buckets instead of crashing.
    quint_labels = ['Q1(low)', 'Q2', 'Q3', 'Q4', 'Q5(high)']
    codes = pd.qcut(sub[signal_col], 5, labels=False, duplicates='drop')
    sub['q'] = codes.map(lambda c: quint_labels[int(c)], na_action='ignore')
    
    print(f"\n{'='*60}")
    print(f"  {name} {label} (N={len(sub)})")
    print(f"{'='*60}")
    
    result = {}
    for q in quint_labels:
        qd = sub[sub['q'] == q]
        if len(qd) == 0:  # label unused when duplicate bins were dropped
            continue
        wr = (qd[target_col] > 0).mean()
        avg = qd[target_col].mean() * 10000  # bps
        print(f"  {q}: WR={wr:.1%}, Avg={avg:+.1f}bps, N={len(qd)}, Signal range=[{qd[signal_col].min():.4g}, {qd[signal_col].max():.4g}]")
        result[q] = {'wr': wr, 'avg_bps': avg, 'n': len(qd), 'range': [float(qd[signal_col].min()), float(qd[signal_col].max())]}
    
    ic, p = stats.spearmanr(sub[signal_col], sub[target_col])
    print(f"  IC={ic:.4f}, p={p:.4f}")
    return result

# ============================================================
# SIGNAL 1: Gamma Tilt - THE BIG ONE
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 1: GAMMA TILT (above/total MM gamma ratio)")
print("#"*60)

# Quintile tests of each tilt snapshot vs (a) the full-day return and
# (b) the remaining-day return from the same checkpoint.
for time_label in ['0930', '1000', '1030', '1100', '1200', '1300', '1400']:
    tilt_col = f'tilt_{time_label}'
    if tilt_col not in daily.columns:
        continue
    
    # vs O→C
    print_quintile_analysis(f'Tilt@{time_label} → O→C', tilt_col, 'ret_oc', daily[daily['is_oos']], 'OOS')
    print_quintile_analysis(f'Tilt@{time_label} → O→C', tilt_col, 'ret_oc', daily[daily['is_is']], 'IS')
    
    # vs time→close
    ret_col = f'ret_{time_label}_close'
    if ret_col in daily.columns:
        print_quintile_analysis(f'Tilt@{time_label} → {time_label}→Close', tilt_col, ret_col, daily[daily['is_oos']], 'OOS')

# Tilt by GEX tier (is it regime-dependent?)
print("\n--- Tilt conditioned on GEX tier ---")
for tier in ['LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
    sub = daily[(daily['gex_tier'] == tier) & daily['is_oos']].copy()
    if len(sub) < 20:  # skip thin regimes
        continue
    
    for time_label in ['1030', '1200', '1400']:
        tilt_col = f'tilt_{time_label}'
        ret_col = 'ret_oc'
        if tilt_col in sub.columns:
            vals = sub.dropna(subset=[tilt_col, ret_col])
            if len(vals) > 10:
                ic, p = stats.spearmanr(vals[tilt_col], vals[ret_col])
                high_tilt = vals[vals[tilt_col] > 0.6]
                low_tilt = vals[vals[tilt_col] < 0.4]
                # NOTE(review): if either bucket is empty the WR is NaN, which
                # the `:.1%` format renders as "nan%" rather than raising.
                wr_high = (high_tilt[ret_col] > 0).mean() if len(high_tilt) > 0 else np.nan
                wr_low = (low_tilt[ret_col] > 0).mean() if len(low_tilt) > 0 else np.nan
                print(f"  {tier} | Tilt@{time_label}: IC={ic:.3f}, p={p:.3f}, N={len(vals)} | High tilt WR={wr_high:.1%}(n={len(high_tilt)}) Low tilt WR={wr_low:.1%}(n={len(low_tilt)})")

# ============================================================
# SIGNAL 2: GEX Velocity (9:30→10:30 change)
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 2: GEX VELOCITY (9:30→10:30)")
print("#"*60)

# First-hour GEX change vs the full-day return, on both splits.
print_quintile_analysis('GEX Velocity → O→C', 'mm_velocity_1h', 'ret_oc', daily[daily['is_oos']], 'OOS')
print_quintile_analysis('GEX Velocity → O→C', 'mm_velocity_1h', 'ret_oc', daily[daily['is_is']], 'IS')

# Velocity → afternoon return (10:30→close)
print_quintile_analysis('GEX Velocity → 10:30→Close', 'mm_velocity_1h', 'ret_1030_close', daily[daily['is_oos']], 'OOS')

# Velocity by tier
print("\n--- Velocity conditioned on GEX tier ---")
for tier in ['LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
    sub = daily[(daily['gex_tier'] == tier) & daily['is_oos']].dropna(subset=['mm_velocity_1h', 'ret_oc'])
    if len(sub) > 15:
        ic, p = stats.spearmanr(sub['mm_velocity_1h'], sub['ret_oc'])
        # Win rates of positive- vs negative-velocity days (NaN when a side is empty).
        wr_pos_vel = (sub[sub['mm_velocity_1h'] > 0]['ret_oc'] > 0).mean() if (sub['mm_velocity_1h'] > 0).sum() > 0 else np.nan
        wr_neg_vel = (sub[sub['mm_velocity_1h'] < 0]['ret_oc'] > 0).mean() if (sub['mm_velocity_1h'] < 0).sum() > 0 else np.nan
        print(f"  {tier}: IC={ic:.3f}, p={p:.3f}, N={len(sub)} | +vel WR={wr_pos_vel:.1%} | -vel WR={wr_neg_vel:.1%}")

# ============================================================
# SIGNAL 3: GEX Change 11:30→12:00 → Afternoon
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 3: LUNCH GEX CHANGE (11:30→12:00)")
print("#"*60)

# Same quintile analysis across splits and forward-return horizons.
for title, target, split_mask, split_label in [
    ('Lunch GEX Chg → 12:00→Close', 'ret_1200_close', daily['is_oos'], 'OOS'),
    ('Lunch GEX Chg → 12:00→Close', 'ret_1200_close', daily['is_is'], 'IS'),
    ('Lunch GEX Chg → 12:00→2H', 'ret_1200_2h', daily['is_oos'], 'OOS'),
]:
    print_quintile_analysis(title, 'mm_chg_1130_1200', target, daily[split_mask], split_label)

# ============================================================
# SIGNAL 4: Firm Gamma at Noon (contrarian)
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 4: FIRM GAMMA AT NOON")
print("#"*60)

for title, target, split_mask, split_label in [
    ('Firm@12:00 → O→C', 'ret_oc', daily['is_oos'], 'OOS'),
    ('Firm@12:00 → O→C', 'ret_oc', daily['is_is'], 'IS'),
    ('Firm@12:00 → 12:00→Close', 'ret_1200_close', daily['is_oos'], 'OOS'),
]:
    print_quintile_analysis(title, 'firm_1200', target, daily[split_mask], split_label)

# ============================================================
# SIGNAL 5: Procust vs MM Opposite
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 5: PROCUST vs MM OPPOSITE")
print("#"*60)

# Compare O→C outcomes on days where procust and MM gamma disagree in sign
# vs days where they agree, on each split.
for split_name, mask in [('IS', daily['is_is']), ('OOS', daily['is_oos'])]:
    split_df = daily[mask].dropna(subset=['ret_oc'])
    opp = split_df[split_df['procust_mm_opposite'] == True]
    same = split_df[split_df['procust_mm_opposite'] == False]
    print(f"\n  {split_name}:")
    print(f"    Opposite: WR={( opp['ret_oc'] > 0).mean():.1%}, Avg={opp['ret_oc'].mean()*10000:+.1f}bps, N={len(opp)}")
    print(f"    Same dir: WR={(same['ret_oc'] > 0).mean():.1%}, Avg={same['ret_oc'].mean()*10000:+.1f}bps, N={len(same)}")

# Does the direction of MM gamma matter when they're opposite?  (OOS only.)
print("\n  When opposite, direction of MM gamma:")
opp_days = daily[daily['is_oos'] & (daily['procust_mm_opposite'] == True)].dropna(subset=['ret_oc'])
mm_pos = opp_days[opp_days['mm_0930'] > 0]
mm_neg = opp_days[opp_days['mm_0930'] < 0]
print(f"    MM positive (procust negative): WR={(mm_pos['ret_oc'] > 0).mean():.1%}, N={len(mm_pos)}")
print(f"    MM negative (procust positive): WR={(mm_neg['ret_oc'] > 0).mean():.1%}, N={len(mm_neg)}")

# ============================================================
# SIGNAL 6: Vol Suppression - Hourly Granularity
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 6: VOL SUPPRESSION BY HOUR (already known, adding detail)")
print("#"*60)

# FIX: removed a dead loop that probed for never-built `range_h{h}` columns
# and then did nothing (`pass` body) — it had no effect on output.
# Instead, characterize the intraday path per GEX tier directly.
oos = daily[daily['is_oos']].copy()

# Which GEX tiers have the most asymmetric path? (win rate vs realized range)
print("\n--- Intraday path by GEX tier (OOS) ---")
for tier in ['DEEP_NEG', 'NEG', 'LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
    sub = oos[oos['gex_tier'] == tier]
    if len(sub) < 5:  # skip tiers with too few OOS days
        continue
    wr = (sub['ret_oc'] > 0).mean()
    avg = sub['ret_oc'].mean() * 10000
    rng = sub['range_pct'].mean() * 10000
    print(f"  {tier:12s}: N={len(sub):3d}, O→C WR={wr:.1%}, Avg={avg:+.1f}bps, Avg Range={rng:.0f}bps")

# ============================================================
# SIGNAL 7: Tilt + GEX + Velocity COMBOS 
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 7: COMBO SIGNALS")
print("#"*60)

def _combo_report(title, required_cols, condition):
    """Print IS/OOS win rate and average O→C return for days matching `condition`."""
    print(f"\n--- {title} ---")
    for split_name, mask in [('IS', daily['is_is']), ('OOS', daily['is_oos'])]:
        sub = daily[mask].dropna(subset=required_cols)
        triggered = sub[condition(sub)]
        if len(triggered) > 0:
            wr = (triggered['ret_oc'] > 0).mean()
            avg = triggered['ret_oc'].mean() * 10000
            print(f"  {split_name}: WR={wr:.1%}, Avg={avg:+.1f}bps, N={len(triggered)}")

_combo_report('High Tilt@10:30 (>60%) + Positive Velocity',
              ['tilt_1030', 'mm_velocity_1h', 'ret_oc'],
              lambda d: (d['tilt_1030'] > 0.6) & (d['mm_velocity_1h'] > 0))

_combo_report('Low Tilt@10:30 (<40%) + Negative Velocity',
              ['tilt_1030', 'mm_velocity_1h', 'ret_oc'],
              lambda d: (d['tilt_1030'] < 0.4) & (d['mm_velocity_1h'] < 0))

# Tilt extremes (>70% or <30%)
_combo_report('Extreme Tilt@10:30 (>70%) + MID/HIGH/EXT POS',
              ['tilt_1030', 'ret_oc'],
              lambda d: (d['tilt_1030'] > 0.7) & (d['gex_tier'].isin(['MID_POS', 'HIGH_POS', 'EXTREME_POS'])))

_combo_report('Extreme Low Tilt@10:30 (<30%)',
              ['tilt_1030', 'ret_oc'],
              lambda d: d['tilt_1030'] < 0.3)

# ============================================================
# SIGNAL 8: Tilt CHANGE (delta from 9:30 to 10:30)
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 8: TILT CHANGE (9:30→10:30)")
print("#"*60)

# Tilt deltas exist only when both endpoint snapshots were captured upstream.
if {'tilt_0930', 'tilt_1030'}.issubset(daily.columns):
    daily['tilt_delta_0930_1030'] = daily['tilt_1030'] - daily['tilt_0930']
    for split_mask, split_label in ((daily['is_oos'], 'OOS'), (daily['is_is'], 'IS')):
        print_quintile_analysis('Tilt Change 9:30→10:30 → O→C', 'tilt_delta_0930_1030', 'ret_oc', daily[split_mask], split_label)

# Tilt change 10:30→12:00 vs the afternoon leg.
if {'tilt_1030', 'tilt_1200'}.issubset(daily.columns):
    daily['tilt_delta_1030_1200'] = daily['tilt_1200'] - daily['tilt_1030']
    print_quintile_analysis('Tilt Change 10:30→12:00 → 12→Close', 'tilt_delta_1030_1200', 'ret_1200_close', daily[daily['is_oos']], 'OOS')

# ============================================================
# SIGNAL 9: Day-of-week × GEX tier interaction
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 9: DAY OF WEEK × GEX TIER")
print("#"*60)

print("\n--- OOS WR by DOW × Tier ---")
for dow, name in enumerate(('Mon', 'Tue', 'Wed', 'Thu', 'Fri')):
    for tier in ['NEG', 'LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
        cell = daily[(daily['dow'] == dow) & (daily['gex_tier'] == tier) & daily['is_oos']]
        if len(cell) < 5:  # too few days in this cell to report
            continue
        wr = (cell['ret_oc'] > 0).mean()
        avg = cell['ret_oc'].mean() * 10000
        # Flag cells with a strongly skewed win rate and a workable sample.
        skewed = wr > 0.65 or wr < 0.35
        marker = ' ⭐' if skewed and len(cell) >= 8 else ''
        print(f"  {name} × {tier:12s}: WR={wr:.1%}, Avg={avg:+.1f}bps, N={len(cell)}{marker}")

# ============================================================
# SIGNAL 10: Overnight gap × GEX magnitude 
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 10: OVERNIGHT GAP × GEX")
print("#"*60)

oos = daily[daily['is_oos']].dropna(subset=['overnight_gap', 'ret_oc', 'mm_open']).copy()
oos['gap_up'] = oos['overnight_gap'] > 0.002  # >20bps
oos['gap_down'] = oos['overnight_gap'] < -0.002
# A gap "fades" when the open→close return moves against the gap direction.
oos['gap_faded'] = np.sign(oos['overnight_gap']) != np.sign(oos['ret_oc'])

# Fade rate per GEX tier, separately for up- and down-gaps.
for tier in ['NEG', 'LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
    sub = oos[oos['gex_tier'] == tier]
    gap_up = sub[sub['gap_up']]
    gap_down = sub[sub['gap_down']]
    if len(gap_up) >= 5:
        fade_rate = gap_up['gap_faded'].mean()
        print(f"  {tier:12s} + Gap UP: Fade rate={fade_rate:.1%}, N={len(gap_up)}")
    if len(gap_down) >= 5:
        fade_rate = gap_down['gap_faded'].mean()
        print(f"  {tier:12s} + Gap DOWN: Fade rate={fade_rate:.1%}, N={len(gap_down)}")

# ============================================================
# SIGNAL 11: GEX velocity at DIFFERENT intervals
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 11: GEX VELOCITY AT VARIOUS INTERVALS")
print("#"*60)

# Test velocity at different intervals: (start checkpoint, end checkpoint, column suffix).
velocity_pairs = [
    ('0930', '1000', '30min'), ('0930', '1030', '1h'), ('1000', '1030', '30min_late'),
    ('1030', '1100', 'mid_morning'), ('1100', '1200', 'late_morning'),
]

for from_t, to_t, label in velocity_pairs:
    from_col = f'mm_{from_t}'
    to_col = f'mm_{to_t}'
    if from_col in daily.columns and to_col in daily.columns:
        daily[f'vel_{label}'] = daily[to_col] - daily[from_col]
        # Test vs remaining day return (from the interval's end to the close).
        ret_col = f'ret_{to_t}_close'
        if ret_col in daily.columns:
            sub = daily[daily['is_oos']].dropna(subset=[f'vel_{label}', ret_col])
            if len(sub) > 30:
                ic, p = stats.spearmanr(sub[f'vel_{label}'], sub[ret_col])
                print(f"  Velocity {from_t}→{to_t} ({label}) vs {to_t}→Close: IC={ic:.4f}, p={p:.4f}, N={len(sub)}")

# ============================================================
# SIGNAL 12: First 15/30 min × GEX tier (continuation vs reversal)
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 12: FIRST 15/30 MIN × GEX TIER")
print("#"*60)

oos = daily[daily['is_oos']].copy()

# Split each tier's days by the sign of the first 15 minutes, then measure
# the win rate of the 10:00→close leg (UP days continuing = continuation).
print("\n--- First 15 min UP → Rest of Day WR by GEX tier ---")
for tier in ['NEG', 'LOW_POS', 'MID_POS', 'HIGH_POS', 'EXTREME_POS']:
    sub = oos[(oos['gex_tier'] == tier)].dropna(subset=['ret_first_15', 'ret_1000_close'])
    up = sub[sub['ret_first_15'] > 0]
    down = sub[sub['ret_first_15'] < 0]
    if len(up) >= 5:
        wr = (up['ret_1000_close'] > 0).mean()
        print(f"  {tier:12s} | First 15 UP: rest WR={wr:.1%} (cont), N={len(up)}")
    if len(down) >= 5:
        wr = (down['ret_1000_close'] > 0).mean()
        print(f"  {tier:12s} | First 15 DN: rest WR={wr:.1%}, N={len(down)}")

# ============================================================
# SIGNAL 13: Tilt at 14:00 → Last 2 Hours
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 13: TILT@14:00 → LAST 2 HOURS (strongest tilt signal)")
print("#"*60)

print_quintile_analysis('Tilt@14:00 → 14:00→Close', 'tilt_1400', 'ret_1400_close', daily[daily['is_oos']], 'OOS')
print_quintile_analysis('Tilt@14:00 → 14:00→Close', 'tilt_1400', 'ret_1400_close', daily[daily['is_is']], 'IS')

# Check if this is a lagging indicator (tilt follows price) or leading
# Test: Does 14:00 return predict tilt@14:00? (reverse causality check)
if 'ret_1400_close' in daily.columns and 'tilt_1400' in daily.columns:
    # Forward return: from 14:00 to close
    # But also check: return from open to 14:00 vs tilt@14:00
    oos_sub = daily[daily['is_oos']].dropna(subset=['tilt_1400', 'ret_oc'])
    if 'ret_1400_close' in oos_sub.columns:
        # Ret from open to 14:00 (approximate)
        # NOTE(review): simple-return subtraction ignores compounding, so this
        # only approximates the true open→14:00 return.
        oos_sub = oos_sub.copy()
        oos_sub['ret_open_1400'] = oos_sub['ret_oc'] - oos_sub.get('ret_1400_close', 0)
        if 'ret_open_1400' in oos_sub.columns:
            # Align both series on the non-NaN rows before correlating.
            ic_lag, p_lag = stats.spearmanr(oos_sub['ret_open_1400'].dropna(), 
                                             oos_sub.loc[oos_sub['ret_open_1400'].notna(), 'tilt_1400'])
            print(f"\n  ⚠️ REVERSE CAUSALITY CHECK: Open→14:00 return vs Tilt@14:00: IC={ic_lag:.4f}, p={p_lag:.4f}")
            print(f"  (If this is strong, tilt@14:00 may be lagging price, not predicting it)")

# ============================================================
# SIGNAL 14: Call Wall Ratio
# ============================================================
print("\n" + "#"*60)
print("# SIGNAL 14: CALL WALL RATIO (positive gamma above/below median)")
print("#"*60)

# Quintile test of whichever call-wall-ratio snapshots were captured upstream.
for time_label in ['1030', '1200', '1400']:
    ratio_col = f'call_wall_ratio_{time_label}'
    if ratio_col not in daily.columns:
        continue
    print_quintile_analysis(f'Call Wall Ratio@{time_label} → O→C', ratio_col, 'ret_oc', daily[daily['is_oos']], 'OOS')

# ============================================================
# SUMMARY TABLE
# ============================================================
print("\n\n" + "="*80)
print("FINAL SUMMARY - ACTIONABLE SIGNALS")
print("="*80)

# Compile all key IC/WR stats for OOS.
# FIX: removed an unused `summary = []` accumulator that was never appended to.
oos = daily[daily['is_oos']]

# (display name, signal column, target column, signal type).  Entries built
# with `... if col in daily.columns else None` cover optional columns that
# upstream steps may not have produced; Nones are filtered in the loop below.
tests = [
    ('Tilt@10:30', 'tilt_1030', 'ret_oc', 'direction'),
    ('Tilt@12:00', 'tilt_1200', 'ret_oc', 'direction'),
    ('Tilt@14:00', 'tilt_1400', 'ret_oc', 'direction'),
    ('Tilt@14:00→Close', 'tilt_1400', 'ret_1400_close', 'direction'),
    ('GEX Velocity 1H', 'mm_velocity_1h', 'ret_oc', 'direction'),
    ('GEX Velocity→10:30 close', 'mm_velocity_1h', 'ret_1030_close', 'direction'),
    ('Lunch GEX Chg→Close', 'mm_chg_1130_1200', 'ret_1200_close', 'direction'),
    ('Firm@12:00', 'firm_1200', 'ret_oc', 'direction'),
    ('GEX mean→ret', 'mm_mean', 'ret_oc', 'direction') if 'mm_mean' in daily.columns else None,
    ('Abs GEX→range', 'mm_abs_open', 'range_pct', 'vol') if 'mm_abs_open' in daily.columns else None,
    ('HHI→range', 'hhi_open', 'range_pct', 'vol'),
    ('Spread→range', 'spread_open', 'range_pct', 'vol'),
    ('MM-Cust Div→range', 'mm_cust_div', 'range_pct', 'vol'),
]

for t in tests:
    if t is None:  # optional signal whose source column was never built
        continue
    name, sig, tgt, signal_type = t
    sub = oos.dropna(subset=[sig, tgt])
    if len(sub) < 20:  # require a minimal OOS sample before reporting
        continue
    ic, p = stats.spearmanr(sub[sig], sub[tgt])
    print(f"  {name:30s}: IC={ic:+.4f}, p={p:.4f}, N={len(sub)}, Type={signal_type}")

print("\nDone with deep dive!")
