"""Compare the glued alpha-first candidate branch against its baselines.

Runs three strategy configurations through ``DragonRuleEngine`` on the
indicator snapshot stored next to this file, then writes summary, comparison,
holding-bucket, walk-forward and trade-diff CSVs, a JSON config snapshot and
a Markdown review into the same directory.
"""

from __future__ import annotations

import json
from dataclasses import asdict
from pathlib import Path

import pandas as pd

from dragon_branch_configs import (
    alpha_first_glued_selective_veto_config,
    alpha_first_selective_veto_config,
    workbook_preserving_config,
)
from dragon_strategy import DragonRuleEngine
from dragon_strategy_config import StrategyConfig

# Explicit schemas so that frames built from an empty row list still expose
# the columns downstream code indexes by name.
_SEGMENT_STAT_KEYS = (
    "trades",
    "win_rate",
    "avg_return",
    "profit_factor",
    "compounded_return",
)
_WALK_FORWARD_COLUMNS = [
    "branch",
    "scheme",
    "train_start_year",
    "train_end_year",
    "test_year",
    *(f"train_{key}" for key in _SEGMENT_STAT_KEYS),
    *(f"test_{key}" for key in _SEGMENT_STAT_KEYS),
]
_HOLDING_BUCKET_COLUMNS = [
    "branch",
    "holding_bucket",
    "trades",
    "win_rate",
    "avg_return",
    "profit_factor",
]
_TRADE_DIFF_COLUMNS = [
    "change_type",
    "buy_date",
    "sell_date",
    "buy_reason",
    "sell_reason",
]


def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
    """Read ``dragon_indicator_snapshot.csv`` and index it by parsed date.

    The ``date`` column is converted to datetimes and kept as a regular
    column in addition to becoming the index (``drop=False``).
    """
    df = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")
    df["date"] = pd.to_datetime(df["date"])
    return df.set_index("date", drop=False)


def _load_true_trade_events(base_dir: Path) -> pd.DataFrame:
    """Read ``true_trade_events.csv`` as-is (dates are NOT parsed here)."""
    return pd.read_csv(base_dir / "true_trade_events.csv", encoding="utf-8-sig")


def _profit_factor(series: pd.Series) -> float:
    """Return gross profit divided by gross loss for a series of returns.

    With no losing entries the result is ``inf`` when there is any profit
    and ``0.0`` otherwise.
    """
    gross_profit = series[series > 0].sum()
    gross_loss = -series[series < 0].sum()
    if gross_loss == 0:
        return float("inf") if gross_profit > 0 else 0.0
    return float(gross_profit / gross_loss)


def _holding_bucket(days: int) -> str:
    """Map a holding period in days to its bucket label."""
    if days <= 5:
        return "00-05d"
    if days <= 10:
        return "06-10d"
    if days <= 20:
        return "11-20d"
    if days <= 40:
        return "21-40d"
    return "41d+"


def _format_pct(value: float) -> str:
    """Format *value* as a two-decimal percentage; ``NA``/``inf`` for NaN/+inf."""
    if pd.isna(value):
        return "NA"
    if value == float("inf"):
        return "inf"
    return f"{value:.2%}"


def _format_num(value: float) -> str:
    """Format *value* with two decimals; ``NA``/``inf`` for NaN/+inf."""
    if pd.isna(value):
        return "NA"
    if value == float("inf"):
        return "inf"
    return f"{value:.2f}"


def _event_match(
    strategy_events: pd.DataFrame,
    workbook_events: pd.DataFrame,
    side: str,
) -> tuple[int, int, int]:
    """Compare ``real_trade`` event dates for one side between two event sets.

    Returns ``(overlap, missing, extra)``: dates present in both, dates only
    in the workbook, and dates only in the strategy output.
    """
    # NOTE(review): matching is by raw equality of the ``date`` values, so it
    # assumes both frames carry dates in the same representation - confirm.
    wb = set(
        workbook_events[
            (workbook_events["side"] == side) & (workbook_events["layer"] == "real_trade")
        ]["date"]
    )
    st = set(
        strategy_events[
            (strategy_events["side"] == side) & (strategy_events["layer"] == "real_trade")
        ]["date"]
    )
    return len(wb & st), len(wb - st), len(st - wb)


def _segment_stats(df: pd.DataFrame) -> dict[str, float | int]:
    """Summarise the ``return_pct`` column of a trade frame.

    An empty frame yields zero trades and NaN for every ratio.
    """
    if df.empty:
        return {
            "trades": 0,
            "win_rate": float("nan"),
            "avg_return": float("nan"),
            "profit_factor": float("nan"),
            "compounded_return": float("nan"),
        }
    returns = df["return_pct"].astype(float)
    return {
        "trades": int(len(df)),
        "win_rate": float((returns > 0).mean()),
        "avg_return": float(returns.mean()),
        "profit_factor": _profit_factor(returns),
        "compounded_return": float((1.0 + returns).prod() - 1.0),
    }


def _walk_forward_row(
    trades: pd.DataFrame,
    branch_name: str,
    scheme: str,
    train_years: list[int],
    test_year: int,
) -> dict[str, object]:
    """Build one walk-forward row: train-window stats next to test-year stats."""
    train_df = trades[trades["sell_year"].isin(train_years)]
    test_df = trades[trades["sell_year"] == test_year]
    return {
        "branch": branch_name,
        "scheme": scheme,
        "train_start_year": train_years[0],
        "train_end_year": train_years[-1],
        "test_year": test_year,
        **{f"train_{k}": v for k, v in _segment_stats(train_df).items()},
        **{f"test_{k}": v for k, v in _segment_stats(test_df).items()},
    }


def _build_walk_forward(trades: pd.DataFrame, branch_name: str) -> pd.DataFrame:
    """Build anchored-expanding and rolling-3-year walk-forward rows.

    Trades are grouped by ``sell_year``. Each year after the first is tested
    against all earlier years (``anchored_expanding``); each year from the
    fourth onward is also tested against the three preceding years
    (``rolling_3y``). The result always carries the full column set, even
    when there are too few years to produce any row.
    """
    years = sorted(int(year) for year in trades["sell_year"].unique())
    rows: list[dict[str, object]] = []
    for idx, test_year in enumerate(years):
        if idx >= 1:
            rows.append(
                _walk_forward_row(trades, branch_name, "anchored_expanding", years[:idx], test_year)
            )
        if idx >= 3:
            rows.append(
                _walk_forward_row(trades, branch_name, "rolling_3y", years[idx - 3 : idx], test_year)
            )
    # Explicit columns: a schema-less empty frame would break the later
    # lookups on "branch" / "scheme" / "test_avg_return".
    return pd.DataFrame(rows, columns=_WALK_FORWARD_COLUMNS)


def _run_branch(
    name: str,
    config: StrategyConfig,
    indicator_df: pd.DataFrame,
    workbook_events: pd.DataFrame,
    first_date: str,
    last_date: str,
) -> tuple[dict[str, object], pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Run one strategy branch and derive its evaluation tables.

    Events and trades produced by the engine are restricted to the inclusive
    ``[first_date, last_date]`` window (a trade must both open and close
    inside it).

    Returns:
        ``(summary, trades, holding_df, walk_forward_df)`` where ``summary``
        is a flat metrics dict, ``trades`` is the filtered trade frame with
        ``branch``, ``sell_dt``, ``sell_year`` and ``holding_bucket`` added,
        ``holding_df`` has one row per holding bucket, and
        ``walk_forward_df`` comes from ``_build_walk_forward``.
    """
    engine = DragonRuleEngine(config=config)
    events, trades = engine.run(indicator_df)

    # NOTE(review): the bounds are the raw strings read from the workbook CSV;
    # this presumes the engine's date columns compare sensibly against them.
    events = events[(events["date"] >= first_date) & (events["date"] <= last_date)].copy()
    trades = trades[
        (trades["buy_date"] >= first_date)
        & (trades["buy_date"] <= last_date)
        & (trades["sell_date"] >= first_date)
        & (trades["sell_date"] <= last_date)
    ].copy()

    buy_overlap, buy_missing, buy_extra = _event_match(events, workbook_events, "BUY")
    sell_overlap, sell_missing, sell_extra = _event_match(events, workbook_events, "SELL")

    trades["branch"] = name
    trades["sell_dt"] = pd.to_datetime(trades["sell_date"])
    trades["sell_year"] = trades["sell_dt"].dt.year.astype(int)
    trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)

    has_trades = not trades.empty
    returns = trades["return_pct"].astype(float) if has_trades else pd.Series(dtype=float)
    nan = float("nan")
    summary = {
        "branch": name,
        "trades": int(len(trades)),
        "win_rate": float((returns > 0).mean()) if has_trades else nan,
        "avg_return": float(returns.mean()) if has_trades else nan,
        "median_return": float(returns.median()) if has_trades else nan,
        "profit_factor": _profit_factor(returns) if has_trades else nan,
        "real_buy_overlap": int(buy_overlap),
        "real_buy_missing": int(buy_missing),
        "real_buy_extra": int(buy_extra),
        "real_sell_overlap": int(sell_overlap),
        "real_sell_missing": int(sell_missing),
        "real_sell_extra": int(sell_extra),
        "short_00_05d_avg_return": float(
            trades[trades["holding_bucket"] == "00-05d"]["return_pct"].mean()
        ),
        "short_06_10d_avg_return": float(
            trades[trades["holding_bucket"] == "06-10d"]["return_pct"].mean()
        ),
    }

    bucket_rows: list[dict[str, object]] = [
        {
            "branch": name,
            "holding_bucket": bucket,
            "trades": int(len(group)),
            "win_rate": float((group["return_pct"] > 0).mean()),
            "avg_return": float(group["return_pct"].mean()),
            "profit_factor": _profit_factor(group["return_pct"]),
        }
        for bucket, group in trades.groupby("holding_bucket", dropna=False)
    ]
    # Explicit columns keep sort_values() from raising KeyError when the
    # date filter leaves no trades (and therefore no bucket rows).
    holding_df = pd.DataFrame(bucket_rows, columns=_HOLDING_BUCKET_COLUMNS).sort_values(
        "holding_bucket"
    )
    walk_forward_df = _build_walk_forward(trades, name)
    return summary, trades, holding_df, walk_forward_df


def _config_snapshot(config: StrategyConfig) -> dict[str, object]:
    """Return ``asdict(config)`` with ``disabled_rules`` replaced by a sorted list."""
    snapshot = asdict(config)
    # presumably disabled_rules is a set (not JSON-serialisable) - verify.
    snapshot["disabled_rules"] = sorted(config.disabled_rules)
    return snapshot


def _trade_set(df: pd.DataFrame) -> set[tuple[str, str, str, str]]:
    """Return the set of (buy_date, sell_date, buy_reason, sell_reason) keys."""
    return set(zip(df["buy_date"], df["sell_date"], df["buy_reason"], df["sell_reason"]))


def _trade_diff(
    source: pd.DataFrame,
    target: pd.DataFrame,
    removed_label: str,
    added_label: str,
) -> pd.DataFrame:
    """List trades present in only one of two trade frames.

    Trades in *source* but not *target* are tagged ``removed_label``; trades
    in *target* but not *source* are tagged ``added_label``. Removed rows come
    first, each group sorted by its key tuple. The frame always has the
    ``change_type`` / date / reason columns, even when nothing differs.
    """
    source_set = _trade_set(source)
    target_set = _trade_set(target)
    rows: list[dict[str, object]] = []
    for label, keys in (
        (removed_label, source_set - target_set),
        (added_label, target_set - source_set),
    ):
        for buy_date, sell_date, buy_reason, sell_reason in sorted(keys):
            rows.append(
                {
                    "change_type": label,
                    "buy_date": buy_date,
                    "sell_date": sell_date,
                    "buy_reason": buy_reason,
                    "sell_reason": sell_reason,
                }
            )
    # Explicit columns: identical trade sets would otherwise give a frame
    # without "change_type", which callers filter on.
    return pd.DataFrame(rows, columns=_TRADE_DIFF_COLUMNS)


def _wf_stats(df: pd.DataFrame, scheme: str) -> tuple[int, int, float]:
    """Summarise out-of-sample results for one walk-forward scheme.

    Returns ``(positive, total, avg_oos)``: the number of rows with a
    positive ``test_avg_return``, the number of rows, and the mean
    ``test_avg_return`` (NaN when there are no rows).
    """
    view = df[df["scheme"] == scheme]
    positive = int((view["test_avg_return"] > 0).sum()) if not view.empty else 0
    total = int(len(view))
    avg_oos = float(view["test_avg_return"].mean()) if not view.empty else float("nan")
    return positive, total, avg_oos


def _headline_line(label: str, row: dict[str, object]) -> str:
    """Format one "Headline Comparison" bullet from a branch summary row."""
    return (
        f"- {label}: trades `{int(row['trades'])}`, "
        f"avg_return `{_format_pct(float(row['avg_return']))}`, "
        f"profit_factor `{_format_num(float(row['profit_factor']))}`, "
        f"real BUY / SELL `{int(row['real_buy_overlap'])}/{int(row['real_sell_overlap'])}`"
    )


def main() -> None:
    """Run all branches and write the CSV / JSON / Markdown review artefacts.

    All inputs are read from, and all outputs written to, the directory that
    contains this file.
    """
    base_dir = Path(__file__).resolve().parent
    indicator_df = _load_indicator_snapshot(base_dir)
    workbook_events = _load_true_trade_events(base_dir)
    # The evaluation window is the span of the workbook's recorded events.
    first_date = workbook_events["date"].min()
    last_date = workbook_events["date"].max()

    branches = [
        ("workbook_preserving", workbook_preserving_config()),
        ("alpha_first_selective_veto", alpha_first_selective_veto_config()),
        ("alpha_first_glued_selective_veto", alpha_first_glued_selective_veto_config()),
    ]

    summaries: list[dict[str, object]] = []
    trades_by_branch: dict[str, pd.DataFrame] = {}
    holding_frames: list[pd.DataFrame] = []
    walk_frames: list[pd.DataFrame] = []
    for name, config in branches:
        summary, trades, holding_df, walk_df = _run_branch(
            name,
            config,
            indicator_df,
            workbook_events,
            first_date,
            last_date,
        )
        summaries.append(summary)
        trades_by_branch[name] = trades
        holding_frames.append(holding_df)
        walk_frames.append(walk_df)

    summary_df = pd.DataFrame(summaries)
    summary_df.to_csv(
        base_dir / "dragon_glued_alpha_candidate_summary.csv",
        index=False,
        encoding="utf-8-sig",
    )

    branch_lookup = {row["branch"]: row for row in summaries}
    workbook_row = branch_lookup["workbook_preserving"]
    alpha_row = branch_lookup["alpha_first_selective_veto"]
    glued_row = branch_lookup["alpha_first_glued_selective_veto"]

    comparison_metrics = [
        "trades",
        "win_rate",
        "avg_return",
        "median_return",
        "profit_factor",
        "real_buy_overlap",
        "real_sell_overlap",
        "short_00_05d_avg_return",
        "short_06_10d_avg_return",
    ]
    comparison_rows: list[dict[str, object]] = [
        {
            "metric": metric,
            "workbook_preserving": workbook_row[metric],
            "alpha_first_selective_veto": alpha_row[metric],
            "alpha_first_glued_selective_veto": glued_row[metric],
            "delta_glued_minus_alpha": glued_row[metric] - alpha_row[metric],
            "delta_glued_minus_workbook": glued_row[metric] - workbook_row[metric],
        }
        for metric in comparison_metrics
    ]
    pd.DataFrame(comparison_rows).to_csv(
        base_dir / "dragon_glued_alpha_candidate_comparison.csv",
        index=False,
        encoding="utf-8-sig",
    )

    pd.concat(holding_frames, ignore_index=True).to_csv(
        base_dir / "dragon_glued_alpha_candidate_holding_buckets.csv",
        index=False,
        encoding="utf-8-sig",
    )

    combined_walk = pd.concat(walk_frames, ignore_index=True)
    combined_walk.to_csv(
        base_dir / "dragon_glued_alpha_candidate_walk_forward.csv",
        index=False,
        encoding="utf-8-sig",
    )

    diff_vs_alpha = _trade_diff(
        trades_by_branch["alpha_first_selective_veto"],
        trades_by_branch["alpha_first_glued_selective_veto"],
        "removed_from_glued_candidate_vs_alpha",
        "added_in_glued_candidate_vs_alpha",
    )
    diff_vs_workbook = _trade_diff(
        trades_by_branch["workbook_preserving"],
        trades_by_branch["alpha_first_glued_selective_veto"],
        "removed_from_glued_candidate_vs_workbook",
        "added_in_glued_candidate_vs_workbook",
    )
    diff_vs_alpha.to_csv(
        base_dir / "dragon_glued_alpha_candidate_trade_diff_vs_alpha.csv",
        index=False,
        encoding="utf-8-sig",
    )
    diff_vs_workbook.to_csv(
        base_dir / "dragon_glued_alpha_candidate_trade_diff_vs_workbook.csv",
        index=False,
        encoding="utf-8-sig",
    )

    (base_dir / "dragon_glued_alpha_candidate_config_snapshot.json").write_text(
        json.dumps(
            _config_snapshot(alpha_first_glued_selective_veto_config()),
            indent=2,
            ensure_ascii=False,
        )
        + "\n",
        encoding="utf-8",
    )

    # Walk-forward stats for every (branch, scheme) pair, computed once.
    wf = {
        (branch, scheme): _wf_stats(combined_walk[combined_walk["branch"] == branch], scheme)
        for branch, _ in branches
        for scheme in ("anchored_expanding", "rolling_3y")
    }
    wb_anchor_pos, wb_anchor_total, wb_anchor_avg = wf[("workbook_preserving", "anchored_expanding")]
    af_anchor_pos, af_anchor_total, af_anchor_avg = wf[
        ("alpha_first_selective_veto", "anchored_expanding")
    ]
    glued_anchor_pos, glued_anchor_total, glued_anchor_avg = wf[
        ("alpha_first_glued_selective_veto", "anchored_expanding")
    ]
    wb_roll_pos, wb_roll_total, wb_roll_avg = wf[("workbook_preserving", "rolling_3y")]
    af_roll_pos, af_roll_total, af_roll_avg = wf[("alpha_first_selective_veto", "rolling_3y")]
    glued_roll_pos, glued_roll_total, glued_roll_avg = wf[
        ("alpha_first_glued_selective_veto", "rolling_3y")
    ]

    removed_vs_alpha = diff_vs_alpha[
        diff_vs_alpha["change_type"] == "removed_from_glued_candidate_vs_alpha"
    ].copy()
    added_vs_alpha = diff_vs_alpha[
        diff_vs_alpha["change_type"] == "added_in_glued_candidate_vs_alpha"
    ].copy()
    removed_vs_workbook_count = int(
        (diff_vs_workbook["change_type"] == "removed_from_glued_candidate_vs_workbook").sum()
    )
    added_vs_workbook_count = int(
        (diff_vs_workbook["change_type"] == "added_in_glued_candidate_vs_workbook").sum()
    )
    removed_glued_count = int((removed_vs_alpha["buy_reason"] == "glued_buy").sum())

    # Only the first added trade is quoted in the review text.
    added_replacement_text = "none"
    if not added_vs_alpha.empty:
        added_row = added_vs_alpha.iloc[0]
        added_replacement_text = (
            f"{added_row['buy_date']} -> {added_row['sell_date']} / "
            f"{added_row['buy_reason']} -> {added_row['sell_reason']}"
        )

    short_00_05_line = (
        f"- `00-05d`: workbook `{_format_pct(float(workbook_row['short_00_05d_avg_return']))}`, "
        f"alpha `{_format_pct(float(alpha_row['short_00_05d_avg_return']))}`, "
        f"glued candidate `{_format_pct(float(glued_row['short_00_05d_avg_return']))}`"
    )
    short_06_10_line = (
        f"- `06-10d`: workbook `{_format_pct(float(workbook_row['short_06_10d_avg_return']))}`, "
        f"alpha `{_format_pct(float(alpha_row['short_06_10d_avg_return']))}`, "
        f"glued candidate `{_format_pct(float(glued_row['short_06_10d_avg_return']))}`"
    )
    anchored_line = (
        f"- Anchored expanding: workbook `{wb_anchor_pos}/{wb_anchor_total}` positive, "
        f"avg `{_format_pct(wb_anchor_avg)}`; "
        f"alpha `{af_anchor_pos}/{af_anchor_total}`, avg `{_format_pct(af_anchor_avg)}`; "
        f"glued `{glued_anchor_pos}/{glued_anchor_total}`, avg `{_format_pct(glued_anchor_avg)}`"
    )
    rolling_line = (
        f"- Rolling 3Y: workbook `{wb_roll_pos}/{wb_roll_total}` positive, "
        f"avg `{_format_pct(wb_roll_avg)}`; "
        f"alpha `{af_roll_pos}/{af_roll_total}`, avg `{_format_pct(af_roll_avg)}`; "
        f"glued `{glued_roll_pos}/{glued_roll_total}`, avg `{_format_pct(glued_roll_avg)}`"
    )
    diff_alpha_line = (
        f"- glued candidate vs alpha-first: removed `{int(len(removed_vs_alpha))}`, "
        f"added `{int(len(added_vs_alpha))}`"
    )
    diff_workbook_line = (
        f"- glued candidate vs workbook: removed `{removed_vs_workbook_count}`, "
        f"added `{added_vs_workbook_count}`"
    )
    removed_target_line = (
        "- Removed vs alpha-first are almost entirely the intended target: "
        f"`{removed_glued_count}` of `{int(len(removed_vs_alpha))}` are `glued_buy` trades."
    )
    added_reroute_line = (
        f"- Added vs alpha-first is only a small fallback reroute: `{added_replacement_text}`."
    )

    # NOTE(review): the "Quant Judgment" bullets below are fixed prose; the
    # `102/101` -> `90/89` overlap figures are hard-coded and are not derived
    # from the values computed in this run - confirm they still match.
    lines = [
        "# Dragon Glued Alpha Candidate Review",
        "",
        "## Branches",
        "- `workbook_preserving`: official reconstruction baseline.",
        "- `alpha_first_selective_veto`: current formal alpha-first branch.",
        "- `alpha_first_glued_selective_veto`: alpha-first branch plus narrow glued hot/low veto.",
        "",
        "## Headline Comparison",
        _headline_line("workbook_preserving", workbook_row),
        _headline_line("alpha_first_selective_veto", alpha_row),
        _headline_line("alpha_first_glued_selective_veto", glued_row),
        "",
        "## Short-Holding Impact",
        short_00_05_line,
        short_06_10_line,
        "",
        "## Walk-Forward Comparison",
        anchored_line,
        rolling_line,
        "",
        "## Trade-Diff Summary",
        diff_alpha_line,
        diff_workbook_line,
        removed_target_line,
        added_reroute_line,
        "",
        "## Quant Judgment",
        "- The glued candidate clearly improves in-sample trade quality and short-holding drag beyond the current alpha-first branch.",
        "- The cost is no longer narrow: overlap drops materially from `102/101` to `90/89`, which is a much larger governance step than the current deep-oversold selective veto branch.",
        "- This means the glued candidate is a credible research branch, but not yet a clean replacement for the current formal alpha-first baseline.",
        "- Recommended governance: keep `alpha_first_selective_veto` as the official alpha-first baseline; treat `alpha_first_glued_selective_veto` as the next research branch for further residual attribution and out-of-sample stability review.",
    ]
    (base_dir / "dragon_glued_alpha_candidate_review.md").write_text(
        "\n".join(lines) + "\n",
        encoding="utf-8",
    )


if __name__ == "__main__":
    main()