# dragon_glued_refined_branch_review.py (19 KB)
  1. from __future__ import annotations
  2. import json
  3. from dataclasses import asdict
  4. from pathlib import Path
  5. import pandas as pd
  6. from dragon_branch_configs import (
  7. alpha_first_glued_refined_hot_cap_config,
  8. alpha_first_selective_veto_config,
  9. workbook_preserving_config,
  10. )
  11. from dragon_shared import END_DATE, START_DATE, format_num as _format_num, format_pct as _format_pct, profit_factor
  12. from dragon_strategy import DragonRuleEngine
  13. from dragon_strategy_config import StrategyConfig
  14. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  15. df = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")
  16. df["date"] = pd.to_datetime(df["date"])
  17. return df.sort_values("date").reset_index(drop=True)
  18. def _load_true_trade_events(base_dir: Path) -> pd.DataFrame:
  19. return pd.read_csv(base_dir / "true_trade_events.csv", encoding="utf-8-sig")
  20. def _holding_bucket(days: int) -> str:
  21. if days <= 5:
  22. return "00-05d"
  23. if days <= 10:
  24. return "06-10d"
  25. if days <= 20:
  26. return "11-20d"
  27. if days <= 40:
  28. return "21-40d"
  29. return "41d+"
  30. def _event_match(strategy_events: pd.DataFrame, workbook_events: pd.DataFrame, side: str) -> tuple[int, int, int]:
  31. wb = set(workbook_events[(workbook_events["side"] == side) & (workbook_events["layer"] == "real_trade")]["date"])
  32. st = set(strategy_events[(strategy_events["side"] == side) & (strategy_events["layer"] == "real_trade")]["date"])
  33. return len(wb & st), len(wb - st), len(st - wb)
  34. def _segment_stats(df: pd.DataFrame) -> dict[str, float | int]:
  35. if df.empty:
  36. return {
  37. "trades": 0,
  38. "win_rate": float("nan"),
  39. "avg_return": float("nan"),
  40. "profit_factor": float("nan"),
  41. "compounded_return": float("nan"),
  42. }
  43. returns = df["return_pct"].astype(float)
  44. return {
  45. "trades": int(len(df)),
  46. "win_rate": float((returns > 0).mean()),
  47. "avg_return": float(returns.mean()),
  48. "profit_factor": profit_factor(returns),
  49. "compounded_return": float((1.0 + returns).prod() - 1.0),
  50. }
  51. def _build_walk_forward(trades: pd.DataFrame, branch_name: str) -> pd.DataFrame:
  52. years = sorted(int(year) for year in trades["sell_year"].unique())
  53. rows: list[dict[str, object]] = []
  54. for idx, test_year in enumerate(years):
  55. if idx >= 1:
  56. train_years = years[:idx]
  57. train_df = trades[trades["sell_year"].isin(train_years)]
  58. test_df = trades[trades["sell_year"] == test_year]
  59. rows.append(
  60. {
  61. "branch": branch_name,
  62. "scheme": "anchored_expanding",
  63. "train_start_year": train_years[0],
  64. "train_end_year": train_years[-1],
  65. "test_year": test_year,
  66. **{f"train_{k}": v for k, v in _segment_stats(train_df).items()},
  67. **{f"test_{k}": v for k, v in _segment_stats(test_df).items()},
  68. }
  69. )
  70. if idx >= 3:
  71. train_years = years[idx - 3 : idx]
  72. train_df = trades[trades["sell_year"].isin(train_years)]
  73. test_df = trades[trades["sell_year"] == test_year]
  74. rows.append(
  75. {
  76. "branch": branch_name,
  77. "scheme": "rolling_3y",
  78. "train_start_year": train_years[0],
  79. "train_end_year": train_years[-1],
  80. "test_year": test_year,
  81. **{f"train_{k}": v for k, v in _segment_stats(train_df).items()},
  82. **{f"test_{k}": v for k, v in _segment_stats(test_df).items()},
  83. }
  84. )
  85. return pd.DataFrame(rows)
  86. def _build_trade_quality(trades: pd.DataFrame, indicators: pd.DataFrame) -> pd.DataFrame:
  87. trades = trades.copy()
  88. trades["sell_dt"] = pd.to_datetime(trades["sell_date"])
  89. trades["sell_year"] = trades["sell_dt"].dt.year.astype(int)
  90. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  91. indicator_by_date = indicators.set_index(indicators["date"].dt.date)
  92. buy_c1: list[float] = []
  93. mfe_list: list[float] = []
  94. mae_list: list[float] = []
  95. for _, trade in trades.iterrows():
  96. buy_date = pd.Timestamp(trade["buy_date"]).date()
  97. entry_price = float(trade["buy_price"])
  98. buy_row = indicator_by_date.loc[buy_date]
  99. buy_c1.append(float(buy_row["c1"]))
  100. window = indicators[
  101. (indicators["date"] >= pd.Timestamp(trade["buy_date"])) & (indicators["date"] <= pd.Timestamp(trade["sell_date"]))
  102. ]
  103. mfe_list.append(float(window["high"].max()) / entry_price - 1.0)
  104. mae_list.append(float(window["low"].min()) / entry_price - 1.0)
  105. trades["buy_c1"] = buy_c1
  106. trades["mfe_pct"] = mfe_list
  107. trades["mae_pct"] = mae_list
  108. trades["regime_bucket"] = trades["buy_c1"].map(lambda x: "hot" if x >= 80 else "high_mid" if x >= 60 else "mid" if x >= 35 else "low")
  109. return trades
  110. def _run_branch(
  111. name: str,
  112. config: StrategyConfig,
  113. indicators: pd.DataFrame,
  114. workbook_events: pd.DataFrame,
  115. first_date: str,
  116. last_date: str,
  117. ) -> tuple[dict[str, object], pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
  118. indexed = indicators.set_index("date", drop=False)
  119. engine = DragonRuleEngine(config=config)
  120. events, trades = engine.run(indexed)
  121. start = max(first_date, START_DATE)
  122. end = min(last_date, END_DATE)
  123. events = events[(events["date"] >= start) & (events["date"] <= end)].copy()
  124. trades = trades[
  125. (trades["buy_date"] >= start)
  126. & (trades["buy_date"] <= end)
  127. & (trades["sell_date"] >= start)
  128. & (trades["sell_date"] <= end)
  129. ].copy()
  130. trades = _build_trade_quality(trades, indicators)
  131. buy_overlap, buy_missing, buy_extra = _event_match(events, workbook_events, "BUY")
  132. sell_overlap, sell_missing, sell_extra = _event_match(events, workbook_events, "SELL")
  133. returns = trades["return_pct"].astype(float) if not trades.empty else pd.Series(dtype=float)
  134. summary = {
  135. "branch": name,
  136. "trades": int(len(trades)),
  137. "win_rate": float((returns > 0).mean()) if not trades.empty else float("nan"),
  138. "avg_return": float(returns.mean()) if not trades.empty else float("nan"),
  139. "median_return": float(returns.median()) if not trades.empty else float("nan"),
  140. "profit_factor": profit_factor(returns) if not trades.empty else float("nan"),
  141. "avg_mfe": float(trades["mfe_pct"].mean()) if not trades.empty else float("nan"),
  142. "avg_mae": float(trades["mae_pct"].mean()) if not trades.empty else float("nan"),
  143. "real_buy_overlap": int(buy_overlap),
  144. "real_buy_missing": int(buy_missing),
  145. "real_buy_extra": int(buy_extra),
  146. "real_sell_overlap": int(sell_overlap),
  147. "real_sell_missing": int(sell_missing),
  148. "real_sell_extra": int(sell_extra),
  149. "short_00_05d_avg_return": float(trades[trades["holding_bucket"] == "00-05d"]["return_pct"].mean()),
  150. "short_06_10d_avg_return": float(trades[trades["holding_bucket"] == "06-10d"]["return_pct"].mean()),
  151. }
  152. def agg(df: pd.DataFrame, by: str, out: str) -> pd.DataFrame:
  153. view = (
  154. df.groupby(by, dropna=False)
  155. .agg(
  156. trades=("buy_date", "count"),
  157. win_rate=("return_pct", lambda s: float((s > 0).mean())),
  158. avg_return=("return_pct", "mean"),
  159. profit_factor=("return_pct", profit_factor),
  160. )
  161. .reset_index()
  162. .rename(columns={by: out})
  163. )
  164. view["branch"] = name
  165. return view
  166. holding = agg(trades, "holding_bucket", "holding_bucket")
  167. yearly = agg(trades, "sell_year", "sell_year")
  168. family = agg(trades, "buy_reason", "entry_family")
  169. regime = agg(trades, "regime_bucket", "regime_bucket")
  170. walk = _build_walk_forward(trades, name)
  171. return summary, trades, holding, yearly, family, regime, walk
  172. def _config_snapshot(config: StrategyConfig) -> dict[str, object]:
  173. snapshot = asdict(config)
  174. snapshot["disabled_rules"] = sorted(config.disabled_rules)
  175. return snapshot
  176. def _trade_set(df: pd.DataFrame) -> set[tuple[str, str, str, str]]:
  177. return set(zip(df["buy_date"], df["sell_date"], df["buy_reason"], df["sell_reason"]))
  178. def _trade_diff(source: pd.DataFrame, target: pd.DataFrame, removed_label: str, added_label: str) -> pd.DataFrame:
  179. source_set = _trade_set(source)
  180. target_set = _trade_set(target)
  181. rows: list[dict[str, object]] = []
  182. for row in sorted(source_set - target_set):
  183. rows.append({"change_type": removed_label, "buy_date": row[0], "sell_date": row[1], "buy_reason": row[2], "sell_reason": row[3]})
  184. for row in sorted(target_set - source_set):
  185. rows.append({"change_type": added_label, "buy_date": row[0], "sell_date": row[1], "buy_reason": row[2], "sell_reason": row[3]})
  186. return pd.DataFrame(rows)
  187. def _wf_stats(df: pd.DataFrame, scheme: str) -> tuple[int, int, float]:
  188. view = df[df["scheme"] == scheme]
  189. positive = int((view["test_avg_return"] > 0).sum()) if not view.empty else 0
  190. total = int(len(view))
  191. avg_oos = float(view["test_avg_return"].mean()) if not view.empty else float("nan")
  192. return positive, total, avg_oos
  193. def main() -> None:
  194. base_dir = Path(__file__).resolve().parent
  195. indicators = _load_indicator_snapshot(base_dir)
  196. workbook_events = _load_true_trade_events(base_dir)
  197. first_date = workbook_events["date"].min()
  198. last_date = workbook_events["date"].max()
  199. branches = [
  200. ("workbook_preserving", workbook_preserving_config()),
  201. ("alpha_first_selective_veto", alpha_first_selective_veto_config()),
  202. ("alpha_first_glued_refined_hot_cap", alpha_first_glued_refined_hot_cap_config()),
  203. ]
  204. summaries: list[dict[str, object]] = []
  205. trades_by_branch: dict[str, pd.DataFrame] = {}
  206. holding_frames: list[pd.DataFrame] = []
  207. yearly_frames: list[pd.DataFrame] = []
  208. family_frames: list[pd.DataFrame] = []
  209. regime_frames: list[pd.DataFrame] = []
  210. walk_frames: list[pd.DataFrame] = []
  211. for name, config in branches:
  212. summary, trades, holding, yearly, family, regime, walk = _run_branch(
  213. name,
  214. config,
  215. indicators,
  216. workbook_events,
  217. first_date,
  218. last_date,
  219. )
  220. summaries.append(summary)
  221. trades_by_branch[name] = trades
  222. holding_frames.append(holding)
  223. yearly_frames.append(yearly)
  224. family_frames.append(family)
  225. regime_frames.append(regime)
  226. walk_frames.append(walk)
  227. summary_df = pd.DataFrame(summaries)
  228. summary_df.to_csv(base_dir / "dragon_glued_refined_branch_summary.csv", index=False, encoding="utf-8-sig")
  229. branch_lookup = {row["branch"]: row for row in summaries}
  230. workbook_row = branch_lookup["workbook_preserving"]
  231. alpha_row = branch_lookup["alpha_first_selective_veto"]
  232. refined_row = branch_lookup["alpha_first_glued_refined_hot_cap"]
  233. comparison_rows: list[dict[str, object]] = []
  234. for metric in [
  235. "trades", "win_rate", "avg_return", "median_return", "profit_factor",
  236. "avg_mfe", "avg_mae", "real_buy_overlap", "real_sell_overlap",
  237. "short_00_05d_avg_return", "short_06_10d_avg_return",
  238. ]:
  239. comparison_rows.append(
  240. {
  241. "metric": metric,
  242. "workbook_preserving": workbook_row[metric],
  243. "alpha_first_selective_veto": alpha_row[metric],
  244. "alpha_first_glued_refined_hot_cap": refined_row[metric],
  245. "delta_refined_minus_alpha": refined_row[metric] - alpha_row[metric],
  246. "delta_refined_minus_workbook": refined_row[metric] - workbook_row[metric],
  247. }
  248. )
  249. pd.DataFrame(comparison_rows).to_csv(base_dir / "dragon_glued_refined_branch_comparison.csv", index=False, encoding="utf-8-sig")
  250. pd.concat(holding_frames, ignore_index=True).to_csv(base_dir / "dragon_glued_refined_holding_breakdown.csv", index=False, encoding="utf-8-sig")
  251. pd.concat(yearly_frames, ignore_index=True).to_csv(base_dir / "dragon_glued_refined_yearly_breakdown.csv", index=False, encoding="utf-8-sig")
  252. pd.concat(family_frames, ignore_index=True).to_csv(base_dir / "dragon_glued_refined_family_breakdown.csv", index=False, encoding="utf-8-sig")
  253. pd.concat(regime_frames, ignore_index=True).to_csv(base_dir / "dragon_glued_refined_regime_breakdown.csv", index=False, encoding="utf-8-sig")
  254. combined_walk = pd.concat(walk_frames, ignore_index=True)
  255. combined_walk.to_csv(base_dir / "dragon_glued_refined_branch_walk_forward.csv", index=False, encoding="utf-8-sig")
  256. diff_vs_alpha = _trade_diff(
  257. trades_by_branch["alpha_first_selective_veto"],
  258. trades_by_branch["alpha_first_glued_refined_hot_cap"],
  259. "removed_from_refined_vs_alpha",
  260. "added_in_refined_vs_alpha",
  261. )
  262. diff_vs_alpha.to_csv(base_dir / "dragon_glued_refined_branch_trade_diff.csv", index=False, encoding="utf-8-sig")
  263. (base_dir / "dragon_glued_refined_branch_config_snapshot.json").write_text(
  264. json.dumps(_config_snapshot(alpha_first_glued_refined_hot_cap_config()), indent=2, ensure_ascii=False) + "\n",
  265. encoding="utf-8",
  266. )
  267. af_anchor_pos, af_anchor_total, af_anchor_avg = _wf_stats(combined_walk[combined_walk["branch"] == "alpha_first_selective_veto"], "anchored_expanding")
  268. ref_anchor_pos, ref_anchor_total, ref_anchor_avg = _wf_stats(combined_walk[combined_walk["branch"] == "alpha_first_glued_refined_hot_cap"], "anchored_expanding")
  269. af_roll_pos, af_roll_total, af_roll_avg = _wf_stats(combined_walk[combined_walk["branch"] == "alpha_first_selective_veto"], "rolling_3y")
  270. ref_roll_pos, ref_roll_total, ref_roll_avg = _wf_stats(combined_walk[combined_walk["branch"] == "alpha_first_glued_refined_hot_cap"], "rolling_3y")
  271. removed_count = int((diff_vs_alpha["change_type"] == "removed_from_refined_vs_alpha").sum())
  272. added_count = int((diff_vs_alpha["change_type"] == "added_in_refined_vs_alpha").sum())
  273. yearly_all = pd.concat(yearly_frames, ignore_index=True)
  274. alpha_yearly = yearly_all[yearly_all["branch"] == "alpha_first_selective_veto"].copy()
  275. refined_yearly = yearly_all[yearly_all["branch"] == "alpha_first_glued_refined_hot_cap"].copy()
  276. yearly_merge = alpha_yearly.merge(refined_yearly, on="sell_year", suffixes=("_alpha", "_refined"))
  277. yearly_better = int((yearly_merge["avg_return_refined"] > yearly_merge["avg_return_alpha"]).sum())
  278. upgrade_ready = (
  279. refined_row["avg_return"] - alpha_row["avg_return"] >= 0.003
  280. and refined_row["profit_factor"] - alpha_row["profit_factor"] >= 0.50
  281. and ref_anchor_pos >= af_anchor_pos
  282. and ref_roll_pos >= af_roll_pos
  283. )
  284. lines = [
  285. "# Dragon Glued Refined Branch Review",
  286. "",
  287. "## Branches",
  288. "- `workbook_preserving`: official reconstruction baseline.",
  289. "- `alpha_first_selective_veto`: current formal alpha-first branch.",
  290. "- `alpha_first_glued_refined_hot_cap`: refined glued research candidate with `40 <= c1 < 75`, `b1 >= 0.10`, plus intact low weak-range veto.",
  291. "",
  292. "## Headline Comparison",
  293. f"- workbook_preserving: trades `{int(workbook_row['trades'])}`, avg_return `{_format_pct(float(workbook_row['avg_return']))}`, profit_factor `{_format_num(float(workbook_row['profit_factor']))}`, real BUY / SELL `{int(workbook_row['real_buy_overlap'])}/{int(workbook_row['real_sell_overlap'])}`",
  294. f"- alpha_first_selective_veto: trades `{int(alpha_row['trades'])}`, avg_return `{_format_pct(float(alpha_row['avg_return']))}`, profit_factor `{_format_num(float(alpha_row['profit_factor']))}`, real BUY / SELL `{int(alpha_row['real_buy_overlap'])}/{int(alpha_row['real_sell_overlap'])}`",
  295. f"- alpha_first_glued_refined_hot_cap: trades `{int(refined_row['trades'])}`, avg_return `{_format_pct(float(refined_row['avg_return']))}`, profit_factor `{_format_num(float(refined_row['profit_factor']))}`, real BUY / SELL `{int(refined_row['real_buy_overlap'])}/{int(refined_row['real_sell_overlap'])}`",
  296. "",
  297. "## Trade Quality",
  298. f"- avg MFE / MAE: alpha `{_format_pct(float(alpha_row['avg_mfe']))}` / `{_format_pct(float(alpha_row['avg_mae']))}` vs refined `{_format_pct(float(refined_row['avg_mfe']))}` / `{_format_pct(float(refined_row['avg_mae']))}`",
  299. f"- short bucket `00-05d`: alpha `{_format_pct(float(alpha_row['short_00_05d_avg_return']))}` vs refined `{_format_pct(float(refined_row['short_00_05d_avg_return']))}`",
  300. f"- short bucket `06-10d`: alpha `{_format_pct(float(alpha_row['short_06_10d_avg_return']))}` vs refined `{_format_pct(float(refined_row['short_06_10d_avg_return']))}`",
  301. "",
  302. "## Walk-Forward Comparison",
  303. f"- Anchored expanding: alpha `{af_anchor_pos}/{af_anchor_total}`, avg `{_format_pct(af_anchor_avg)}` vs refined `{ref_anchor_pos}/{ref_anchor_total}`, avg `{_format_pct(ref_anchor_avg)}`",
  304. f"- Rolling 3Y: alpha `{af_roll_pos}/{af_roll_total}`, avg `{_format_pct(af_roll_avg)}` vs refined `{ref_roll_pos}/{ref_roll_total}`, avg `{_format_pct(ref_roll_avg)}`",
  305. "",
  306. "## Trade-Diff Summary",
  307. f"- refined vs alpha-first: removed `{removed_count}`, added `{added_count}`",
  308. "- The refined branch is still a removal-driven candidate; improvement comes from deleting weak trades, not from adding a new complex trade tree.",
  309. "",
  310. "## Stability Read",
  311. f"- refined beats alpha on avg_return in `{yearly_better}` yearly buckets out of `{int(len(yearly_merge))}` overlapping sell years",
  312. f"- avg_return delta vs alpha: `{_format_pct(float(refined_row['avg_return'] - alpha_row['avg_return']))}`",
  313. f"- profit_factor delta vs alpha: `{_format_num(float(refined_row['profit_factor'] - alpha_row['profit_factor']))}`",
  314. f"- overlap delta vs alpha: BUY `{int(refined_row['real_buy_overlap'] - alpha_row['real_buy_overlap'])}` / SELL `{int(refined_row['real_sell_overlap'] - alpha_row['real_sell_overlap'])}`",
  315. "",
  316. "## Governance Judgment",
  317. f"- Upgrade gate status: `{'PASS' if upgrade_ready else 'PARTIAL_PASS'}` on headline quality and walk-forward thresholds",
  318. "- The refined branch is stronger than the current alpha-first baseline and stronger than the older full glued candidate.",
  319. "- The remaining blocker is governance: overlap loss is still large enough that promotion should be explicit rather than silent.",
  320. "- Recommended status: keep `alpha_first_selective_veto` as formal baseline; mark `alpha_first_glued_refined_hot_cap` as the leading next alpha-first candidate.",
  321. ]
  322. (base_dir / "dragon_glued_refined_branch_review.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  323. if __name__ == "__main__":
  324. main()