dragon_glued_alpha_candidate.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. from __future__ import annotations
  2. import json
  3. from dataclasses import asdict
  4. from pathlib import Path
  5. import pandas as pd
  6. from dragon_branch_configs import (
  7. alpha_first_glued_selective_veto_config,
  8. alpha_first_selective_veto_config,
  9. workbook_preserving_config,
  10. )
  11. from dragon_strategy import DragonRuleEngine
  12. from dragon_strategy_config import StrategyConfig
  13. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  14. df = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")
  15. df["date"] = pd.to_datetime(df["date"])
  16. return df.set_index("date", drop=False)
  17. def _load_true_trade_events(base_dir: Path) -> pd.DataFrame:
  18. return pd.read_csv(base_dir / "true_trade_events.csv", encoding="utf-8-sig")
  19. def _profit_factor(series: pd.Series) -> float:
  20. gross_profit = series[series > 0].sum()
  21. gross_loss = -series[series < 0].sum()
  22. if gross_loss == 0:
  23. return float("inf") if gross_profit > 0 else 0.0
  24. return float(gross_profit / gross_loss)
  25. def _holding_bucket(days: int) -> str:
  26. if days <= 5:
  27. return "00-05d"
  28. if days <= 10:
  29. return "06-10d"
  30. if days <= 20:
  31. return "11-20d"
  32. if days <= 40:
  33. return "21-40d"
  34. return "41d+"
  35. def _format_pct(value: float) -> str:
  36. if pd.isna(value):
  37. return "NA"
  38. if value == float("inf"):
  39. return "inf"
  40. return f"{value:.2%}"
  41. def _format_num(value: float) -> str:
  42. if pd.isna(value):
  43. return "NA"
  44. if value == float("inf"):
  45. return "inf"
  46. return f"{value:.2f}"
  47. def _event_match(strategy_events: pd.DataFrame, workbook_events: pd.DataFrame, side: str) -> tuple[int, int, int]:
  48. wb = set(workbook_events[(workbook_events["side"] == side) & (workbook_events["layer"] == "real_trade")]["date"])
  49. st = set(strategy_events[(strategy_events["side"] == side) & (strategy_events["layer"] == "real_trade")]["date"])
  50. return len(wb & st), len(wb - st), len(st - wb)
  51. def _segment_stats(df: pd.DataFrame) -> dict[str, float | int]:
  52. if df.empty:
  53. return {
  54. "trades": 0,
  55. "win_rate": float("nan"),
  56. "avg_return": float("nan"),
  57. "profit_factor": float("nan"),
  58. "compounded_return": float("nan"),
  59. }
  60. returns = df["return_pct"].astype(float)
  61. return {
  62. "trades": int(len(df)),
  63. "win_rate": float((returns > 0).mean()),
  64. "avg_return": float(returns.mean()),
  65. "profit_factor": _profit_factor(returns),
  66. "compounded_return": float((1.0 + returns).prod() - 1.0),
  67. }
  68. def _build_walk_forward(trades: pd.DataFrame, branch_name: str) -> pd.DataFrame:
  69. years = sorted(int(year) for year in trades["sell_year"].unique())
  70. rows: list[dict[str, object]] = []
  71. for idx, test_year in enumerate(years):
  72. if idx >= 1:
  73. train_years = years[:idx]
  74. train_df = trades[trades["sell_year"].isin(train_years)]
  75. test_df = trades[trades["sell_year"] == test_year]
  76. rows.append(
  77. {
  78. "branch": branch_name,
  79. "scheme": "anchored_expanding",
  80. "train_start_year": train_years[0],
  81. "train_end_year": train_years[-1],
  82. "test_year": test_year,
  83. **{f"train_{k}": v for k, v in _segment_stats(train_df).items()},
  84. **{f"test_{k}": v for k, v in _segment_stats(test_df).items()},
  85. }
  86. )
  87. if idx >= 3:
  88. train_years = years[idx - 3 : idx]
  89. train_df = trades[trades["sell_year"].isin(train_years)]
  90. test_df = trades[trades["sell_year"] == test_year]
  91. rows.append(
  92. {
  93. "branch": branch_name,
  94. "scheme": "rolling_3y",
  95. "train_start_year": train_years[0],
  96. "train_end_year": train_years[-1],
  97. "test_year": test_year,
  98. **{f"train_{k}": v for k, v in _segment_stats(train_df).items()},
  99. **{f"test_{k}": v for k, v in _segment_stats(test_df).items()},
  100. }
  101. )
  102. return pd.DataFrame(rows)
  103. def _run_branch(
  104. name: str,
  105. config: StrategyConfig,
  106. indicator_df: pd.DataFrame,
  107. workbook_events: pd.DataFrame,
  108. first_date: str,
  109. last_date: str,
  110. ) -> tuple[dict[str, object], pd.DataFrame, pd.DataFrame, pd.DataFrame]:
  111. engine = DragonRuleEngine(config=config)
  112. events, trades = engine.run(indicator_df)
  113. events = events[(events["date"] >= first_date) & (events["date"] <= last_date)].copy()
  114. trades = trades[
  115. (trades["buy_date"] >= first_date)
  116. & (trades["buy_date"] <= last_date)
  117. & (trades["sell_date"] >= first_date)
  118. & (trades["sell_date"] <= last_date)
  119. ].copy()
  120. buy_overlap, buy_missing, buy_extra = _event_match(events, workbook_events, "BUY")
  121. sell_overlap, sell_missing, sell_extra = _event_match(events, workbook_events, "SELL")
  122. trades["branch"] = name
  123. trades["sell_dt"] = pd.to_datetime(trades["sell_date"])
  124. trades["sell_year"] = trades["sell_dt"].dt.year.astype(int)
  125. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  126. returns = trades["return_pct"].astype(float) if not trades.empty else pd.Series(dtype=float)
  127. summary = {
  128. "branch": name,
  129. "trades": int(len(trades)),
  130. "win_rate": float((returns > 0).mean()) if not trades.empty else float("nan"),
  131. "avg_return": float(returns.mean()) if not trades.empty else float("nan"),
  132. "median_return": float(returns.median()) if not trades.empty else float("nan"),
  133. "profit_factor": _profit_factor(returns) if not trades.empty else float("nan"),
  134. "real_buy_overlap": int(buy_overlap),
  135. "real_buy_missing": int(buy_missing),
  136. "real_buy_extra": int(buy_extra),
  137. "real_sell_overlap": int(sell_overlap),
  138. "real_sell_missing": int(sell_missing),
  139. "real_sell_extra": int(sell_extra),
  140. "short_00_05d_avg_return": float(trades[trades["holding_bucket"] == "00-05d"]["return_pct"].mean()),
  141. "short_06_10d_avg_return": float(trades[trades["holding_bucket"] == "06-10d"]["return_pct"].mean()),
  142. }
  143. bucket_rows: list[dict[str, object]] = []
  144. for bucket, group in trades.groupby("holding_bucket", dropna=False):
  145. bucket_rows.append(
  146. {
  147. "branch": name,
  148. "holding_bucket": bucket,
  149. "trades": int(len(group)),
  150. "win_rate": float((group["return_pct"] > 0).mean()),
  151. "avg_return": float(group["return_pct"].mean()),
  152. "profit_factor": _profit_factor(group["return_pct"]),
  153. }
  154. )
  155. holding_df = pd.DataFrame(bucket_rows).sort_values("holding_bucket")
  156. walk_forward_df = _build_walk_forward(trades, name)
  157. return summary, trades, holding_df, walk_forward_df
  158. def _config_snapshot(config: StrategyConfig) -> dict[str, object]:
  159. snapshot = asdict(config)
  160. snapshot["disabled_rules"] = sorted(config.disabled_rules)
  161. return snapshot
  162. def _trade_set(df: pd.DataFrame) -> set[tuple[str, str, str, str]]:
  163. return set(zip(df["buy_date"], df["sell_date"], df["buy_reason"], df["sell_reason"]))
  164. def _trade_diff(source: pd.DataFrame, target: pd.DataFrame, removed_label: str, added_label: str) -> pd.DataFrame:
  165. source_set = _trade_set(source)
  166. target_set = _trade_set(target)
  167. rows: list[dict[str, object]] = []
  168. for row in sorted(source_set - target_set):
  169. rows.append(
  170. {
  171. "change_type": removed_label,
  172. "buy_date": row[0],
  173. "sell_date": row[1],
  174. "buy_reason": row[2],
  175. "sell_reason": row[3],
  176. }
  177. )
  178. for row in sorted(target_set - source_set):
  179. rows.append(
  180. {
  181. "change_type": added_label,
  182. "buy_date": row[0],
  183. "sell_date": row[1],
  184. "buy_reason": row[2],
  185. "sell_reason": row[3],
  186. }
  187. )
  188. return pd.DataFrame(rows)
  189. def _wf_stats(df: pd.DataFrame, scheme: str) -> tuple[int, int, float]:
  190. view = df[df["scheme"] == scheme]
  191. positive = int((view["test_avg_return"] > 0).sum()) if not view.empty else 0
  192. total = int(len(view))
  193. avg_oos = float(view["test_avg_return"].mean()) if not view.empty else float("nan")
  194. return positive, total, avg_oos
  195. def main() -> None:
  196. base_dir = Path(__file__).resolve().parent
  197. indicator_df = _load_indicator_snapshot(base_dir)
  198. workbook_events = _load_true_trade_events(base_dir)
  199. first_date = workbook_events["date"].min()
  200. last_date = workbook_events["date"].max()
  201. branches = [
  202. ("workbook_preserving", workbook_preserving_config()),
  203. ("alpha_first_selective_veto", alpha_first_selective_veto_config()),
  204. ("alpha_first_glued_selective_veto", alpha_first_glued_selective_veto_config()),
  205. ]
  206. summaries: list[dict[str, object]] = []
  207. trades_by_branch: dict[str, pd.DataFrame] = {}
  208. holding_frames: list[pd.DataFrame] = []
  209. walk_frames: list[pd.DataFrame] = []
  210. for name, config in branches:
  211. summary, trades, holding_df, walk_df = _run_branch(
  212. name,
  213. config,
  214. indicator_df,
  215. workbook_events,
  216. first_date,
  217. last_date,
  218. )
  219. summaries.append(summary)
  220. trades_by_branch[name] = trades
  221. holding_frames.append(holding_df)
  222. walk_frames.append(walk_df)
  223. summary_df = pd.DataFrame(summaries)
  224. summary_df.to_csv(base_dir / "dragon_glued_alpha_candidate_summary.csv", index=False, encoding="utf-8-sig")
  225. branch_lookup = {row["branch"]: row for row in summaries}
  226. workbook_row = branch_lookup["workbook_preserving"]
  227. alpha_row = branch_lookup["alpha_first_selective_veto"]
  228. glued_row = branch_lookup["alpha_first_glued_selective_veto"]
  229. comparison_rows: list[dict[str, object]] = []
  230. for metric in [
  231. "trades",
  232. "win_rate",
  233. "avg_return",
  234. "median_return",
  235. "profit_factor",
  236. "real_buy_overlap",
  237. "real_sell_overlap",
  238. "short_00_05d_avg_return",
  239. "short_06_10d_avg_return",
  240. ]:
  241. comparison_rows.append(
  242. {
  243. "metric": metric,
  244. "workbook_preserving": workbook_row[metric],
  245. "alpha_first_selective_veto": alpha_row[metric],
  246. "alpha_first_glued_selective_veto": glued_row[metric],
  247. "delta_glued_minus_alpha": glued_row[metric] - alpha_row[metric],
  248. "delta_glued_minus_workbook": glued_row[metric] - workbook_row[metric],
  249. }
  250. )
  251. pd.DataFrame(comparison_rows).to_csv(
  252. base_dir / "dragon_glued_alpha_candidate_comparison.csv",
  253. index=False,
  254. encoding="utf-8-sig",
  255. )
  256. pd.concat(holding_frames, ignore_index=True).to_csv(
  257. base_dir / "dragon_glued_alpha_candidate_holding_buckets.csv",
  258. index=False,
  259. encoding="utf-8-sig",
  260. )
  261. combined_walk = pd.concat(walk_frames, ignore_index=True)
  262. combined_walk.to_csv(
  263. base_dir / "dragon_glued_alpha_candidate_walk_forward.csv",
  264. index=False,
  265. encoding="utf-8-sig",
  266. )
  267. diff_vs_alpha = _trade_diff(
  268. trades_by_branch["alpha_first_selective_veto"],
  269. trades_by_branch["alpha_first_glued_selective_veto"],
  270. "removed_from_glued_candidate_vs_alpha",
  271. "added_in_glued_candidate_vs_alpha",
  272. )
  273. diff_vs_workbook = _trade_diff(
  274. trades_by_branch["workbook_preserving"],
  275. trades_by_branch["alpha_first_glued_selective_veto"],
  276. "removed_from_glued_candidate_vs_workbook",
  277. "added_in_glued_candidate_vs_workbook",
  278. )
  279. diff_vs_alpha.to_csv(
  280. base_dir / "dragon_glued_alpha_candidate_trade_diff_vs_alpha.csv",
  281. index=False,
  282. encoding="utf-8-sig",
  283. )
  284. diff_vs_workbook.to_csv(
  285. base_dir / "dragon_glued_alpha_candidate_trade_diff_vs_workbook.csv",
  286. index=False,
  287. encoding="utf-8-sig",
  288. )
  289. (base_dir / "dragon_glued_alpha_candidate_config_snapshot.json").write_text(
  290. json.dumps(_config_snapshot(alpha_first_glued_selective_veto_config()), indent=2, ensure_ascii=False) + "\n",
  291. encoding="utf-8",
  292. )
  293. wb_anchor_pos, wb_anchor_total, wb_anchor_avg = _wf_stats(combined_walk[combined_walk["branch"] == "workbook_preserving"], "anchored_expanding")
  294. af_anchor_pos, af_anchor_total, af_anchor_avg = _wf_stats(
  295. combined_walk[combined_walk["branch"] == "alpha_first_selective_veto"],
  296. "anchored_expanding",
  297. )
  298. glued_anchor_pos, glued_anchor_total, glued_anchor_avg = _wf_stats(
  299. combined_walk[combined_walk["branch"] == "alpha_first_glued_selective_veto"],
  300. "anchored_expanding",
  301. )
  302. wb_roll_pos, wb_roll_total, wb_roll_avg = _wf_stats(combined_walk[combined_walk["branch"] == "workbook_preserving"], "rolling_3y")
  303. af_roll_pos, af_roll_total, af_roll_avg = _wf_stats(
  304. combined_walk[combined_walk["branch"] == "alpha_first_selective_veto"],
  305. "rolling_3y",
  306. )
  307. glued_roll_pos, glued_roll_total, glued_roll_avg = _wf_stats(
  308. combined_walk[combined_walk["branch"] == "alpha_first_glued_selective_veto"],
  309. "rolling_3y",
  310. )
  311. removed_vs_alpha = diff_vs_alpha[diff_vs_alpha["change_type"] == "removed_from_glued_candidate_vs_alpha"].copy()
  312. added_vs_alpha = diff_vs_alpha[diff_vs_alpha["change_type"] == "added_in_glued_candidate_vs_alpha"].copy()
  313. removed_glued_count = int((removed_vs_alpha["buy_reason"] == "glued_buy").sum()) if not removed_vs_alpha.empty else 0
  314. added_replacement_text = "none"
  315. if not added_vs_alpha.empty:
  316. added_row = added_vs_alpha.iloc[0]
  317. added_replacement_text = (
  318. f"{added_row['buy_date']} -> {added_row['sell_date']} / "
  319. f"{added_row['buy_reason']} -> {added_row['sell_reason']}"
  320. )
  321. lines = [
  322. "# Dragon Glued Alpha Candidate Review",
  323. "",
  324. "## Branches",
  325. "- `workbook_preserving`: official reconstruction baseline.",
  326. "- `alpha_first_selective_veto`: current formal alpha-first branch.",
  327. "- `alpha_first_glued_selective_veto`: alpha-first branch plus narrow glued hot/low veto.",
  328. "",
  329. "## Headline Comparison",
  330. f"- workbook_preserving: trades `{int(workbook_row['trades'])}`, avg_return `{_format_pct(float(workbook_row['avg_return']))}`, profit_factor `{_format_num(float(workbook_row['profit_factor']))}`, real BUY / SELL `{int(workbook_row['real_buy_overlap'])}/{int(workbook_row['real_sell_overlap'])}`",
  331. f"- alpha_first_selective_veto: trades `{int(alpha_row['trades'])}`, avg_return `{_format_pct(float(alpha_row['avg_return']))}`, profit_factor `{_format_num(float(alpha_row['profit_factor']))}`, real BUY / SELL `{int(alpha_row['real_buy_overlap'])}/{int(alpha_row['real_sell_overlap'])}`",
  332. f"- alpha_first_glued_selective_veto: trades `{int(glued_row['trades'])}`, avg_return `{_format_pct(float(glued_row['avg_return']))}`, profit_factor `{_format_num(float(glued_row['profit_factor']))}`, real BUY / SELL `{int(glued_row['real_buy_overlap'])}/{int(glued_row['real_sell_overlap'])}`",
  333. "",
  334. "## Short-Holding Impact",
  335. f"- `00-05d`: workbook `{_format_pct(float(workbook_row['short_00_05d_avg_return']))}`, alpha `{_format_pct(float(alpha_row['short_00_05d_avg_return']))}`, glued candidate `{_format_pct(float(glued_row['short_00_05d_avg_return']))}`",
  336. f"- `06-10d`: workbook `{_format_pct(float(workbook_row['short_06_10d_avg_return']))}`, alpha `{_format_pct(float(alpha_row['short_06_10d_avg_return']))}`, glued candidate `{_format_pct(float(glued_row['short_06_10d_avg_return']))}`",
  337. "",
  338. "## Walk-Forward Comparison",
  339. f"- Anchored expanding: workbook `{wb_anchor_pos}/{wb_anchor_total}` positive, avg `{_format_pct(wb_anchor_avg)}`; alpha `{af_anchor_pos}/{af_anchor_total}`, avg `{_format_pct(af_anchor_avg)}`; glued `{glued_anchor_pos}/{glued_anchor_total}`, avg `{_format_pct(glued_anchor_avg)}`",
  340. f"- Rolling 3Y: workbook `{wb_roll_pos}/{wb_roll_total}` positive, avg `{_format_pct(wb_roll_avg)}`; alpha `{af_roll_pos}/{af_roll_total}`, avg `{_format_pct(af_roll_avg)}`; glued `{glued_roll_pos}/{glued_roll_total}`, avg `{_format_pct(glued_roll_avg)}`",
  341. "",
  342. "## Trade-Diff Summary",
  343. f"- glued candidate vs alpha-first: removed `{int((diff_vs_alpha['change_type'] == 'removed_from_glued_candidate_vs_alpha').sum())}`, added `{int((diff_vs_alpha['change_type'] == 'added_in_glued_candidate_vs_alpha').sum())}`",
  344. f"- glued candidate vs workbook: removed `{int((diff_vs_workbook['change_type'] == 'removed_from_glued_candidate_vs_workbook').sum())}`, added `{int((diff_vs_workbook['change_type'] == 'added_in_glued_candidate_vs_workbook').sum())}`",
  345. f"- Removed vs alpha-first are almost entirely the intended target: `{removed_glued_count}` of `{int(len(removed_vs_alpha))}` are `glued_buy` trades.",
  346. f"- Added vs alpha-first is only a small fallback reroute: `{added_replacement_text}`.",
  347. "",
  348. "## Quant Judgment",
  349. "- The glued candidate clearly improves in-sample trade quality and short-holding drag beyond the current alpha-first branch.",
  350. "- The cost is no longer narrow: overlap drops materially from `102/101` to `90/89`, which is a much larger governance step than the current deep-oversold selective veto branch.",
  351. "- This means the glued candidate is a credible research branch, but not yet a clean replacement for the current formal alpha-first baseline.",
  352. "- Recommended governance: keep `alpha_first_selective_veto` as the official alpha-first baseline; treat `alpha_first_glued_selective_veto` as the next research branch for further residual attribution and out-of-sample stability review.",
  353. ]
  354. (base_dir / "dragon_glued_alpha_candidate_review.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()