# dragon_refined_alpha_attribution.py
from __future__ import annotations

from pathlib import Path

import pandas as pd

from dragon_branch_configs import alpha_first_glued_refined_hot_cap_config, alpha_first_selective_veto_config
from dragon_strategy import DragonRuleEngine
  6. START_DATE = "2016-01-01"
  7. END_DATE = "2025-12-31"
  8. BUY_REASON_STATE = {
  9. "deep_oversold_rebound_buy": "low_oversold_regime",
  10. "oversold_recovery_buy": "low_oversold_regime",
  11. "oversold_reversal_after_ql_buy": "rebound_after_sell_regime",
  12. "post_sell_rebound_buy": "rebound_after_sell_regime",
  13. "post_washout_kdj_reentry_buy": "rebound_after_sell_regime",
  14. "predictive_error_reentry_buy": "rebound_after_sell_regime",
  15. "hot_exit_reentry_buy": "rebound_after_sell_regime",
  16. "early_crash_probe_buy": "crash_probe_regime",
  17. "dual_gold_resonance_buy": "low_oversold_regime",
  18. "glued_buy": "mid_regime",
  19. "non_glued_positive_expansion_buy": "high_regime",
  20. }
  21. SELL_REASON_MANAGEMENT = {
  22. "crash_protection_exit": "predictive_risk_exit",
  23. "predictive_b1_break_exit": "predictive_risk_exit",
  24. "prewarning_reduction_exit": "prewarning_exit",
  25. "high_regime_momentum_break": "prewarning_exit",
  26. "high_regime_confirmed_exit:kdj_sell": "confirmed_trend_exit",
  27. "ql_high_zone_take_profit": "high_regime_take_profit",
  28. "ql_mid_zone_take_profit": "high_regime_take_profit",
  29. "medium_hot_take_profit": "high_regime_take_profit",
  30. "high_zone_post_ql_fade_exit": "ql_followthrough_exit",
  31. "post_ql_decay_exit": "ql_followthrough_exit",
  32. "post_dual_sell_decay_exit": "ql_followthrough_exit",
  33. "knife_take_profit_1": "first_take_profit",
  34. "knife_take_profit_2_glued": "first_take_profit",
  35. "knife_take_profit_2_wait_ql_s": "first_take_profit",
  36. "early_positive_take_profit": "first_take_profit",
  37. "oversold_rebound_take_profit": "first_take_profit",
  38. "glued_exit:kdj_sell": "confirmed_trend_exit",
  39. "small_positive_a1_declining:ql_sell": "confirmed_trend_exit",
  40. "negative_a1_no_b1_recovery:kdj_sell": "negative_a1_exit",
  41. "negative_a1_no_b1_recovery:ql_sell": "negative_a1_exit",
  42. "negative_a1_b1_not_strong:kdj_sell": "negative_a1_exit",
  43. "low_zone_dual_gold_exit:kdj_sell": "negative_a1_exit",
  44. "hard_exit:kdj_sell": "hard_risk_exit",
  45. "hard_exit:ql_sell": "hard_risk_exit",
  46. "early_failed_rebound_exit": "predictive_risk_exit",
  47. }
  48. def _load_csv(base_dir: Path, name: str) -> pd.DataFrame:
  49. return pd.read_csv(base_dir / name, encoding="utf-8-sig")
  50. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  51. df = _load_csv(base_dir, "dragon_indicator_snapshot.csv")
  52. df["date"] = pd.to_datetime(df["date"])
  53. return df.sort_values("date").reset_index(drop=True)
  54. def _profit_factor(series: pd.Series) -> float:
  55. gross_profit = series[series > 0].sum()
  56. gross_loss = -series[series < 0].sum()
  57. if gross_loss == 0:
  58. return float("inf") if gross_profit > 0 else 0.0
  59. return float(gross_profit / gross_loss)
  60. def _holding_bucket(days: int) -> str:
  61. if days <= 5:
  62. return "00-05d"
  63. if days <= 10:
  64. return "06-10d"
  65. if days <= 20:
  66. return "11-20d"
  67. if days <= 40:
  68. return "21-40d"
  69. return "41d+"
  70. def _format_pct(value: float) -> str:
  71. if pd.isna(value):
  72. return "NA"
  73. if value == float("inf"):
  74. return "inf"
  75. return f"{value:.2%}"
  76. def _format_num(value: float) -> str:
  77. if pd.isna(value):
  78. return "NA"
  79. if value == float("inf"):
  80. return "inf"
  81. return f"{value:.2f}"
  82. def _entry_family(reason: str) -> str:
  83. return str(reason).split(":", 1)[0]
  84. def _entry_variant(reason: str) -> str:
  85. parts = str(reason).split(":", 1)
  86. return "base" if len(parts) == 1 else parts[1]
  87. def _infer_state_layer(buy_reason: str, buy_c1: float) -> str:
  88. state = BUY_REASON_STATE.get(_entry_family(buy_reason))
  89. if state == "mid_regime":
  90. if buy_c1 < 20:
  91. return "low_oversold_regime"
  92. if buy_c1 >= 70:
  93. return "high_regime"
  94. return state or "mid_regime"
  95. def _infer_exit_management_layer(sell_reason: str) -> str:
  96. return SELL_REASON_MANAGEMENT.get(sell_reason, "default_exit_management")
  97. def _entry_role(reason: str) -> str:
  98. family = _entry_family(reason)
  99. if family in {"glued_buy", "early_crash_probe_buy", "oversold_recovery_buy"}:
  100. return "core_alpha_family"
  101. if reason in {"deep_oversold_rebound_buy:classic_oversold", "dual_gold_resonance_buy"}:
  102. return "support_alpha_family"
  103. if family in {"predictive_error_reentry_buy", "hot_exit_reentry_buy"}:
  104. return "bridge_reentry_family"
  105. if family == "post_washout_kdj_reentry_buy":
  106. return "workbook_restart_family"
  107. if family in {"post_sell_rebound_buy", "oversold_reversal_after_ql_buy"}:
  108. return "secondary_research_family"
  109. if family == "deep_oversold_rebound_buy":
  110. return "weak_research_family"
  111. return "other_family"
  112. def _group_stats(df: pd.DataFrame, group_cols: list[str]) -> pd.DataFrame:
  113. rows: list[dict[str, object]] = []
  114. for key, group in df.groupby(group_cols, dropna=False):
  115. if not isinstance(key, tuple):
  116. key = (key,)
  117. row = {col: val for col, val in zip(group_cols, key)}
  118. returns = group["return_pct"].astype(float)
  119. row.update(
  120. {
  121. "trades": int(len(group)),
  122. "trade_share": float(len(group) / len(df)),
  123. "win_rate": float((returns > 0).mean()),
  124. "avg_return": float(returns.mean()),
  125. "median_return": float(returns.median()),
  126. "sum_return_pct": float(returns.sum()),
  127. "profit_factor": _profit_factor(returns),
  128. "avg_holding_days": float(group["holding_days"].mean()),
  129. "avg_mfe_pct": float(group["mfe_pct"].mean()),
  130. "avg_mae_pct": float(group["mae_pct"].mean()),
  131. "avg_giveback_from_peak_pct": float(group["giveback_from_peak_pct"].mean()),
  132. "avg_entry_forward_5d_pct": float(group["entry_forward_5d_pct"].mean()),
  133. "avg_exit_followthrough_5d_pct": float(group["exit_followthrough_5d_pct"].mean()),
  134. "avg_exit_rebound_5d_pct": float(group["exit_rebound_5d_pct"].mean()),
  135. }
  136. )
  137. rows.append(row)
  138. return pd.DataFrame(rows)
  139. def _top_value(group: pd.DataFrame, col: str) -> str:
  140. counts = group[col].value_counts(dropna=False)
  141. if counts.empty:
  142. return ""
  143. return str(counts.index[0])
  144. def _veto_bucket(c1: float, b1: float) -> str:
  145. if 23 <= c1 < 28 and b1 <= 0.02:
  146. return "low_weak_range"
  147. if 40 <= c1 < 75 and b1 >= 0.10:
  148. return "hot_positive_b1_cap75"
  149. return "other"
  150. def _recheck_verdict(row: pd.Series) -> tuple[str, str]:
  151. ret = float(row["return_pct"])
  152. mfe = float(row["mfe_pct"])
  153. holding = int(row["holding_days"])
  154. forward = float(row["entry_forward_5d_pct"])
  155. follow = float(row["exit_followthrough_5d_pct"])
  156. replacement_ret = row["replacement_return_pct"]
  157. if ret < 0 and holding <= 10 and follow <= 0:
  158. return "KEEP_REMOVAL", "short loser and price still weakened after the exit"
  159. if ret < 0 and mfe <= 0.02:
  160. return "KEEP_REMOVAL", "trade never developed enough profit room to defend inclusion"
  161. if ret < 0 and forward <= 0:
  162. return "KEEP_REMOVAL", "entry had no useful short-term follow-through and remained weak"
  163. if ret > 0 and ret <= 0.01 and pd.notna(replacement_ret) and float(replacement_ret) <= ret:
  164. return "OBSERVE_REMOVAL", "micro-winner but replacement path is not clearly worse"
  165. if ret > 0:
  166. return "OVER_REMOVAL", "removed trade kept meaningful alpha and should not be deleted silently"
  167. return "KEEP_REMOVAL", "removed trade remains weak under the alpha-first objective"
  168. def _build_trade_quality(trades: pd.DataFrame, indicators: pd.DataFrame) -> pd.DataFrame:
  169. trades = trades.copy()
  170. trades["buy_dt"] = pd.to_datetime(trades["buy_date"])
  171. trades["sell_dt"] = pd.to_datetime(trades["sell_date"])
  172. trades["sell_year"] = trades["sell_dt"].dt.year.astype(int)
  173. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  174. indicator_by_date = indicators.set_index(indicators["date"].dt.date)
  175. pos_lookup = {dt.date().isoformat(): idx for idx, dt in enumerate(indicators["date"])}
  176. buy_a1: list[float] = []
  177. buy_b1: list[float] = []
  178. buy_c1: list[float] = []
  179. sell_a1: list[float] = []
  180. sell_b1: list[float] = []
  181. sell_c1: list[float] = []
  182. mfe_list: list[float] = []
  183. mae_list: list[float] = []
  184. giveback_list: list[float] = []
  185. entry_forward_list: list[float] = []
  186. exit_followthrough_list: list[float] = []
  187. exit_rebound_list: list[float] = []
  188. for _, trade in trades.iterrows():
  189. buy_date = pd.Timestamp(trade["buy_date"]).date()
  190. sell_date = pd.Timestamp(trade["sell_date"]).date()
  191. buy_row = indicator_by_date.loc[buy_date]
  192. sell_row = indicator_by_date.loc[sell_date]
  193. buy_a1.append(float(buy_row["a1"]))
  194. buy_b1.append(float(buy_row["b1"]))
  195. buy_c1.append(float(buy_row["c1"]))
  196. sell_a1.append(float(sell_row["a1"]))
  197. sell_b1.append(float(sell_row["b1"]))
  198. sell_c1.append(float(sell_row["c1"]))
  199. entry_price = float(trade["buy_price"])
  200. exit_price = float(trade["sell_price"])
  201. buy_idx = pos_lookup[trade["buy_date"]]
  202. sell_idx = pos_lookup[trade["sell_date"]]
  203. window = indicators[(indicators["date"] >= trade["buy_dt"]) & (indicators["date"] <= trade["sell_dt"])]
  204. max_high = float(window["high"].max())
  205. min_low = float(window["low"].min())
  206. mfe_list.append(max_high / entry_price - 1.0)
  207. mae_list.append(min_low / entry_price - 1.0)
  208. giveback_list.append(exit_price / max_high - 1.0)
  209. buy_future = indicators.iloc[buy_idx + 1 : buy_idx + 6]
  210. sell_future = indicators.iloc[sell_idx + 1 : sell_idx + 6]
  211. entry_forward_list.append(float("nan") if buy_future.empty else float(buy_future["close"].iloc[-1]) / entry_price - 1.0)
  212. exit_followthrough_list.append(float("nan") if sell_future.empty else float(sell_future["low"].min()) / exit_price - 1.0)
  213. exit_rebound_list.append(float("nan") if sell_future.empty else float(sell_future["high"].max()) / exit_price - 1.0)
  214. trades["buy_a1"] = buy_a1
  215. trades["buy_b1"] = buy_b1
  216. trades["buy_c1"] = buy_c1
  217. trades["sell_a1"] = sell_a1
  218. trades["sell_b1"] = sell_b1
  219. trades["sell_c1"] = sell_c1
  220. trades["mfe_pct"] = mfe_list
  221. trades["mae_pct"] = mae_list
  222. trades["giveback_from_peak_pct"] = giveback_list
  223. trades["entry_forward_5d_pct"] = entry_forward_list
  224. trades["exit_followthrough_5d_pct"] = exit_followthrough_list
  225. trades["exit_rebound_5d_pct"] = exit_rebound_list
  226. trades["entry_family"] = trades["buy_reason"].map(_entry_family)
  227. trades["entry_variant"] = trades["buy_reason"].map(_entry_variant)
  228. trades["entry_role"] = trades["buy_reason"].map(_entry_role)
  229. trades["market_state_layer"] = trades.apply(lambda row: _infer_state_layer(str(row["buy_reason"]), float(row["buy_c1"])), axis=1)
  230. trades["exit_management_layer"] = trades["sell_reason"].map(_infer_exit_management_layer)
  231. return trades
  232. def _run_branch(indicators: pd.DataFrame, config) -> pd.DataFrame:
  233. indexed = indicators.set_index("date", drop=False)
  234. engine = DragonRuleEngine(config=config)
  235. _, trades = engine.run(indexed)
  236. trades = trades[
  237. (trades["buy_date"] >= START_DATE)
  238. & (trades["buy_date"] <= END_DATE)
  239. & (trades["sell_date"] >= START_DATE)
  240. & (trades["sell_date"] <= END_DATE)
  241. ].copy()
  242. return _build_trade_quality(trades, indicators)
  243. def _trade_key(df: pd.DataFrame) -> set[tuple[str, str, str, str]]:
  244. return set(zip(df["buy_date"], df["sell_date"], df["buy_reason"], df["sell_reason"]))
  245. def _branch_snapshot(df: pd.DataFrame) -> dict[str, float]:
  246. returns = df["return_pct"].astype(float)
  247. return {
  248. "trades": float(len(df)),
  249. "win_rate": float((returns > 0).mean()),
  250. "avg_return": float(returns.mean()),
  251. "profit_factor": _profit_factor(returns),
  252. "avg_mfe": float(df["mfe_pct"].mean()),
  253. "avg_mae": float(df["mae_pct"].mean()),
  254. }
  255. def _family_decomposition(refined: pd.DataFrame) -> pd.DataFrame:
  256. level_frames: list[pd.DataFrame] = []
  257. for level_name, group_cols in [
  258. ("entry_family", ["entry_family", "entry_role"]),
  259. ("entry_reason", ["buy_reason", "entry_role", "market_state_layer"]),
  260. ]:
  261. frame = _group_stats(refined, group_cols)
  262. if "entry_family" in frame.columns:
  263. frame["group_name"] = frame["entry_family"]
  264. else:
  265. frame["group_name"] = frame["buy_reason"]
  266. frame["decomposition_level"] = level_name
  267. top_exit = []
  268. for _, row in frame.iterrows():
  269. if level_name == "entry_family":
  270. group = refined[refined["entry_family"] == row["group_name"]]
  271. else:
  272. group = refined[refined["buy_reason"] == row["group_name"]]
  273. top_exit.append(_top_value(group, "sell_reason"))
  274. frame["top_exit_reason"] = top_exit
  275. level_frames.append(frame)
  276. result = pd.concat(level_frames, ignore_index=True, sort=False)
  277. result = result.sort_values(["decomposition_level", "sum_return_pct", "trades"], ascending=[True, False, False]).reset_index(drop=True)
  278. result["contribution_rank"] = result.groupby("decomposition_level")["sum_return_pct"].rank(method="dense", ascending=False).astype(int)
  279. cols = [
  280. "decomposition_level",
  281. "group_name",
  282. "entry_role",
  283. "market_state_layer",
  284. "buy_reason",
  285. "entry_family",
  286. "top_exit_reason",
  287. "trades",
  288. "trade_share",
  289. "win_rate",
  290. "avg_return",
  291. "median_return",
  292. "sum_return_pct",
  293. "profit_factor",
  294. "avg_holding_days",
  295. "avg_mfe_pct",
  296. "avg_mae_pct",
  297. "avg_giveback_from_peak_pct",
  298. "avg_entry_forward_5d_pct",
  299. "avg_exit_followthrough_5d_pct",
  300. "avg_exit_rebound_5d_pct",
  301. "contribution_rank",
  302. ]
  303. return result[[col for col in cols if col in result.columns]].copy()
  304. def _alpha_attribution(refined: pd.DataFrame) -> pd.DataFrame:
  305. frame = _group_stats(
  306. refined,
  307. ["market_state_layer", "entry_family", "buy_reason", "exit_management_layer", "sell_reason"],
  308. )
  309. frame["attribution_label"] = frame.apply(
  310. lambda row: "core_alpha_source"
  311. if row["sum_return_pct"] > 0.20 and row["avg_return"] > 0
  312. else "drag_source"
  313. if row["sum_return_pct"] < 0
  314. else "mixed_source",
  315. axis=1,
  316. )
  317. frame = frame.sort_values(["sum_return_pct", "trades"], ascending=[False, False]).reset_index(drop=True)
  318. frame["sum_return_rank"] = frame["sum_return_pct"].rank(method="dense", ascending=False).astype(int)
  319. return frame
  320. def _removed_trade_recheck(alpha: pd.DataFrame, refined: pd.DataFrame, workbook_events: pd.DataFrame) -> pd.DataFrame:
  321. workbook_buy = set(workbook_events[(workbook_events["layer"] == "real_trade") & (workbook_events["side"] == "BUY")]["date"])
  322. workbook_sell = set(workbook_events[(workbook_events["layer"] == "real_trade") & (workbook_events["side"] == "SELL")]["date"])
  323. removed = pd.DataFrame(
  324. sorted(_trade_key(alpha) - _trade_key(refined)),
  325. columns=["buy_date", "sell_date", "buy_reason", "sell_reason"],
  326. )
  327. rows: list[dict[str, object]] = []
  328. for _, removed_row in removed.iterrows():
  329. trade = alpha[
  330. (alpha["buy_date"] == removed_row["buy_date"])
  331. & (alpha["sell_date"] == removed_row["sell_date"])
  332. & (alpha["buy_reason"] == removed_row["buy_reason"])
  333. & (alpha["sell_reason"] == removed_row["sell_reason"])
  334. ].iloc[0]
  335. sell_dt = pd.Timestamp(trade["sell_date"])
  336. replacement = refined[
  337. (pd.to_datetime(refined["buy_date"]) > sell_dt)
  338. & (pd.to_datetime(refined["buy_date"]) <= sell_dt + pd.Timedelta(days=10))
  339. ].sort_values("buy_date")
  340. replacement_row = replacement.iloc[0] if not replacement.empty else None
  341. replacement_ret = float("nan") if replacement_row is None else float(replacement_row["return_pct"])
  342. verdict, verdict_reason = _recheck_verdict(
  343. pd.Series({**trade.to_dict(), "replacement_return_pct": replacement_ret})
  344. )
  345. rows.append(
  346. {
  347. "buy_date": trade["buy_date"],
  348. "sell_date": trade["sell_date"],
  349. "buy_reason": trade["buy_reason"],
  350. "sell_reason": trade["sell_reason"],
  351. "entry_family": trade["entry_family"],
  352. "entry_role": trade["entry_role"],
  353. "market_state_layer": trade["market_state_layer"],
  354. "exit_management_layer": trade["exit_management_layer"],
  355. "veto_bucket": _veto_bucket(float(trade["buy_c1"]), float(trade["buy_b1"])),
  356. "holding_bucket": trade["holding_bucket"],
  357. "holding_days": int(trade["holding_days"]),
  358. "return_pct": float(trade["return_pct"]),
  359. "mfe_pct": float(trade["mfe_pct"]),
  360. "mae_pct": float(trade["mae_pct"]),
  361. "giveback_from_peak_pct": float(trade["giveback_from_peak_pct"]),
  362. "entry_forward_5d_pct": float(trade["entry_forward_5d_pct"]),
  363. "exit_followthrough_5d_pct": float(trade["exit_followthrough_5d_pct"]),
  364. "exit_rebound_5d_pct": float(trade["exit_rebound_5d_pct"]),
  365. "buy_a1": float(trade["buy_a1"]),
  366. "buy_b1": float(trade["buy_b1"]),
  367. "buy_c1": float(trade["buy_c1"]),
  368. "buy_aligned_with_workbook": trade["buy_date"] in workbook_buy,
  369. "sell_aligned_with_workbook": trade["sell_date"] in workbook_sell,
  370. "replacement_buy_date": "" if replacement_row is None else str(replacement_row["buy_date"]),
  371. "replacement_sell_date": "" if replacement_row is None else str(replacement_row["sell_date"]),
  372. "replacement_buy_reason": "" if replacement_row is None else str(replacement_row["buy_reason"]),
  373. "replacement_sell_reason": "" if replacement_row is None else str(replacement_row["sell_reason"]),
  374. "replacement_return_pct": replacement_ret,
  375. "replacement_gap_days": float("nan")
  376. if replacement_row is None
  377. else int((pd.Timestamp(replacement_row["buy_date"]) - sell_dt).days),
  378. "verdict": verdict,
  379. "verdict_reason": verdict_reason,
  380. }
  381. )
  382. return pd.DataFrame(rows).sort_values(["veto_bucket", "buy_date"]).reset_index(drop=True)
  383. def _winner_structure(refined: pd.DataFrame) -> pd.DataFrame:
  384. winners = refined[refined["return_pct"] > 0].copy()
  385. if winners.empty:
  386. return pd.DataFrame()
  387. frame = _group_stats(winners, ["entry_family", "holding_bucket"])
  388. frame = frame.sort_values(["sum_return_pct", "trades"], ascending=[False, False]).reset_index(drop=True)
  389. return frame
  390. def main() -> None:
  391. base_dir = Path(__file__).resolve().parent
  392. indicators = _load_indicator_snapshot(base_dir)
  393. workbook_events = _load_csv(base_dir, "true_trade_events.csv")
  394. alpha = _run_branch(indicators, alpha_first_selective_veto_config())
  395. refined = _run_branch(indicators, alpha_first_glued_refined_hot_cap_config())
  396. family_decomposition = _family_decomposition(refined)
  397. alpha_attribution = _alpha_attribution(refined)
  398. removed_recheck = _removed_trade_recheck(alpha, refined, workbook_events)
  399. winner_structure = _winner_structure(refined)
  400. family_decomposition.to_csv(base_dir / "dragon_refined_family_decomposition.csv", index=False, encoding="utf-8-sig")
  401. alpha_attribution.to_csv(base_dir / "dragon_refined_alpha_attribution.csv", index=False, encoding="utf-8-sig")
  402. removed_recheck.to_csv(base_dir / "dragon_refined_removed_trade_recheck.csv", index=False, encoding="utf-8-sig")
  403. refined_snapshot = _branch_snapshot(refined)
  404. alpha_snapshot = _branch_snapshot(alpha)
  405. removed_pf = _profit_factor(removed_recheck["return_pct"]) if not removed_recheck.empty else float("nan")
  406. removed_pf_text = _format_num(removed_pf)
  407. top_family = family_decomposition[
  408. (family_decomposition["decomposition_level"] == "entry_family") & (family_decomposition["trades"] >= 3)
  409. ].head(5)
  410. weak_family = family_decomposition[
  411. (family_decomposition["decomposition_level"] == "entry_reason")
  412. & (family_decomposition["trades"] >= 1)
  413. & (family_decomposition["entry_role"].isin(["weak_research_family", "secondary_research_family"]))
  414. ].sort_values(["avg_return", "sum_return_pct"]).head(5)
  415. top_combo = alpha_attribution.head(8)
  416. drag_combo = alpha_attribution[alpha_attribution["sum_return_pct"] < 0].head(8)
  417. winner_top = winner_structure.head(5)
  418. lines = [
  419. "# Dragon Refined Edge Review",
  420. "",
  421. "## Scope",
  422. "- Target branch: `alpha_first_glued_refined_hot_cap`",
  423. "- Control branch: `alpha_first_selective_veto`",
  424. "- Evaluation window: `2016-01-01` to `2025-12-31`",
  425. "",
  426. "## Headline",
  427. f"- control: trades `{int(alpha_snapshot['trades'])}`, win_rate `{_format_pct(alpha_snapshot['win_rate'])}`, avg_return `{_format_pct(alpha_snapshot['avg_return'])}`, profit_factor `{_format_num(alpha_snapshot['profit_factor'])}`",
  428. f"- refined: trades `{int(refined_snapshot['trades'])}`, win_rate `{_format_pct(refined_snapshot['win_rate'])}`, avg_return `{_format_pct(refined_snapshot['avg_return'])}`, profit_factor `{_format_num(refined_snapshot['profit_factor'])}`",
  429. f"- refined minus control: trades `{int(refined_snapshot['trades'] - alpha_snapshot['trades'])}`, avg_return `{_format_pct(refined_snapshot['avg_return'] - alpha_snapshot['avg_return'])}`, profit_factor `{_format_num(refined_snapshot['profit_factor'] - alpha_snapshot['profit_factor'])}`",
  430. "",
  431. "## Main Edge Source",
  432. "- Refined alpha is still primarily a `glued_buy` story, but now with stricter removal of weak short-holding glued trades.",
  433. "- The branch is not winning by adding new complex trade paths; it is winning by deleting low-quality short trades while preserving the medium and long-holding winners.",
  434. "",
  435. "## Entry Family Decomposition",
  436. ]
  437. for _, row in top_family.iterrows():
  438. lines.append(
  439. f"- `{row['group_name']}` [{row['entry_role']}]: trades `{int(row['trades'])}`, share `{_format_pct(float(row['trade_share']))}`, "
  440. f"avg_return `{_format_pct(float(row['avg_return']))}`, sum_return `{_format_pct(float(row['sum_return_pct']))}`, "
  441. f"profit_factor `{_format_num(float(row['profit_factor']))}`, top_exit `{row['top_exit_reason']}`"
  442. )
  443. lines.extend(["", "## Weak Research Pockets"])
  444. for _, row in weak_family.iterrows():
  445. lines.append(
  446. f"- `{row['group_name']}`: trades `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  447. f"sum_return `{_format_pct(float(row['sum_return_pct']))}`, profit_factor `{_format_num(float(row['profit_factor']))}`"
  448. )
  449. lines.extend(["", "## Kept Winner Structure"])
  450. for _, row in winner_top.iterrows():
  451. lines.append(
  452. f"- `{row['entry_family']} / {row['holding_bucket']}`: winners `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  453. f"sum_return `{_format_pct(float(row['sum_return_pct']))}`"
  454. )
  455. lines.extend(["", "## Entry / Exit Interaction Attribution"])
  456. for _, row in top_combo.iterrows():
  457. lines.append(
  458. f"- positive `{row['market_state_layer']} / {row['buy_reason']} -> {row['sell_reason']}`: trades `{int(row['trades'])}`, "
  459. f"sum_return `{_format_pct(float(row['sum_return_pct']))}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  460. f"PF `{_format_num(float(row['profit_factor']))}`"
  461. )
  462. lines.extend(["", "## Drag Interaction Pockets"])
  463. for _, row in drag_combo.iterrows():
  464. lines.append(
  465. f"- drag `{row['market_state_layer']} / {row['buy_reason']} -> {row['sell_reason']}`: trades `{int(row['trades'])}`, "
  466. f"sum_return `{_format_pct(float(row['sum_return_pct']))}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  467. f"PF `{_format_num(float(row['profit_factor']))}`"
  468. )
  469. lines.extend(
  470. [
  471. "",
  472. "## Removed-Trade Recheck",
  473. f"- removed trades vs control: `{int(len(removed_recheck))}`",
  474. f"- removed-set avg_return `{_format_pct(float(removed_recheck['return_pct'].mean()))}`",
  475. f"- removed-set win_rate `{_format_pct(float((removed_recheck['return_pct'] > 0).mean()))}`",
  476. f"- removed-set profit_factor `{removed_pf_text}`",
  477. f"- KEEP_REMOVAL `{int((removed_recheck['verdict'] == 'KEEP_REMOVAL').sum())}` | OBSERVE_REMOVAL `{int((removed_recheck['verdict'] == 'OBSERVE_REMOVAL').sum())}` | OVER_REMOVAL `{int((removed_recheck['verdict'] == 'OVER_REMOVAL').sum())}`",
  478. ]
  479. )
  480. for bucket, group in removed_recheck.groupby("veto_bucket", dropna=False):
  481. lines.append(
  482. f"- `{bucket}`: trades `{len(group)}`, avg_return `{_format_pct(float(group['return_pct'].mean()))}`, "
  483. f"avg_holding `{group['holding_days'].mean():.1f}`, avg_mfe `{_format_pct(float(group['mfe_pct'].mean()))}`"
  484. )
  485. lines.extend(
  486. [
  487. "",
  488. "## Quant Judgment",
  489. "- Core alpha remains concentrated in `glued_buy`, `early_crash_probe_buy`, and the preserved medium/long holding structure.",
  490. "- `dual_gold_resonance_buy` and `deep_oversold_rebound_buy:classic_oversold` remain support families, not the main alpha engine.",
  491. "- Weak pockets still exist in secondary rebound / weak deep-oversold variants, but they are not where the refined branch gets its headline improvement.",
  492. "- The refined branch improves mainly by deleting low-quality short glued trades; this remains explainable and not dependent on deleting profitable samples.",
  493. "- The next step should therefore move to execution-aware robustness, not back to workbook-style residual tuning.",
  494. ]
  495. )
  496. (base_dir / "dragon_refined_edge_review.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  497. if __name__ == "__main__":
  498. main()