# dragon_glued_refined_removed_trade_attribution.py (12 KB)
  1. from __future__ import annotations
  2. from pathlib import Path
  3. from typing import Optional
  4. import pandas as pd
  5. from dragon_branch_configs import alpha_first_glued_refined_hot_cap_config, alpha_first_selective_veto_config
  6. from dragon_strategy import DragonRuleEngine
# Inclusive ISO-formatted date bounds. _run_branch keeps only trades whose
# buy_date and sell_date both fall inside [START_DATE, END_DATE].
START_DATE = "2016-01-01"
END_DATE = "2025-12-31"
  9. def _load_csv(base_dir: Path, name: str) -> pd.DataFrame:
  10. return pd.read_csv(base_dir / name, encoding="utf-8-sig")
  11. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  12. df = _load_csv(base_dir, "dragon_indicator_snapshot.csv")
  13. df["date"] = pd.to_datetime(df["date"])
  14. return df.sort_values("date").reset_index(drop=True)
  15. def _profit_factor(series: pd.Series) -> float:
  16. gross_profit = series[series > 0].sum()
  17. gross_loss = -series[series < 0].sum()
  18. if gross_loss == 0:
  19. return float("inf") if gross_profit > 0 else 0.0
  20. return float(gross_profit / gross_loss)
  21. def _pct(value: Optional[float]) -> str:
  22. if value is None or pd.isna(value):
  23. return "n/a"
  24. if value == float("inf"):
  25. return "inf"
  26. return f"{float(value):.2%}"
  27. def _holding_bucket(days: int) -> str:
  28. if days <= 5:
  29. return "00-05d"
  30. if days <= 10:
  31. return "06-10d"
  32. if days <= 20:
  33. return "11-20d"
  34. if days <= 40:
  35. return "21-40d"
  36. return "41d+"
  37. def _build_trade_quality(trades: pd.DataFrame, indicators: pd.DataFrame) -> pd.DataFrame:
  38. trades = trades.copy()
  39. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  40. pos_lookup = {dt.date().isoformat(): idx for idx, dt in enumerate(indicators["date"])}
  41. indicator_by_date = indicators.set_index(indicators["date"].dt.date)
  42. buy_a1: list[float] = []
  43. buy_b1: list[float] = []
  44. buy_c1: list[float] = []
  45. mfe_list: list[float] = []
  46. mae_list: list[float] = []
  47. entry_forward_list: list[float] = []
  48. exit_followthrough_list: list[float] = []
  49. for _, trade in trades.iterrows():
  50. buy_date = pd.Timestamp(trade["buy_date"]).date()
  51. entry_price = float(trade["buy_price"])
  52. exit_price = float(trade["sell_price"])
  53. buy_row = indicator_by_date.loc[buy_date]
  54. buy_a1.append(float(buy_row["a1"]))
  55. buy_b1.append(float(buy_row["b1"]))
  56. buy_c1.append(float(buy_row["c1"]))
  57. buy_idx = pos_lookup[trade["buy_date"]]
  58. sell_idx = pos_lookup[trade["sell_date"]]
  59. window = indicators[
  60. (indicators["date"] >= pd.Timestamp(trade["buy_date"])) & (indicators["date"] <= pd.Timestamp(trade["sell_date"]))
  61. ]
  62. mfe_list.append(float(window["high"].max()) / entry_price - 1.0)
  63. mae_list.append(float(window["low"].min()) / entry_price - 1.0)
  64. buy_future = indicators.iloc[buy_idx + 1 : buy_idx + 6]
  65. sell_future = indicators.iloc[sell_idx + 1 : sell_idx + 6]
  66. entry_forward_list.append(float("nan") if buy_future.empty else float(buy_future["close"].iloc[-1]) / entry_price - 1.0)
  67. exit_followthrough_list.append(float("nan") if sell_future.empty else float(sell_future["low"].min()) / exit_price - 1.0)
  68. trades["buy_a1"] = buy_a1
  69. trades["buy_b1"] = buy_b1
  70. trades["buy_c1"] = buy_c1
  71. trades["mfe_pct"] = mfe_list
  72. trades["mae_pct"] = mae_list
  73. trades["entry_forward_5d_pct"] = entry_forward_list
  74. trades["exit_followthrough_5d_pct"] = exit_followthrough_list
  75. return trades
  76. def _run_branch(indicators: pd.DataFrame, config) -> pd.DataFrame:
  77. indexed = indicators.set_index("date", drop=False)
  78. engine = DragonRuleEngine(config=config)
  79. _, trades = engine.run(indexed)
  80. trades = trades[
  81. (trades["buy_date"] >= START_DATE)
  82. & (trades["buy_date"] <= END_DATE)
  83. & (trades["sell_date"] >= START_DATE)
  84. & (trades["sell_date"] <= END_DATE)
  85. ].copy()
  86. return _build_trade_quality(trades.copy(), indicators)
  87. def _trade_key(df: pd.DataFrame) -> set[tuple[str, str, str, str]]:
  88. return set(zip(df["buy_date"], df["sell_date"], df["buy_reason"], df["sell_reason"]))
  89. def _veto_bucket(c1: float, b1: float) -> str:
  90. if 23 <= c1 < 28 and b1 <= 0.02:
  91. return "low_weak_range"
  92. if 40 <= c1 < 75 and b1 >= 0.10:
  93. return "hot_positive_b1_cap75"
  94. return "other"
  95. def _recommendation(row: pd.Series) -> tuple[str, str]:
  96. ret = float(row["return_pct"])
  97. mfe = float(row["mfe_pct"])
  98. holding = int(row["holding_days"])
  99. replacement_ret = row.get("replacement_return_pct")
  100. if ret < 0 and holding <= 10 and float(row["exit_followthrough_5d_pct"]) <= 0:
  101. return "KEEP_REMOVAL", "Removed trade is a short loser and price still weakens after exit."
  102. if ret < 0 and mfe <= 0.02:
  103. return "KEEP_REMOVAL", "Removed trade never developed enough profit room to defend its inclusion."
  104. if ret > 0:
  105. if ret <= 0.01 and pd.notna(replacement_ret) and float(replacement_ret) <= ret:
  106. return "OBSERVE_REMOVAL", "Removed trade is only a micro-winner and the replacement path is not stronger."
  107. return "OVER_REMOVAL", "Removed trade keeps meaningful alpha and should not be discarded without a better replacement."
  108. return "KEEP_REMOVAL", "Removed trade remains a weak short-holding sample under the alpha-first objective."
  109. def main() -> None:
  110. base_dir = Path(__file__).resolve().parent
  111. indicators = _load_indicator_snapshot(base_dir)
  112. workbook_events = _load_csv(base_dir, "true_trade_events.csv")
  113. alpha_trades = _run_branch(indicators, alpha_first_selective_veto_config())
  114. refined_trades = _run_branch(indicators, alpha_first_glued_refined_hot_cap_config())
  115. workbook_buy = set(workbook_events[(workbook_events["layer"] == "real_trade") & (workbook_events["side"] == "BUY")]["date"])
  116. workbook_sell = set(workbook_events[(workbook_events["layer"] == "real_trade") & (workbook_events["side"] == "SELL")]["date"])
  117. removed = pd.DataFrame(
  118. sorted(_trade_key(alpha_trades) - _trade_key(refined_trades)),
  119. columns=["buy_date", "sell_date", "buy_reason", "sell_reason"],
  120. )
  121. rows: list[dict[str, object]] = []
  122. for _, removed_row in removed.iterrows():
  123. trade = alpha_trades[
  124. (alpha_trades["buy_date"] == removed_row["buy_date"])
  125. & (alpha_trades["sell_date"] == removed_row["sell_date"])
  126. & (alpha_trades["buy_reason"] == removed_row["buy_reason"])
  127. & (alpha_trades["sell_reason"] == removed_row["sell_reason"])
  128. ].iloc[0]
  129. sell_dt = pd.Timestamp(trade["sell_date"])
  130. replacement = refined_trades[
  131. (pd.to_datetime(refined_trades["buy_date"]) > sell_dt)
  132. & (pd.to_datetime(refined_trades["buy_date"]) <= sell_dt + pd.Timedelta(days=10))
  133. ].sort_values("buy_date")
  134. replacement_row = replacement.iloc[0] if not replacement.empty else None
  135. recommendation, reason = _recommendation(
  136. pd.Series(
  137. {
  138. **trade.to_dict(),
  139. "replacement_return_pct": None if replacement_row is None else float(replacement_row["return_pct"]),
  140. }
  141. )
  142. )
  143. rows.append(
  144. {
  145. "buy_date": trade["buy_date"],
  146. "sell_date": trade["sell_date"],
  147. "buy_reason": trade["buy_reason"],
  148. "sell_reason": trade["sell_reason"],
  149. "veto_bucket": _veto_bucket(float(trade["buy_c1"]), float(trade["buy_b1"])),
  150. "holding_bucket": trade["holding_bucket"],
  151. "holding_days": int(trade["holding_days"]),
  152. "return_pct": float(trade["return_pct"]),
  153. "mfe_pct": float(trade["mfe_pct"]),
  154. "mae_pct": float(trade["mae_pct"]),
  155. "entry_forward_5d_pct": float(trade["entry_forward_5d_pct"]),
  156. "exit_followthrough_5d_pct": float(trade["exit_followthrough_5d_pct"]),
  157. "buy_a1": float(trade["buy_a1"]),
  158. "buy_b1": float(trade["buy_b1"]),
  159. "buy_c1": float(trade["buy_c1"]),
  160. "buy_aligned_with_workbook": trade["buy_date"] in workbook_buy,
  161. "sell_aligned_with_workbook": trade["sell_date"] in workbook_sell,
  162. "replacement_buy_date": "" if replacement_row is None else str(replacement_row["buy_date"]),
  163. "replacement_sell_date": "" if replacement_row is None else str(replacement_row["sell_date"]),
  164. "replacement_buy_reason": "" if replacement_row is None else str(replacement_row["buy_reason"]),
  165. "replacement_sell_reason": "" if replacement_row is None else str(replacement_row["sell_reason"]),
  166. "replacement_return_pct": float("nan") if replacement_row is None else float(replacement_row["return_pct"]),
  167. "replacement_gap_days": float("nan")
  168. if replacement_row is None
  169. else int((pd.Timestamp(replacement_row["buy_date"]) - sell_dt).days),
  170. "recommendation": recommendation,
  171. "recommendation_reason": reason,
  172. }
  173. )
  174. attribution = pd.DataFrame(rows).sort_values(["veto_bucket", "buy_date"]).reset_index(drop=True)
  175. attribution.to_csv(base_dir / "dragon_glued_refined_removed_trade_attribution.csv", index=False, encoding="utf-8-sig")
  176. pf = _profit_factor(attribution["return_pct"])
  177. pf_text = "inf" if pf == float("inf") else f"{pf:.2f}"
  178. lines = [
  179. "# Dragon Glued Refined Removed-Trade Review",
  180. "",
  181. "## Snapshot",
  182. f"- removed trades vs current alpha-first: `{len(attribution)}`",
  183. f"- avg_return of removed set: `{_pct(float(attribution['return_pct'].mean()))}`",
  184. f"- win_rate of removed set: `{_pct(float((attribution['return_pct'] > 0).mean()))}`",
  185. f"- profit_factor of removed set: `{pf_text}`",
  186. "",
  187. "## Recommendation Mix",
  188. f"- KEEP_REMOVAL: `{int((attribution['recommendation'] == 'KEEP_REMOVAL').sum())}`",
  189. f"- OBSERVE_REMOVAL: `{int((attribution['recommendation'] == 'OBSERVE_REMOVAL').sum())}`",
  190. f"- OVER_REMOVAL: `{int((attribution['recommendation'] == 'OVER_REMOVAL').sum())}`",
  191. "",
  192. "## Bucket View",
  193. ]
  194. for bucket, group in attribution.groupby("veto_bucket", dropna=False):
  195. lines.append(
  196. f"- `{bucket}`: trades `{len(group)}`, avg_return `{_pct(float(group['return_pct'].mean()))}`, "
  197. f"win_rate `{_pct(float((group['return_pct'] > 0).mean()))}`, avg_mfe `{_pct(float(group['mfe_pct'].mean()))}`, "
  198. f"avg_mae `{_pct(float(group['mae_pct'].mean()))}`"
  199. )
  200. lines.extend(
  201. [
  202. "",
  203. "## Quant Judgment",
  204. "- The refined branch mostly removes weak short-holding glued trades rather than medium-quality alpha trades.",
  205. "- If this review remains dominated by KEEP_REMOVAL and contains no meaningful OVER_REMOVAL bucket, the branch is structurally explainable rather than a black-box overfit.",
  206. "",
  207. "## Detailed Cards",
  208. ]
  209. )
  210. for _, row in attribution.iterrows():
  211. replacement = "none"
  212. if isinstance(row["replacement_buy_date"], str) and row["replacement_buy_date"]:
  213. replacement = (
  214. f"{row['replacement_buy_date']} -> {row['replacement_sell_date']} / "
  215. f"{row['replacement_buy_reason']} -> {row['replacement_sell_reason']} / "
  216. f"{_pct(row['replacement_return_pct'])}"
  217. )
  218. lines.extend(
  219. [
  220. f"### {row['buy_date']} -> {row['sell_date']}",
  221. f"- Bucket: `{row['veto_bucket']}` | holding `{row['holding_bucket']}`",
  222. f"- Trade: `{row['buy_reason']} -> {row['sell_reason']}` | return `{_pct(row['return_pct'])}` | holding `{int(row['holding_days'])}` days",
  223. f"- MFE / MAE: `{_pct(row['mfe_pct'])}` / `{_pct(row['mae_pct'])}`",
  224. f"- Entry 5d / Exit followthrough 5d: `{_pct(row['entry_forward_5d_pct'])}` / `{_pct(row['exit_followthrough_5d_pct'])}`",
  225. f"- Entry indicators: `a1={float(row['buy_a1']):.4f}` `b1={float(row['buy_b1']):.4f}` `c1={float(row['buy_c1']):.2f}`",
  226. f"- Workbook aligned: buy `{bool(row['buy_aligned_with_workbook'])}` / sell `{bool(row['sell_aligned_with_workbook'])}`",
  227. f"- Replacement path within 10d: `{replacement}`",
  228. f"- Recommendation: `{row['recommendation']}` | {row['recommendation_reason']}",
  229. "",
  230. ]
  231. )
  232. (base_dir / "dragon_glued_refined_removed_trade_review.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Generate the report only when this file is executed as a script.
if __name__ == "__main__":
    main()