dragon_glued_refine_experiments.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. from __future__ import annotations
  2. import json
  3. from dataclasses import asdict
  4. from pathlib import Path
  5. import pandas as pd
  6. from dragon_branch_configs import alpha_first_selective_veto_config
  7. from dragon_strategy import DragonRuleEngine
  8. from dragon_strategy_config import StrategyConfig
  9. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  10. df = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")
  11. df["date"] = pd.to_datetime(df["date"])
  12. return df.set_index("date", drop=False)
  13. def _load_true_trade_events(base_dir: Path) -> pd.DataFrame:
  14. return pd.read_csv(base_dir / "true_trade_events.csv", encoding="utf-8-sig")
  15. def _profit_factor(series: pd.Series) -> float:
  16. gross_profit = series[series > 0].sum()
  17. gross_loss = -series[series < 0].sum()
  18. if gross_loss == 0:
  19. return float("inf") if gross_profit > 0 else 0.0
  20. return float(gross_profit / gross_loss)
  21. def _holding_bucket(days: int) -> str:
  22. if days <= 5:
  23. return "00-05d"
  24. if days <= 10:
  25. return "06-10d"
  26. if days <= 20:
  27. return "11-20d"
  28. if days <= 40:
  29. return "21-40d"
  30. return "41d+"
  31. def _event_match(strategy_events: pd.DataFrame, workbook_events: pd.DataFrame, side: str) -> tuple[int, int]:
  32. wb = set(workbook_events[(workbook_events["side"] == side) & (workbook_events["layer"] == "real_trade")]["date"])
  33. st = set(strategy_events[(strategy_events["side"] == side) & (strategy_events["layer"] == "real_trade")]["date"])
  34. return len(wb & st), len(st - wb)
  35. def _run_branch(
  36. label: str,
  37. config: StrategyConfig,
  38. indicator_df: pd.DataFrame,
  39. workbook_events: pd.DataFrame,
  40. first_date: str,
  41. last_date: str,
  42. ) -> tuple[dict[str, object], pd.DataFrame]:
  43. engine = DragonRuleEngine(config)
  44. events, trades = engine.run(indicator_df)
  45. events = events[(events["date"] >= first_date) & (events["date"] <= last_date)].copy()
  46. trades = trades[
  47. (trades["buy_date"] >= first_date)
  48. & (trades["buy_date"] <= last_date)
  49. & (trades["sell_date"] >= first_date)
  50. & (trades["sell_date"] <= last_date)
  51. ].copy()
  52. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  53. short = trades[trades["holding_bucket"].isin({"00-05d", "06-10d"})].copy()
  54. buy_overlap, buy_extra = _event_match(events, workbook_events, "BUY")
  55. sell_overlap, sell_extra = _event_match(events, workbook_events, "SELL")
  56. row = {
  57. "experiment": label,
  58. "trades": int(len(trades)),
  59. "win_rate": float((trades["return_pct"] > 0).mean()) if not trades.empty else float("nan"),
  60. "avg_return": float(trades["return_pct"].mean()) if not trades.empty else float("nan"),
  61. "median_return": float(trades["return_pct"].median()) if not trades.empty else float("nan"),
  62. "profit_factor": _profit_factor(trades["return_pct"]) if not trades.empty else float("nan"),
  63. "real_buy_overlap": int(buy_overlap),
  64. "real_buy_extra": int(buy_extra),
  65. "real_sell_overlap": int(sell_overlap),
  66. "real_sell_extra": int(sell_extra),
  67. "short_trade_count": int(len(short)),
  68. "short_avg_return": float(short["return_pct"].mean()) if not short.empty else float("nan"),
  69. "short_00_05d_avg_return": float(short[short["holding_bucket"] == "00-05d"]["return_pct"].mean()),
  70. "short_06_10d_avg_return": float(short[short["holding_bucket"] == "06-10d"]["return_pct"].mean()),
  71. }
  72. diff = trades[["buy_date", "sell_date", "buy_reason", "sell_reason", "holding_days", "return_pct"]].copy()
  73. diff["experiment"] = label
  74. return row, diff
  75. def main() -> None:
  76. base_dir = Path(__file__).resolve().parent
  77. indicator_df = _load_indicator_snapshot(base_dir)
  78. workbook_events = _load_true_trade_events(base_dir)
  79. first_date = workbook_events["date"].min()
  80. last_date = workbook_events["date"].max()
  81. baseline = alpha_first_selective_veto_config()
  82. experiments = [
  83. ("baseline_alpha_first", baseline),
  84. (
  85. "glued_veto_low_weak_range",
  86. baseline.with_updates(
  87. glued_selective_low_c1_min=23.0,
  88. glued_selective_low_c1_max=28.0,
  89. glued_selective_low_b1_max=0.02,
  90. ),
  91. ),
  92. (
  93. "glued_veto_hot_and_low",
  94. baseline.with_updates(
  95. glued_selective_hot_c1_min=40.0,
  96. glued_selective_hot_b1_min=0.10,
  97. glued_selective_low_c1_min=23.0,
  98. glued_selective_low_c1_max=28.0,
  99. glued_selective_low_b1_max=0.02,
  100. ),
  101. ),
  102. (
  103. "glued_veto_hot_cap75_and_low",
  104. baseline.with_updates(
  105. glued_selective_hot_c1_min=40.0,
  106. glued_selective_hot_c1_max=75.0,
  107. glued_selective_hot_b1_min=0.10,
  108. glued_selective_low_c1_min=23.0,
  109. glued_selective_low_c1_max=28.0,
  110. glued_selective_low_b1_max=0.02,
  111. ),
  112. ),
  113. ]
  114. experiment_configs = {label: config for label, config in experiments}
  115. rows: list[dict[str, object]] = []
  116. diffs: list[pd.DataFrame] = []
  117. trade_sets: dict[str, set[tuple[str, str, str, str]]] = {}
  118. for label, config in experiments:
  119. row, diff = _run_branch(label, config, indicator_df, workbook_events, first_date, last_date)
  120. rows.append(row)
  121. diffs.append(diff)
  122. trade_sets[label] = set(map(tuple, diff[["buy_date", "sell_date", "buy_reason", "sell_reason"]].values.tolist()))
  123. result_df = pd.DataFrame(rows)
  124. baseline_row = result_df[result_df["experiment"] == "baseline_alpha_first"].iloc[0]
  125. for col in [
  126. "trades",
  127. "win_rate",
  128. "avg_return",
  129. "median_return",
  130. "profit_factor",
  131. "real_buy_overlap",
  132. "real_sell_overlap",
  133. "short_trade_count",
  134. "short_avg_return",
  135. "short_00_05d_avg_return",
  136. "short_06_10d_avg_return",
  137. ]:
  138. result_df[f"delta_{col}"] = result_df[col] - baseline_row[col]
  139. diff_df = pd.concat(diffs, ignore_index=True)
  140. result_df.to_csv(base_dir / "dragon_glued_refine_experiments.csv", index=False, encoding="utf-8-sig")
  141. diff_df.to_csv(base_dir / "dragon_glued_refine_experiment_trades.csv", index=False, encoding="utf-8-sig")
  142. refined_label = "glued_veto_hot_cap75_and_low"
  143. refined_config = experiment_configs[refined_label]
  144. snapshot = asdict(refined_config)
  145. snapshot["disabled_rules"] = sorted(refined_config.disabled_rules)
  146. (base_dir / "dragon_glued_refined_candidate_config.json").write_text(
  147. json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n",
  148. encoding="utf-8",
  149. )
  150. full_label = "glued_veto_hot_and_low"
  151. refined_vs_full_rows: list[dict[str, object]] = []
  152. for row in sorted(trade_sets[full_label] - trade_sets[refined_label]):
  153. refined_vs_full_rows.append(
  154. {
  155. "change_type": "removed_from_refined_vs_full",
  156. "buy_date": row[0],
  157. "sell_date": row[1],
  158. "buy_reason": row[2],
  159. "sell_reason": row[3],
  160. }
  161. )
  162. for row in sorted(trade_sets[refined_label] - trade_sets[full_label]):
  163. refined_vs_full_rows.append(
  164. {
  165. "change_type": "added_in_refined_vs_full",
  166. "buy_date": row[0],
  167. "sell_date": row[1],
  168. "buy_reason": row[2],
  169. "sell_reason": row[3],
  170. }
  171. )
  172. refined_vs_full_df = pd.DataFrame(refined_vs_full_rows)
  173. refined_vs_full_df.to_csv(
  174. base_dir / "dragon_glued_refined_trade_diff_vs_full.csv",
  175. index=False,
  176. encoding="utf-8-sig",
  177. )
  178. lines = [
  179. "# Dragon Glued Refine Experiments",
  180. "",
  181. "- Baseline branch: `alpha_first_selective_veto`.",
  182. "- Goal: verify whether the hot glued veto can be narrowed after attribution without giving back too much trade quality.",
  183. "",
  184. "## Summary",
  185. ]
  186. for _, row in result_df.iterrows():
  187. lines.append(
  188. f"- `{row['experiment']}`: trades `{int(row['trades'])}`, avg_return `{row['avg_return']:.2%}`, "
  189. f"profit_factor `{row['profit_factor']:.2f}`, short_avg_return `{row['short_avg_return']:.2%}`, "
  190. f"`00-05d` `{row['short_00_05d_avg_return']:.2%}`, `06-10d` `{row['short_06_10d_avg_return']:.2%}`, "
  191. f"real BUY / SELL `{int(row['real_buy_overlap'])}/{int(row['real_sell_overlap'])}`"
  192. )
  193. lines.extend(["", "## Delta Vs Alpha-First Baseline"])
  194. for _, row in result_df[result_df["experiment"] != "baseline_alpha_first"].iterrows():
  195. lines.append(
  196. f"- `{row['experiment']}`: delta_avg_return `{row['delta_avg_return']:.2%}`, "
  197. f"delta_profit_factor `{row['delta_profit_factor']:.2f}`, delta_short_avg_return `{row['delta_short_avg_return']:.2%}`, "
  198. f"real BUY / SELL `{int(row['real_buy_overlap'])}/{int(row['real_sell_overlap'])}`"
  199. )
  200. lines.extend(
  201. [
  202. "",
  203. "## Quant Judgment",
  204. "- `glued_veto_low_weak_range` is the clean conservative upgrade candidate if governance still prioritizes overlap preservation.",
  205. "- `glued_veto_hot_and_low` remains the strongest quality-improvement branch but may still be too aggressive on overlap.",
  206. "- `glued_veto_hot_cap75_and_low` specifically tests whether the only super-hot positive sample can be restored without giving back too much of the glued cleanup benefit.",
  207. "- Current result is stronger than expected: `glued_veto_hot_cap75_and_low` dominates the old full glued candidate on both quality and overlap.",
  208. "- Refined-vs-full trade diff is minimal and interpretable: it restores `2021-11-05 -> 2021-11-18` and removes the fallback reroute `2021-11-22 -> 2021-11-30`.",
  209. "- Candidate snapshot file: `dragon_glued_refined_candidate_config.json`.",
  210. ]
  211. )
  212. (base_dir / "dragon_glued_refine_experiments.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Run the experiment sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()