dragon_system_review.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. from __future__ import annotations
  2. import json
  3. from dataclasses import dataclass
  4. from pathlib import Path
  5. from typing import Iterable
  6. import pandas as pd
@dataclass(frozen=True)
class WindowScript:
    """Registry entry describing one script covered by the window-consistency review.

    Instances are immutable (``frozen=True``).
    """

    # File name of the script; it is joined onto the review's base directory and read.
    path: str
    # Label for the window the script works on; copied verbatim into the report.
    scope: str
    # Label for the script's role (e.g. "evaluation", "experiment"); copied into the report.
    category: str
    # Expected filter rule: "live_exception" is reported as EXEMPT, any other value
    # is checked for buy_date / sell_date trade filters.
    expectation: str
# Scripts audited by the window-consistency report, as
# (path, scope, category, expectation) entries. Only the final entry carries
# the "live_exception" expectation; every other script is expected to be
# "dual_bound".
WINDOW_SCRIPTS = [
    WindowScript("dragon_backtest.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_cost_stress_test.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_deep_oversold_confirmation_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_selective_veto_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_equity_curve_review.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_alpha_candidate.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_branch_review.py", "hybrid bounded window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_removed_trade_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_glued_refined_sensitivity.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refine_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_predictive_break_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_rc1_release.py", "fixed release window", "release", "dual_bound"),
    WindowScript("dragon_refined_alpha_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_refined_execution_validation.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_rule_ablation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_short_holding_audit.py", "workbook window", "audit", "dual_bound"),
    WindowScript("dragon_short_holding_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_strategy_overview.py", "fixed release window", "overview", "dual_bound"),
    WindowScript("dragon_threshold_perturbation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_daily_signal_pipeline.py", "live / forward window", "live", "live_exception"),
]
  36. def _load_csv(path: Path) -> pd.DataFrame:
  37. return pd.DataFrame() if not path.exists() else pd.read_csv(path, encoding="utf-8-sig")
  38. def _fmt_pct(value: object) -> str:
  39. if value is None or pd.isna(value):
  40. return "NA"
  41. return f"{float(value):.2%}"
  42. def _fmt_num(value: object, digits: int = 2) -> str:
  43. if value is None or pd.isna(value):
  44. return "NA"
  45. return f"{float(value):.{digits}f}"
  46. def _window_filter_status(text: str, expectation: str) -> tuple[str, str]:
  47. has_buy = 'trades["buy_date"]' in text
  48. has_sell = 'trades["sell_date"]' in text
  49. if expectation == "live_exception":
  50. return "EXEMPT", "Live / forward pipeline intentionally keeps open-ended trade coverage."
  51. if has_buy and has_sell:
  52. return "PASS", "Trade filter constrains both buy_date and sell_date."
  53. if has_buy and not has_sell:
  54. return "FAIL", "Trade filter constrains buy_date only."
  55. return "FAIL", "No recognizable trade-window filter found."
  56. def build_window_consistency_report(base_dir: Path) -> pd.DataFrame:
  57. rows: list[dict[str, object]] = []
  58. for item in WINDOW_SCRIPTS:
  59. text = (base_dir / item.path).read_text(encoding="utf-8")
  60. status, note = _window_filter_status(text, item.expectation)
  61. rows.append(
  62. {
  63. "script": item.path,
  64. "scope": item.scope,
  65. "category": item.category,
  66. "expectation": item.expectation,
  67. "status": status,
  68. "note": note,
  69. }
  70. )
  71. df = pd.DataFrame(rows)
  72. passed = int((df["status"] == "PASS").sum())
  73. exempt = int((df["status"] == "EXEMPT").sum())
  74. failed = int((df["status"] == "FAIL").sum())
  75. lines = [
  76. "# Dragon Review - Window Consistency",
  77. "",
  78. "## Scope",
  79. "- Goal: verify that in-sample / workbook-window trade statistics do not keep window-external exits.",
  80. "- Rule: for bounded research windows, trade filters must constrain both `buy_date` and `sell_date`.",
  81. "",
  82. "## Summary",
  83. f"- PASS: `{passed}`",
  84. f"- EXEMPT: `{exempt}`",
  85. f"- FAIL: `{failed}`",
  86. "",
  87. "## Result Table",
  88. ]
  89. for _, row in df.iterrows():
  90. lines.append(
  91. f"- `{row['script']}` | {row['category']} | {row['scope']} | `{row['status']}` | {row['note']}"
  92. )
  93. if failed == 0:
  94. lines.extend(
  95. [
  96. "",
  97. "## Judgment",
  98. "- The bounded research/evaluation pack is now on a consistent dual-bound trade-window rule.",
  99. "- `dragon_daily_signal_pipeline.py` remains intentionally exempt because it serves the live / forward chain rather than workbook-window evaluation.",
  100. ]
  101. )
  102. (base_dir / "dragon_review_window_consistency.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  103. return df
  104. def _branch_source_frames(base_dir: Path) -> dict[str, pd.DataFrame]:
  105. frames: dict[str, pd.DataFrame] = {}
  106. overview = _load_csv(base_dir / "dragon_strategy_overview.csv")
  107. if not overview.empty:
  108. frames["dragon_strategy_overview.csv"] = overview
  109. glued = _load_csv(base_dir / "dragon_glued_refined_branch_summary.csv")
  110. if not glued.empty:
  111. frames["dragon_glued_refined_branch_summary.csv"] = glued
  112. alpha = _load_csv(base_dir / "dragon_alpha_first_branch_summary.csv")
  113. if not alpha.empty:
  114. frames["dragon_alpha_first_branch_summary.csv"] = alpha
  115. return frames
  116. def _iter_metric_values(base_dir: Path) -> Iterable[dict[str, object]]:
  117. metrics = [
  118. "trades",
  119. "win_rate",
  120. "avg_return",
  121. "median_return",
  122. "profit_factor",
  123. "compounded_return",
  124. "cagr",
  125. "real_buy_overlap",
  126. "real_sell_overlap",
  127. ]
  128. for source, df in _branch_source_frames(base_dir).items():
  129. for _, row in df.iterrows():
  130. branch = str(row["branch"])
  131. for metric in metrics:
  132. if metric in row.index:
  133. yield {
  134. "source": source,
  135. "branch": branch,
  136. "metric": metric,
  137. "value": row[metric],
  138. }
  139. rc1_path = base_dir / "dragon_rc1_config_snapshot.json"
  140. if rc1_path.exists():
  141. payload = json.loads(rc1_path.read_text(encoding="utf-8"))
  142. for metric, source_metric in [
  143. ("trades", "trade_count"),
  144. ("win_rate", "win_rate"),
  145. ("avg_return", "avg_return"),
  146. ("median_return", "median_return"),
  147. ("profit_factor", "profit_factor"),
  148. ("compounded_return", "compounded_return"),
  149. ("cagr", "cagr"),
  150. ]:
  151. yield {
  152. "source": "dragon_rc1_config_snapshot.json",
  153. "branch": str(payload["branch_name"]),
  154. "metric": metric,
  155. "value": payload[source_metric],
  156. }
  157. def build_branch_metric_consistency(base_dir: Path) -> pd.DataFrame:
  158. raw = pd.DataFrame(list(_iter_metric_values(base_dir)))
  159. if raw.empty:
  160. raw.to_csv(base_dir / "dragon_review_branch_metric_consistency.csv", index=False, encoding="utf-8-sig")
  161. (base_dir / "dragon_review_branch_metric_consistency.md").write_text(
  162. "# Dragon Review - Branch Metric Consistency\n\n- No source data found.\n",
  163. encoding="utf-8",
  164. )
  165. return raw
  166. raw["value"] = pd.to_numeric(raw["value"], errors="coerce")
  167. pivot = raw.pivot_table(index=["branch", "metric"], columns="source", values="value", aggfunc="last").reset_index()
  168. source_cols = [col for col in pivot.columns if col not in {"branch", "metric"}]
  169. records: list[dict[str, object]] = []
  170. for _, row in pivot.iterrows():
  171. values = [row[col] for col in source_cols if pd.notna(row[col])]
  172. max_abs_diff = float(max(values) - min(values)) if values else float("nan")
  173. if len(values) <= 1:
  174. status = "single_source"
  175. else:
  176. tol = 1e-12 if row["metric"] in {"trades", "real_buy_overlap", "real_sell_overlap"} else 1e-9
  177. status = "match" if max_abs_diff <= tol else "mismatch"
  178. rec = {
  179. "branch": row["branch"],
  180. "metric": row["metric"],
  181. "source_count": int(len(values)),
  182. "min_value": float(min(values)) if values else float("nan"),
  183. "max_value": float(max(values)) if values else float("nan"),
  184. "max_abs_diff": max_abs_diff,
  185. "status": status,
  186. }
  187. for col in source_cols:
  188. rec[col] = row[col]
  189. records.append(rec)
  190. result = pd.DataFrame(records).sort_values(["branch", "metric"]).reset_index(drop=True)
  191. result.to_csv(base_dir / "dragon_review_branch_metric_consistency.csv", index=False, encoding="utf-8-sig")
  192. mismatches = result[result["status"] == "mismatch"].copy()
  193. lines = [
  194. "# Dragon Review - Branch Metric Consistency",
  195. "",
  196. "## Scope",
  197. "- Sources compared:",
  198. "- `dragon_strategy_overview.csv`",
  199. "- `dragon_glued_refined_branch_summary.csv`",
  200. "- `dragon_alpha_first_branch_summary.csv`",
  201. "- `dragon_rc1_config_snapshot.json`",
  202. "",
  203. f"- Compared rows: `{len(result)}`",
  204. f"- Mismatches: `{len(mismatches)}`",
  205. "",
  206. ]
  207. if mismatches.empty:
  208. lines.append("- All compared branch metrics are consistent across current outputs.")
  209. else:
  210. lines.extend(["## Mismatches"])
  211. for _, row in mismatches.iterrows():
  212. parts = []
  213. for col in source_cols:
  214. if pd.notna(row[col]):
  215. parts.append(f"{col}={row[col]}")
  216. lines.append(
  217. f"- `{row['branch']}` / `{row['metric']}` | spread `{row['max_abs_diff']}` | " + "; ".join(parts)
  218. )
  219. lines.extend(
  220. [
  221. "",
  222. "## Judgment",
  223. "- `match` means the same branch/metric agrees across all currently available source files.",
  224. "- `mismatch` means at least one report family is using a different metric definition or evaluation window.",
  225. "- `single_source` means only one current artifact exposes that metric, so cross-check confidence is lower.",
  226. ]
  227. )
  228. (base_dir / "dragon_review_branch_metric_consistency.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  229. return result
  230. def _calc_removed_trade_over_removal_count(base_dir: Path) -> float:
  231. df = _load_csv(base_dir / "dragon_glued_refined_removed_trade_attribution.csv")
  232. if df.empty or "recommendation" not in df.columns:
  233. return float("nan")
  234. return float((df["recommendation"].astype(str) == "OVER_REMOVAL").sum())
  235. def _calc_local_sensitivity_robust_case_count(base_dir: Path) -> float:
  236. df = _load_csv(base_dir / "dragon_glued_refined_sensitivity.csv")
  237. if df.empty or "label" not in df.columns:
  238. return float("nan")
  239. candidate = df[df["label"] == "refined_candidate_baseline"].copy()
  240. if candidate.empty:
  241. return float("nan")
  242. candidate_row = candidate.iloc[0]
  243. neighborhood = df[~df["label"].isin(["current_alpha_control", "refined_candidate_baseline"])].copy()
  244. robust = neighborhood[
  245. (neighborhood["avg_return"] >= float(candidate_row["avg_return"]) - 0.0015)
  246. & (neighborhood["profit_factor"] >= float(candidate_row["profit_factor"]) - 0.20)
  247. & (neighborhood["real_buy_overlap"] >= int(candidate_row["real_buy_overlap"]) - 1)
  248. & (neighborhood["real_sell_overlap"] >= int(candidate_row["real_sell_overlap"]) - 1)
  249. ]
  250. return float(len(robust))
  251. def build_execution_monitor_review(base_dir: Path) -> None:
  252. monitor = _load_csv(base_dir / "dragon_daily_monitor_snapshot.csv")
  253. weekly = _load_csv(base_dir / "dragon_forward_weekly_summary.csv")
  254. html_text = (base_dir / "dragon_forward_weekly_review.html").read_text(encoding="utf-8")
  255. daily_text = (base_dir / "dragon_daily_signal_pipeline.py").read_text(encoding="utf-8")
  256. exec_text = (base_dir / "dragon_refined_execution_validation.py").read_text(encoding="utf-8")
  257. actual_removed = _calc_removed_trade_over_removal_count(base_dir)
  258. actual_robust = _calc_local_sensitivity_robust_case_count(base_dir)
  259. monitor_map = {
  260. str(row["metric"]): float(row["actual_value"])
  261. for _, row in monitor.iterrows()
  262. if pd.notna(row["actual_value"])
  263. }
  264. removed_match = pd.notna(actual_removed) and abs(monitor_map.get("removed_trade_over_removal_count", float("nan")) - actual_removed) < 1e-12
  265. robust_match = pd.notna(actual_robust) and abs(monitor_map.get("local_sensitivity_robust_case_count", float("nan")) - actual_robust) < 1e-12
  266. daily_uses_nan = 'float("nan") if buy_next is None' in daily_text and 'float("nan") if sell_next is None' in daily_text
  267. exec_uses_nan = 'float("nan") if buy_next is None' in exec_text and 'float("nan") if sell_next is None' in exec_text
  268. weekly_has_system_monitor = not weekly.empty and "system_monitor" in set(weekly["branch"].astype(str))
  269. weekly_html_has_system_monitor = "system_monitor" in html_text
  270. lines = [
  271. "# Dragon Review - Execution And Monitor Consistency",
  272. "",
  273. "## Governance Metrics",
  274. f"- `removed_trade_over_removal_count`: monitor `{monitor_map.get('removed_trade_over_removal_count')}` vs source-derived `{actual_removed}` -> `{'match' if removed_match else 'mismatch'}`",
  275. f"- `local_sensitivity_robust_case_count`: monitor `{monitor_map.get('local_sensitivity_robust_case_count')}` vs source-derived `{actual_robust}` -> `{'match' if robust_match else 'mismatch'}`",
  276. "",
  277. "## Execution Fallback Rule",
  278. f"- `dragon_daily_signal_pipeline.py` uses NaN on missing next-bar execution prices: `{daily_uses_nan}`",
  279. f"- `dragon_refined_execution_validation.py` uses NaN on missing next-bar execution prices: `{exec_uses_nan}`",
  280. "",
  281. "## Weekly Monitor Separation",
  282. f"- `dragon_forward_weekly_summary.csv` includes `system_monitor` row: `{weekly_has_system_monitor}`",
  283. f"- `dragon_forward_weekly_review.html` includes `system_monitor` text: `{weekly_html_has_system_monitor}`",
  284. "",
  285. "## Judgment",
  286. "- The monitor chain is trustworthy only if governance metrics are derived from current source artifacts rather than hard-coded constants.",
  287. "- The execution-aware chain is trustworthy only if missing next-bar prices do not silently fall back to same-bar close.",
  288. "- Weekly summary is cleaner now because branch rows and system-level monitor counts are separated.",
  289. ]
  290. (base_dir / "dragon_review_execution_monitor.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  291. def build_reporting_integrity_review(base_dir: Path) -> None:
  292. latest_bar = json.loads((base_dir / "dragon_forward_observation_state.json").read_text(encoding="utf-8")).get("latest_bar_date", "latest")
  293. files_to_check = [
  294. base_dir / "dragon_reports_index.html",
  295. base_dir / "dragon_daily_signal_report.html",
  296. base_dir / "dragon_forward_weekly_review.html",
  297. base_dir / "dragon_historical_trade_details.html",
  298. base_dir / "dragon_indicator_strategy_guide_cn.html",
  299. base_dir / "html_reports" / "index.html",
  300. base_dir / "html_reports" / f"dragon_daily_signal_report_{latest_bar}.html",
  301. base_dir / "html_reports" / f"dragon_forward_weekly_review_{latest_bar}.html",
  302. base_dir / "html_reports" / f"dragon_historical_trade_details_{latest_bar}.html",
  303. ]
  304. existence_rows = [{"path": str(path.relative_to(base_dir)), "exists": path.exists()} for path in files_to_check]
  305. index_text = (base_dir / "dragon_reports_index.html").read_text(encoding="utf-8")
  306. archive_index_text = (base_dir / "html_reports" / "index.html").read_text(encoding="utf-8")
  307. detail_text = (base_dir / "dragon_historical_trade_details.html").read_text(encoding="utf-8")
  308. daily_text = (base_dir / "dragon_daily_signal_report.html").read_text(encoding="utf-8")
  309. checks = [
  310. ("root index links to root daily report", 'href="dragon_daily_signal_report.html"' in index_text),
  311. (
  312. "root index links to archived daily report",
  313. f'href="html_reports/dragon_daily_signal_report_{latest_bar}.html"' in index_text,
  314. ),
  315. (
  316. "archive index links locally inside html_reports",
  317. f'href="dragon_daily_signal_report_{latest_bar}.html"' in archive_index_text,
  318. ),
  319. ("detail page contains snapshot summary strip", "snapshot-summary" in detail_text),
  320. ("detail page contains event summary labels", "总事件" in detail_text and "前一条:" in detail_text),
  321. ("detail page contains query filters", "branch-filter" in detail_text and "keyword-filter" in detail_text),
  322. ("daily report links to historical detail page", 'href="dragon_historical_trade_details.html"' in daily_text),
  323. ]
  324. lines = [
  325. "# Dragon Review - Reporting Integrity",
  326. "",
  327. "## File Existence",
  328. ]
  329. for row in existence_rows:
  330. lines.append(f"- `{row['path']}` -> `{row['exists']}`")
  331. lines.extend(["", "## Link / Feature Checks"])
  332. for label, passed in checks:
  333. lines.append(f"- {label}: `{passed}`")
  334. lines.extend(
  335. [
  336. "",
  337. "## Judgment",
  338. "- Root and archive HTML outputs are present and linked through the expected root-vs-archive relative paths.",
  339. "- Historical detail reporting currently includes the new indicator snapshot event-summary strip and deep-link filter controls.",
  340. "- Terminal mojibake remains a shell-display issue; these checks only validate file presence and embedded text markers, not browser rendering fidelity.",
  341. ]
  342. )
  343. (base_dir / "dragon_review_reporting_integrity.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  344. def build_system_final(base_dir: Path, branch_consistency: pd.DataFrame) -> None:
  345. mismatch_branches = branch_consistency[branch_consistency["status"] == "mismatch"].copy()
  346. trusted_direct = [
  347. "dragon_alpha_first_branch_summary.csv",
  348. "dragon_glued_refined_branch_summary.csv",
  349. "dragon_rc1_config_snapshot.json",
  350. "dragon_daily_monitor_snapshot.csv",
  351. ]
  352. if mismatch_branches.empty:
  353. trusted_direct.append("dragon_strategy_overview.csv")
  354. trusted_labeled = [
  355. "dragon_refined_execution_stress.csv",
  356. "dragon_cost_stress_test.csv",
  357. "dragon_forward_weekly_summary.csv",
  358. "dragon_historical_trade_details.html",
  359. ]
  360. not_recommended = []
  361. if not mismatch_branches.empty:
  362. not_recommended.append("dragon_strategy_overview.csv")
  363. lines = [
  364. "# Dragon System Review Final",
  365. "",
  366. "## Overall Judgment",
  367. "- The current workspace is much closer to a trustworthy research pack after the window-consistency sweep and monitor-fallback fixes.",
  368. "",
  369. "## Trust Tiers",
  370. "- 可信可直接使用:",
  371. ]
  372. if mismatch_branches.empty:
  373. lines.insert(4, "- Cross-report headline metrics are currently aligned across the main branch-summary, release, and overview artifacts.")
  374. lines.insert(5, "- The remaining cautions are about report interpretation and forward monitoring, not internal metric drift.")
  375. else:
  376. lines.insert(4, "- The main remaining risk is not strategy logic but metric-definition drift between a few report families.")
  377. lines.extend([f"- `{name}`" for name in trusted_direct])
  378. lines.append("- 可信但需标注口径:")
  379. lines.extend([f"- `{name}`" for name in trusted_labeled])
  380. lines.append("- 暂不建议直接引用:")
  381. if not_recommended:
  382. lines.extend([f"- `{name}`" for name in not_recommended])
  383. else:
  384. lines.append("- `none`")
  385. if not mismatch_branches.empty:
  386. lines.extend(["", "## Remaining Review Findings"])
  387. for _, row in mismatch_branches.iterrows():
  388. lines.append(
  389. f"- `{row['branch']}` / `{row['metric']}` remains inconsistent across report families; see `dragon_review_branch_metric_consistency.csv`."
  390. )
  391. lines.extend(
  392. [
  393. "",
  394. "## Practical Meaning",
  395. "- Use the branch-specific summary/release artifacts as the primary basis for governance decisions.",
  396. "- Use the consistency reports as an audit trail before external distribution of top-line metrics.",
  397. "- `dragon_strategy_overview.csv` is now aligned with the main branch artifacts and can be used as the compact comparison view.",
  398. ]
  399. )
  400. (base_dir / "dragon_system_review_final.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  401. def main() -> None:
  402. base_dir = Path(__file__).resolve().parent
  403. build_window_consistency_report(base_dir)
  404. branch_consistency = build_branch_metric_consistency(base_dir)
  405. build_execution_monitor_review(base_dir)
  406. build_reporting_integrity_review(base_dir)
  407. build_system_final(base_dir, branch_consistency)
# Run the full review only when executed as a script, not when imported.
if __name__ == "__main__":
    main()