| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455 |
- from __future__ import annotations
- import json
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Iterable
- import pandas as pd
@dataclass(frozen=True)
class WindowScript:
    """Registry entry for one script covered by the window-consistency review."""

    # Script file name; joined onto the review base directory when it is read.
    path: str
    # Label for the trade window the script works on (copied into the report).
    scope: str
    # Grouping label copied into the report, e.g. "evaluation" or "experiment".
    category: str
    # Expected filter rule; "live_exception" marks the script as exempt.
    expectation: str
# Scripts audited by the window-consistency review.  Each entry is
# (path, scope, category, expectation); only the final entry carries the
# "live_exception" expectation, which exempts it from the dual-bound rule.
WINDOW_SCRIPTS = [
    WindowScript("dragon_backtest.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_cost_stress_test.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_deep_oversold_confirmation_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_selective_veto_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_equity_curve_review.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_alpha_candidate.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_branch_review.py", "hybrid bounded window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_removed_trade_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_glued_refined_sensitivity.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refine_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_predictive_break_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_rc1_release.py", "fixed release window", "release", "dual_bound"),
    WindowScript("dragon_refined_alpha_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_refined_execution_validation.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_rule_ablation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_short_holding_audit.py", "workbook window", "audit", "dual_bound"),
    WindowScript("dragon_short_holding_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_strategy_overview.py", "fixed release window", "overview", "dual_bound"),
    WindowScript("dragon_threshold_perturbation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_daily_signal_pipeline.py", "live / forward window", "live", "live_exception"),
]
- def _load_csv(path: Path) -> pd.DataFrame:
- return pd.DataFrame() if not path.exists() else pd.read_csv(path, encoding="utf-8-sig")
- def _fmt_pct(value: object) -> str:
- if value is None or pd.isna(value):
- return "NA"
- return f"{float(value):.2%}"
- def _fmt_num(value: object, digits: int = 2) -> str:
- if value is None or pd.isna(value):
- return "NA"
- return f"{float(value):.{digits}f}"
- def _window_filter_status(text: str, expectation: str) -> tuple[str, str]:
- has_buy = 'trades["buy_date"]' in text
- has_sell = 'trades["sell_date"]' in text
- if expectation == "live_exception":
- return "EXEMPT", "Live / forward pipeline intentionally keeps open-ended trade coverage."
- if has_buy and has_sell:
- return "PASS", "Trade filter constrains both buy_date and sell_date."
- if has_buy and not has_sell:
- return "FAIL", "Trade filter constrains buy_date only."
- return "FAIL", "No recognizable trade-window filter found."
def build_window_consistency_report(base_dir: Path) -> pd.DataFrame:
    """Audit every registered script's trade-window filter and write the report.

    Each entry of ``WINDOW_SCRIPTS`` is read from ``base_dir`` and classified
    with ``_window_filter_status``.  A registered script that is missing on
    disk is recorded as a ``FAIL`` row instead of aborting the whole audit, so
    the report is always produced.

    Side effects:
        Writes ``dragon_review_window_consistency.md`` into ``base_dir``.

    Args:
        base_dir: Directory holding the scripts and receiving the report.

    Returns:
        One row per registered script with columns ``script``, ``scope``,
        ``category``, ``expectation``, ``status`` and ``note``.
    """
    rows: list[dict[str, object]] = []
    for item in WINDOW_SCRIPTS:
        try:
            text = (base_dir / item.path).read_text(encoding="utf-8")
        except FileNotFoundError:
            # An unreadable script cannot be verified, so it counts as a failure.
            status, note = "FAIL", "Script file not found."
        else:
            status, note = _window_filter_status(text, item.expectation)
        rows.append(
            {
                "script": item.path,
                "scope": item.scope,
                "category": item.category,
                "expectation": item.expectation,
                "status": status,
                "note": note,
            }
        )

    # Count from the row list so an empty registry cannot raise on a missing column.
    passed = sum(row["status"] == "PASS" for row in rows)
    exempt = sum(row["status"] == "EXEMPT" for row in rows)
    failed = sum(row["status"] == "FAIL" for row in rows)

    lines = [
        "# Dragon Review - Window Consistency",
        "",
        "## Scope",
        "- Goal: verify that in-sample / workbook-window trade statistics do not keep window-external exits.",
        "- Rule: for bounded research windows, trade filters must constrain both `buy_date` and `sell_date`.",
        "",
        "## Summary",
        f"- PASS: `{passed}`",
        f"- EXEMPT: `{exempt}`",
        f"- FAIL: `{failed}`",
        "",
        "## Result Table",
    ]
    lines.extend(
        f"- `{row['script']}` | {row['category']} | {row['scope']} | `{row['status']}` | {row['note']}"
        for row in rows
    )
    # The positive judgment is only stated when nothing failed.
    if failed == 0:
        lines.extend(
            [
                "",
                "## Judgment",
                "- The bounded research/evaluation pack is now on a consistent dual-bound trade-window rule.",
                "- `dragon_daily_signal_pipeline.py` remains intentionally exempt because it serves the live / forward chain rather than workbook-window evaluation.",
            ]
        )
    (base_dir / "dragon_review_window_consistency.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
    return pd.DataFrame(rows)
def _branch_source_frames(base_dir: Path) -> dict[str, pd.DataFrame]:
    """Load the branch-summary CSVs that exist and contain rows.

    Returns:
        Mapping of file name to loaded frame, in the fixed order overview,
        glued-refined, alpha-first.  Files that are missing or load as an
        empty frame are left out.
    """
    candidate_names = (
        "dragon_strategy_overview.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_alpha_first_branch_summary.csv",
    )
    loaded: dict[str, pd.DataFrame] = {}
    for file_name in candidate_names:
        frame = _load_csv(base_dir / file_name)
        if frame.empty:
            continue
        loaded[file_name] = frame
    return loaded
def _iter_metric_values(base_dir: Path) -> Iterable[dict[str, object]]:
    """Yield one ``{source, branch, metric, value}`` record per available metric.

    Records come first from every row of each branch-summary CSV (only for the
    tracked metric columns the file actually has), then from the RC1 config
    snapshot JSON if that file exists.  The snapshot stores the trade count
    under ``trade_count``; it is emitted under the metric name ``trades``.
    """
    tracked_metrics = (
        "trades",
        "win_rate",
        "avg_return",
        "median_return",
        "profit_factor",
        "compounded_return",
        "cagr",
        "real_buy_overlap",
        "real_sell_overlap",
    )
    for source_name, frame in _branch_source_frames(base_dir).items():
        for _, record in frame.iterrows():
            branch_name = str(record["branch"])
            for metric_name in tracked_metrics:
                if metric_name not in record.index:
                    continue
                yield {
                    "source": source_name,
                    "branch": branch_name,
                    "metric": metric_name,
                    "value": record[metric_name],
                }

    snapshot_path = base_dir / "dragon_rc1_config_snapshot.json"
    if not snapshot_path.exists():
        return
    snapshot = json.loads(snapshot_path.read_text(encoding="utf-8"))
    # Metric name used in the comparison -> key it is stored under in the snapshot.
    snapshot_keys = {
        "trades": "trade_count",
        "win_rate": "win_rate",
        "avg_return": "avg_return",
        "median_return": "median_return",
        "profit_factor": "profit_factor",
        "compounded_return": "compounded_return",
        "cagr": "cagr",
    }
    for metric_name, snapshot_key in snapshot_keys.items():
        yield {
            "source": "dragon_rc1_config_snapshot.json",
            "branch": str(snapshot["branch_name"]),
            "metric": metric_name,
            "value": snapshot[snapshot_key],
        }
def build_branch_metric_consistency(base_dir: Path) -> pd.DataFrame:
    """Cross-check each branch metric across every artifact that reports it.

    Values from ``_iter_metric_values`` are coerced to numbers and pivoted to
    one row per ``(branch, metric)`` with one column per source.  A row is
    ``single_source`` when at most one source has a value, ``match`` when the
    spread between sources is within tolerance, and ``mismatch`` otherwise.

    Side effects:
        Writes ``dragon_review_branch_metric_consistency.csv`` and ``.md``
        into ``base_dir``.

    Args:
        base_dir: Directory holding the source artifacts and receiving output.

    Returns:
        The consistency table.  With nothing to compare, an empty frame that
        still carries the standard columns (including ``status``) is returned,
        so callers can filter on ``status`` without a ``KeyError``.
    """
    csv_path = base_dir / "dragon_review_branch_metric_consistency.csv"
    md_path = base_dir / "dragon_review_branch_metric_consistency.md"
    base_columns = [
        "branch",
        "metric",
        "source_count",
        "min_value",
        "max_value",
        "max_abs_diff",
        "status",
    ]
    # Count-like metrics must agree almost exactly; ratios get a looser tolerance.
    count_metrics = {"trades", "real_buy_overlap", "real_sell_overlap"}

    raw = pd.DataFrame(list(_iter_metric_values(base_dir)))
    pivot = pd.DataFrame()
    if not raw.empty:
        raw["value"] = pd.to_numeric(raw["value"], errors="coerce")
        pivot = raw.pivot_table(
            index=["branch", "metric"], columns="source", values="value", aggfunc="last"
        ).reset_index()

    if pivot.empty:
        # Covers both "no records" and "no numeric value survived coercion".
        # A header-only CSV keeps the file readable; a zero-column file is not.
        empty = pd.DataFrame(columns=base_columns)
        empty.to_csv(csv_path, index=False, encoding="utf-8-sig")
        md_path.write_text(
            "# Dragon Review - Branch Metric Consistency\n\n- No source data found.\n",
            encoding="utf-8",
        )
        return empty

    source_cols = [col for col in pivot.columns if col not in {"branch", "metric"}]
    records: list[dict[str, object]] = []
    for _, row in pivot.iterrows():
        values = [row[col] for col in source_cols if pd.notna(row[col])]
        low = float(min(values)) if values else float("nan")
        high = float(max(values)) if values else float("nan")
        max_abs_diff = float(max(values) - min(values)) if values else float("nan")
        if len(values) <= 1:
            status = "single_source"
        else:
            tol = 1e-12 if row["metric"] in count_metrics else 1e-9
            status = "match" if max_abs_diff <= tol else "mismatch"
        rec: dict[str, object] = {
            "branch": row["branch"],
            "metric": row["metric"],
            "source_count": len(values),
            "min_value": low,
            "max_value": high,
            "max_abs_diff": max_abs_diff,
            "status": status,
        }
        # Keep the per-source values next to the verdict for auditability.
        rec.update({col: row[col] for col in source_cols})
        records.append(rec)

    result = pd.DataFrame(records).sort_values(["branch", "metric"]).reset_index(drop=True)
    result.to_csv(csv_path, index=False, encoding="utf-8-sig")
    mismatches = result[result["status"] == "mismatch"].copy()

    lines = [
        "# Dragon Review - Branch Metric Consistency",
        "",
        "## Scope",
        "- Sources compared:",
        "- `dragon_strategy_overview.csv`",
        "- `dragon_glued_refined_branch_summary.csv`",
        "- `dragon_alpha_first_branch_summary.csv`",
        "- `dragon_rc1_config_snapshot.json`",
        "",
        f"- Compared rows: `{len(result)}`",
        f"- Mismatches: `{len(mismatches)}`",
        "",
    ]
    if mismatches.empty:
        lines.append("- All compared branch metrics are consistent across current outputs.")
    else:
        lines.append("## Mismatches")
        for _, row in mismatches.iterrows():
            parts = [f"{col}={row[col]}" for col in source_cols if pd.notna(row[col])]
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` | spread `{row['max_abs_diff']}` | " + "; ".join(parts)
            )
    lines.extend(
        [
            "",
            "## Judgment",
            "- `match` means the same branch/metric agrees across all currently available source files.",
            "- `mismatch` means at least one report family is using a different metric definition or evaluation window.",
            "- `single_source` means only one current artifact exposes that metric, so cross-check confidence is lower.",
        ]
    )
    md_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return result
def _calc_removed_trade_over_removal_count(base_dir: Path) -> float:
    """Count removed-trade attribution rows recommended as ``OVER_REMOVAL``.

    Returns NaN when the attribution CSV is missing, empty, or has no
    ``recommendation`` column.
    """
    attribution = _load_csv(base_dir / "dragon_glued_refined_removed_trade_attribution.csv")
    usable = not attribution.empty and "recommendation" in attribution.columns
    if not usable:
        return float("nan")
    is_over_removal = attribution["recommendation"].astype(str) == "OVER_REMOVAL"
    return float(is_over_removal.sum())
def _calc_local_sensitivity_robust_case_count(base_dir: Path) -> float:
    """Count sensitivity cases that stay close to the refined candidate baseline.

    The baseline is the first row labelled ``refined_candidate_baseline``.
    Every row other than the baseline and ``current_alpha_control`` is a
    neighbour.  A neighbour is robust when its average return is at most
    0.0015 below the baseline, its profit factor at most 0.20 below, and each
    overlap count at most 1 below.

    Returns NaN when the sensitivity CSV is missing, empty, has no ``label``
    column, or has no baseline row.
    """
    table = _load_csv(base_dir / "dragon_glued_refined_sensitivity.csv")
    if table.empty or "label" not in table.columns:
        return float("nan")

    baseline_rows = table[table["label"] == "refined_candidate_baseline"].copy()
    if baseline_rows.empty:
        return float("nan")
    baseline = baseline_rows.iloc[0]

    reference_labels = ["current_alpha_control", "refined_candidate_baseline"]
    neighbours = table[~table["label"].isin(reference_labels)].copy()

    return_ok = neighbours["avg_return"] >= float(baseline["avg_return"]) - 0.0015
    factor_ok = neighbours["profit_factor"] >= float(baseline["profit_factor"]) - 0.20
    buy_ok = neighbours["real_buy_overlap"] >= int(baseline["real_buy_overlap"]) - 1
    sell_ok = neighbours["real_sell_overlap"] >= int(baseline["real_sell_overlap"]) - 1

    robust_cases = neighbours[return_ok & factor_ok & buy_ok & sell_ok]
    return float(len(robust_cases))
def build_execution_monitor_review(base_dir: Path) -> None:
    """Write the execution / monitor consistency review as markdown.

    Three things are checked:
      * the two governance counts in the daily monitor snapshot against the
        values recomputed from their source CSVs;
      * whether the daily pipeline and the execution-validation script both
        contain the NaN fallback expressions for missing next-bar prices;
      * whether the weekly summary CSV and weekly HTML mention
        ``system_monitor``.

    Side effects:
        Writes ``dragon_review_execution_monitor.md`` into ``base_dir``.
    """
    monitor = _load_csv(base_dir / "dragon_daily_monitor_snapshot.csv")
    weekly = _load_csv(base_dir / "dragon_forward_weekly_summary.csv")
    html_text = (base_dir / "dragon_forward_weekly_review.html").read_text(encoding="utf-8")
    daily_text = (base_dir / "dragon_daily_signal_pipeline.py").read_text(encoding="utf-8")
    exec_text = (base_dir / "dragon_refined_execution_validation.py").read_text(encoding="utf-8")

    actual_removed = _calc_removed_trade_over_removal_count(base_dir)
    actual_robust = _calc_local_sensitivity_robust_case_count(base_dir)

    # metric name -> recorded value, skipping rows whose value is missing.
    monitor_map: dict[str, float] = {}
    for _, entry in monitor.iterrows():
        if pd.isna(entry["actual_value"]):
            continue
        metric_name = str(entry["metric"])
        monitor_map[metric_name] = float(entry["actual_value"])

    def agrees(metric_name: str, derived: float) -> bool:
        # A NaN on either side can never satisfy the tolerance, so it is a mismatch.
        if pd.isna(derived):
            return False
        recorded = monitor_map.get(metric_name, float("nan"))
        return abs(recorded - derived) < 1e-12

    def verdict(flag: bool) -> str:
        return "match" if flag else "mismatch"

    removed_match = agrees("removed_trade_over_removal_count", actual_removed)
    robust_match = agrees("local_sensitivity_robust_case_count", actual_robust)

    nan_markers = (
        'float("nan") if buy_next is None',
        'float("nan") if sell_next is None',
    )
    daily_uses_nan = all(marker in daily_text for marker in nan_markers)
    exec_uses_nan = all(marker in exec_text for marker in nan_markers)

    if weekly.empty:
        weekly_has_system_monitor = False
    else:
        weekly_has_system_monitor = "system_monitor" in set(weekly["branch"].astype(str))
    weekly_html_has_system_monitor = "system_monitor" in html_text

    monitor_removed = monitor_map.get("removed_trade_over_removal_count")
    monitor_robust = monitor_map.get("local_sensitivity_robust_case_count")

    lines = [
        "# Dragon Review - Execution And Monitor Consistency",
        "",
        "## Governance Metrics",
        f"- `removed_trade_over_removal_count`: monitor `{monitor_removed}` vs source-derived `{actual_removed}` -> `{verdict(removed_match)}`",
        f"- `local_sensitivity_robust_case_count`: monitor `{monitor_robust}` vs source-derived `{actual_robust}` -> `{verdict(robust_match)}`",
        "",
        "## Execution Fallback Rule",
        f"- `dragon_daily_signal_pipeline.py` uses NaN on missing next-bar execution prices: `{daily_uses_nan}`",
        f"- `dragon_refined_execution_validation.py` uses NaN on missing next-bar execution prices: `{exec_uses_nan}`",
        "",
        "## Weekly Monitor Separation",
        f"- `dragon_forward_weekly_summary.csv` includes `system_monitor` row: `{weekly_has_system_monitor}`",
        f"- `dragon_forward_weekly_review.html` includes `system_monitor` text: `{weekly_html_has_system_monitor}`",
        "",
        "## Judgment",
        "- The monitor chain is trustworthy only if governance metrics are derived from current source artifacts rather than hard-coded constants.",
        "- The execution-aware chain is trustworthy only if missing next-bar prices do not silently fall back to same-bar close.",
        "- Weekly summary is cleaner now because branch rows and system-level monitor counts are separated.",
    ]
    report_path = base_dir / "dragon_review_execution_monitor.md"
    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
def build_reporting_integrity_review(base_dir: Path) -> None:
    """Check that the HTML report set exists and is cross-linked, then write the review.

    The archive file names are built from ``latest_bar_date`` in
    ``dragon_forward_observation_state.json``; the literal ``"latest"`` is
    used when the key, or the state file itself, is absent.

    Missing files no longer abort the review.  They show up as ``False`` in
    the existence table, and every link / feature check that needed their
    text evaluates to ``False``.  The positive judgment is only written when
    every file exists and every check passes.

    Side effects:
        Writes ``dragon_review_reporting_integrity.md`` into ``base_dir``.

    Raises:
        json.JSONDecodeError: If the state file exists but is not valid JSON.
    """

    def read_or_blank(path: Path) -> str:
        # A missing file is already reported in the existence table; here it
        # only needs to make the dependent substring checks come out False.
        try:
            return path.read_text(encoding="utf-8")
        except FileNotFoundError:
            return ""

    state_path = base_dir / "dragon_forward_observation_state.json"
    try:
        state = json.loads(state_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        state = {}
    latest_bar = state.get("latest_bar_date", "latest")

    archive_dir = base_dir / "html_reports"
    files_to_check = [
        base_dir / "dragon_reports_index.html",
        base_dir / "dragon_daily_signal_report.html",
        base_dir / "dragon_forward_weekly_review.html",
        base_dir / "dragon_historical_trade_details.html",
        base_dir / "dragon_indicator_strategy_guide_cn.html",
        archive_dir / "index.html",
        archive_dir / f"dragon_daily_signal_report_{latest_bar}.html",
        archive_dir / f"dragon_forward_weekly_review_{latest_bar}.html",
        archive_dir / f"dragon_historical_trade_details_{latest_bar}.html",
    ]
    existence_rows = [
        {"path": str(path.relative_to(base_dir)), "exists": path.exists()} for path in files_to_check
    ]

    index_text = read_or_blank(base_dir / "dragon_reports_index.html")
    archive_index_text = read_or_blank(archive_dir / "index.html")
    detail_text = read_or_blank(base_dir / "dragon_historical_trade_details.html")
    daily_text = read_or_blank(base_dir / "dragon_daily_signal_report.html")

    checks = [
        ("root index links to root daily report", 'href="dragon_daily_signal_report.html"' in index_text),
        (
            "root index links to archived daily report",
            f'href="html_reports/dragon_daily_signal_report_{latest_bar}.html"' in index_text,
        ),
        (
            "archive index links locally inside html_reports",
            f'href="dragon_daily_signal_report_{latest_bar}.html"' in archive_index_text,
        ),
        ("detail page contains snapshot summary strip", "snapshot-summary" in detail_text),
        ("detail page contains event summary labels", "总事件" in detail_text and "前一条:" in detail_text),
        ("detail page contains query filters", "branch-filter" in detail_text and "keyword-filter" in detail_text),
        ("daily report links to historical detail page", 'href="dragon_historical_trade_details.html"' in daily_text),
    ]

    lines = [
        "# Dragon Review - Reporting Integrity",
        "",
        "## File Existence",
    ]
    lines.extend(f"- `{row['path']}` -> `{row['exists']}`" for row in existence_rows)
    lines.extend(["", "## Link / Feature Checks"])
    lines.extend(f"- {label}: `{passed}`" for label, passed in checks)

    all_present = all(row["exists"] for row in existence_rows)
    all_checks_pass = all(passed for _, passed in checks)
    lines.extend(["", "## Judgment"])
    if all_present and all_checks_pass:
        lines.extend(
            [
                "- Root and archive HTML outputs are present and linked through the expected root-vs-archive relative paths.",
                "- Historical detail reporting currently includes the new indicator snapshot event-summary strip and deep-link filter controls.",
            ]
        )
    else:
        # Do not claim integrity that the tables above contradict.
        lines.append(
            "- One or more expected outputs or link / feature checks failed; see the `False` entries above."
        )
    lines.append(
        "- Terminal mojibake remains a shell-display issue; these checks only validate file presence and embedded text markers, not browser rendering fidelity."
    )
    (base_dir / "dragon_review_reporting_integrity.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
def build_system_final(base_dir: Path, branch_consistency: pd.DataFrame) -> None:
    """Write the final system-review summary with artifact trust tiers.

    ``dragon_strategy_overview.csv`` is listed as directly trustworthy only
    when the consistency table has rows and none of them is a ``mismatch``.
    Otherwise (mismatches present, or nothing was available to compare) it is
    listed as not recommended, and the closing guidance says so instead of
    claiming the file is aligned.

    Side effects:
        Writes ``dragon_system_review_final.md`` into ``base_dir``.

    Args:
        base_dir: Directory receiving the report.
        branch_consistency: Output of ``build_branch_metric_consistency``.  An
            empty frame, or one without a ``status`` column, is treated as
            "no comparison available" and does not raise.
    """
    comparison_available = "status" in branch_consistency.columns and not branch_consistency.empty
    if comparison_available:
        mismatch_branches = branch_consistency[branch_consistency["status"] == "mismatch"].copy()
    else:
        mismatch_branches = branch_consistency.iloc[0:0]
    has_mismatch = not mismatch_branches.empty
    overview_verified = comparison_available and not has_mismatch

    trusted_direct = [
        "dragon_alpha_first_branch_summary.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_rc1_config_snapshot.json",
        "dragon_daily_monitor_snapshot.csv",
    ]
    trusted_labeled = [
        "dragon_refined_execution_stress.csv",
        "dragon_cost_stress_test.csv",
        "dragon_forward_weekly_summary.csv",
        "dragon_historical_trade_details.html",
    ]
    not_recommended: list[str] = []
    if overview_verified:
        trusted_direct.append("dragon_strategy_overview.csv")
    else:
        not_recommended.append("dragon_strategy_overview.csv")

    if overview_verified:
        judgment_detail = [
            "- Cross-report headline metrics are currently aligned across the main branch-summary, release, and overview artifacts.",
            "- The remaining cautions are about report interpretation and forward monitoring, not internal metric drift.",
        ]
    elif has_mismatch:
        judgment_detail = [
            "- The main remaining risk is not strategy logic but metric-definition drift between a few report families.",
        ]
    else:
        judgment_detail = [
            "- No branch metrics were available for cross-report comparison, so headline-metric alignment is currently unverified.",
        ]

    lines = [
        "# Dragon System Review Final",
        "",
        "## Overall Judgment",
        "- The current workspace is much closer to a trustworthy research pack after the window-consistency sweep and monitor-fallback fixes.",
        *judgment_detail,
        "",
        "## Trust Tiers",
        "- 可信可直接使用:",
    ]
    lines.extend(f"- `{name}`" for name in trusted_direct)
    lines.append("- 可信但需标注口径:")
    lines.extend(f"- `{name}`" for name in trusted_labeled)
    lines.append("- 暂不建议直接引用:")
    if not_recommended:
        lines.extend(f"- `{name}`" for name in not_recommended)
    else:
        lines.append("- `none`")

    if has_mismatch:
        lines.extend(["", "## Remaining Review Findings"])
        for _, row in mismatch_branches.iterrows():
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` remains inconsistent across report families; see `dragon_review_branch_metric_consistency.csv`."
            )

    # The overview guidance must agree with the tier the file was placed in above.
    if overview_verified:
        overview_guidance = "- `dragon_strategy_overview.csv` is now aligned with the main branch artifacts and can be used as the compact comparison view."
    else:
        overview_guidance = "- `dragon_strategy_overview.csv` should not be used as the compact comparison view until its metrics are confirmed against the main branch artifacts."
    lines.extend(
        [
            "",
            "## Practical Meaning",
            "- Use the branch-specific summary/release artifacts as the primary basis for governance decisions.",
            "- Use the consistency reports as an audit trail before external distribution of top-line metrics.",
            overview_guidance,
        ]
    )
    (base_dir / "dragon_system_review_final.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
    """Run every review builder against the directory containing this file.

    The branch-metric consistency table is passed on to the final summary;
    the other builders only write their own report files.
    """
    review_root = Path(__file__).resolve().parent
    build_window_consistency_report(review_root)
    consistency_table = build_branch_metric_consistency(review_root)
    build_execution_monitor_review(review_root)
    build_reporting_integrity_review(review_root)
    build_system_final(review_root, consistency_table)


if __name__ == "__main__":
    main()
|