from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable

import pandas as pd


@dataclass(frozen=True)
class WindowScript:
    """Describe one script covered by the trade-window consistency check.

    Attributes:
        path: Script file name, resolved relative to the review base directory.
        scope: Human-readable label of the window the script works on.
        category: Grouping label shown in the consistency report.
        expectation: ``"live_exception"`` marks the script as exempt from the
            dual-bound rule; every other value is checked for both bounds.
    """

    path: str
    scope: str
    category: str
    expectation: str


# Scripts audited by the window-consistency report, in report order.
WINDOW_SCRIPTS = [
    WindowScript("dragon_backtest.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_cost_stress_test.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_deep_oversold_confirmation_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_selective_veto_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_equity_curve_review.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_alpha_candidate.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_branch_review.py", "hybrid bounded window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_removed_trade_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_glued_refined_sensitivity.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refine_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_predictive_break_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_rc1_release.py", "fixed release window", "release", "dual_bound"),
    WindowScript("dragon_refined_alpha_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_refined_execution_validation.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_rule_ablation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_short_holding_audit.py", "workbook window", "audit", "dual_bound"),
    WindowScript("dragon_short_holding_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_strategy_overview.py", "fixed release window", "overview", "dual_bound"),
    WindowScript("dragon_threshold_perturbation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_daily_signal_pipeline.py", "live / forward window", "live", "live_exception"),
]


def _load_csv(path: Path) -> pd.DataFrame:
    """Read *path* as a UTF-8 (BOM-tolerant) CSV.

    Returns an empty ``DataFrame`` when the file is missing or contains no
    parseable columns, so callers can rely on a single ``.empty`` check.
    """
    if not path.exists():
        return pd.DataFrame()
    try:
        return pd.read_csv(path, encoding="utf-8-sig")
    except pd.errors.EmptyDataError:
        # A zero-byte artifact carries no more information than a missing one.
        return pd.DataFrame()


def _fmt_pct(value: object) -> str:
    """Format *value* as a percentage with two decimals, or ``"NA"`` if missing."""
    if value is None or pd.isna(value):
        return "NA"
    return f"{float(value):.2%}"


def _fmt_num(value: object, digits: int = 2) -> str:
    """Format *value* as a fixed-point number with *digits* decimals, or ``"NA"``."""
    if value is None or pd.isna(value):
        return "NA"
    return f"{float(value):.{digits}f}"


def _mentions_trade_column(text: str, column: str) -> bool:
    """Return True when *text* subscripts ``trades`` with *column* in either quote style."""
    return f'trades["{column}"]' in text or f"trades['{column}']" in text


def _window_filter_status(text: str, expectation: str) -> tuple[str, str]:
    """Classify a script's trade-window filter from its source text.

    The check is a plain substring search for ``trades[...]`` subscripts on
    ``buy_date`` and ``sell_date``; it does not parse the script.

    Args:
        text: Full source text of the script.
        expectation: ``"live_exception"`` short-circuits to ``EXEMPT``.

    Returns:
        A ``(status, note)`` pair where status is ``PASS``, ``FAIL`` or ``EXEMPT``.
    """
    if expectation == "live_exception":
        return "EXEMPT", "Live / forward pipeline intentionally keeps open-ended trade coverage."

    has_buy = _mentions_trade_column(text, "buy_date")
    has_sell = _mentions_trade_column(text, "sell_date")
    if has_buy and has_sell:
        return "PASS", "Trade filter constrains both buy_date and sell_date."
    if has_buy:
        return "FAIL", "Trade filter constrains buy_date only."
    if has_sell:
        # Previously reported as "no filter found", which hid the one-sided bound.
        return "FAIL", "Trade filter constrains sell_date only."
    return "FAIL", "No recognizable trade-window filter found."
# Leading columns of the branch-metric consistency table; per-source value
# columns are appended after these.
_CONSISTENCY_COLUMNS = [
    "branch",
    "metric",
    "source_count",
    "min_value",
    "max_value",
    "max_abs_diff",
    "status",
]

# Count-like metrics are compared with a tighter tolerance than ratio metrics.
_COUNT_METRICS = frozenset({"trades", "real_buy_overlap", "real_sell_overlap"})


def _read_text_if_exists(path: Path) -> str:
    """Return the UTF-8 text of *path*, or an empty string when the file is missing."""
    return path.read_text(encoding="utf-8") if path.exists() else ""


def build_window_consistency_report(base_dir: Path) -> pd.DataFrame:
    """Audit every entry of ``WINDOW_SCRIPTS`` and write the window-consistency report.

    Each script's source text is classified by ``_window_filter_status``. A
    script that cannot be found is recorded as ``FAIL`` instead of aborting
    the whole review.

    Args:
        base_dir: Directory holding the scripts; the Markdown report
            ``dragon_review_window_consistency.md`` is written there.

    Returns:
        One row per script with columns ``script``, ``scope``, ``category``,
        ``expectation``, ``status`` and ``note``.
    """
    rows: list[dict[str, object]] = []
    for item in WINDOW_SCRIPTS:
        script_path = base_dir / item.path
        if script_path.exists():
            status, note = _window_filter_status(script_path.read_text(encoding="utf-8"), item.expectation)
        else:
            status, note = "FAIL", "Script file not found."
        rows.append(
            {
                "script": item.path,
                "scope": item.scope,
                "category": item.category,
                "expectation": item.expectation,
                "status": status,
                "note": note,
            }
        )

    df = pd.DataFrame(rows)
    passed = int((df["status"] == "PASS").sum())
    exempt = int((df["status"] == "EXEMPT").sum())
    failed = int((df["status"] == "FAIL").sum())

    lines = [
        "# Dragon Review - Window Consistency",
        "",
        "## Scope",
        "- Goal: verify that in-sample / workbook-window trade statistics do not keep window-external exits.",
        "- Rule: for bounded research windows, trade filters must constrain both `buy_date` and `sell_date`.",
        "",
        "## Summary",
        f"- PASS: `{passed}`",
        f"- EXEMPT: `{exempt}`",
        f"- FAIL: `{failed}`",
        "",
        "## Result Table",
    ]
    lines.extend(
        f"- `{row['script']}` | {row['category']} | {row['scope']} | `{row['status']}` | {row['note']}"
        for row in rows
    )
    # The positive judgment is only stated when nothing failed.
    if failed == 0:
        lines.extend(
            [
                "",
                "## Judgment",
                "- The bounded research/evaluation pack is now on a consistent dual-bound trade-window rule.",
                "- `dragon_daily_signal_pipeline.py` remains intentionally exempt because it serves the live / forward chain rather than workbook-window evaluation.",
            ]
        )
    (base_dir / "dragon_review_window_consistency.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
    return df


def _branch_source_frames(base_dir: Path) -> dict[str, pd.DataFrame]:
    """Load the branch-summary CSVs that exist and are non-empty, keyed by file name."""
    names = (
        "dragon_strategy_overview.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_alpha_first_branch_summary.csv",
    )
    frames: dict[str, pd.DataFrame] = {}
    for name in names:
        frame = _load_csv(base_dir / name)
        if not frame.empty:
            frames[name] = frame
    return frames


def _iter_metric_values(base_dir: Path) -> Iterable[dict[str, object]]:
    """Yield ``source`` / ``branch`` / ``metric`` / ``value`` records from all sources.

    CSV sources contribute every listed metric column they actually have. The
    RC1 JSON snapshot is handled the same way: keys it does not contain are
    skipped rather than raising ``KeyError``.
    """
    metrics = [
        "trades",
        "win_rate",
        "avg_return",
        "median_return",
        "profit_factor",
        "compounded_return",
        "cagr",
        "real_buy_overlap",
        "real_sell_overlap",
    ]
    for source, df in _branch_source_frames(base_dir).items():
        if "branch" not in df.columns:
            # Without a branch label the rows cannot be matched across sources.
            continue
        present = [metric for metric in metrics if metric in df.columns]
        for _, row in df.iterrows():
            branch = str(row["branch"])
            for metric in present:
                yield {
                    "source": source,
                    "branch": branch,
                    "metric": metric,
                    "value": row[metric],
                }

    rc1_path = base_dir / "dragon_rc1_config_snapshot.json"
    if not rc1_path.exists():
        return
    payload = json.loads(rc1_path.read_text(encoding="utf-8"))
    if "branch_name" not in payload:
        return
    # The snapshot names the trade count differently from the CSV sources.
    key_map = [
        ("trades", "trade_count"),
        ("win_rate", "win_rate"),
        ("avg_return", "avg_return"),
        ("median_return", "median_return"),
        ("profit_factor", "profit_factor"),
        ("compounded_return", "compounded_return"),
        ("cagr", "cagr"),
    ]
    for metric, source_metric in key_map:
        if source_metric not in payload:
            continue
        yield {
            "source": "dragon_rc1_config_snapshot.json",
            "branch": str(payload["branch_name"]),
            "metric": metric,
            "value": payload[source_metric],
        }


def build_branch_metric_consistency(base_dir: Path) -> pd.DataFrame:
    """Cross-check branch metrics between report families and write CSV + Markdown.

    Args:
        base_dir: Directory holding the source artifacts; the outputs
            ``dragon_review_branch_metric_consistency.csv`` / ``.md`` are
            written there.

    Returns:
        One row per ``(branch, metric)`` with the columns listed in
        ``_CONSISTENCY_COLUMNS`` followed by one value column per source. The
        frame always carries those columns, even when no data was found, so
        callers can filter on ``status`` unconditionally.
    """
    csv_path = base_dir / "dragon_review_branch_metric_consistency.csv"
    md_path = base_dir / "dragon_review_branch_metric_consistency.md"

    raw = pd.DataFrame(list(_iter_metric_values(base_dir)))
    if raw.empty:
        empty = pd.DataFrame(columns=_CONSISTENCY_COLUMNS)
        empty.to_csv(csv_path, index=False, encoding="utf-8-sig")
        md_path.write_text(
            "# Dragon Review - Branch Metric Consistency\n\n- No source data found.\n",
            encoding="utf-8",
        )
        return empty

    raw["value"] = pd.to_numeric(raw["value"], errors="coerce")
    pivot = raw.pivot_table(
        index=["branch", "metric"], columns="source", values="value", aggfunc="last"
    ).reset_index()
    source_cols = [col for col in pivot.columns if col not in {"branch", "metric"}]

    records: list[dict[str, object]] = []
    for _, row in pivot.iterrows():
        values = [row[col] for col in source_cols if pd.notna(row[col])]
        if values:
            low, high = min(values), max(values)
            min_value, max_value = float(low), float(high)
            max_abs_diff = float(high - low)
        else:
            min_value = max_value = max_abs_diff = float("nan")

        if len(values) <= 1:
            status = "single_source"
        else:
            tol = 1e-12 if row["metric"] in _COUNT_METRICS else 1e-9
            status = "match" if max_abs_diff <= tol else "mismatch"

        rec: dict[str, object] = {
            "branch": row["branch"],
            "metric": row["metric"],
            "source_count": int(len(values)),
            "min_value": min_value,
            "max_value": max_value,
            "max_abs_diff": max_abs_diff,
            "status": status,
        }
        for col in source_cols:
            rec[col] = row[col]
        records.append(rec)

    # Explicit columns keep sort_values working when no record survived.
    result = (
        pd.DataFrame(records, columns=_CONSISTENCY_COLUMNS + source_cols)
        .sort_values(["branch", "metric"])
        .reset_index(drop=True)
    )
    result.to_csv(csv_path, index=False, encoding="utf-8-sig")

    mismatches = result[result["status"] == "mismatch"].copy()
    lines = [
        "# Dragon Review - Branch Metric Consistency",
        "",
        "## Scope",
        "- Sources compared:",
        "- `dragon_strategy_overview.csv`",
        "- `dragon_glued_refined_branch_summary.csv`",
        "- `dragon_alpha_first_branch_summary.csv`",
        "- `dragon_rc1_config_snapshot.json`",
        "",
        f"- Compared rows: `{len(result)}`",
        f"- Mismatches: `{len(mismatches)}`",
        "",
    ]
    if mismatches.empty:
        lines.append("- All compared branch metrics are consistent across current outputs.")
    else:
        lines.append("## Mismatches")
        for _, row in mismatches.iterrows():
            parts = [f"{col}={row[col]}" for col in source_cols if pd.notna(row[col])]
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` | spread `{row['max_abs_diff']}` | "
                + "; ".join(parts)
            )
    lines.extend(
        [
            "",
            "## Judgment",
            "- `match` means the same branch/metric agrees across all currently available source files.",
            "- `mismatch` means at least one report family is using a different metric definition or evaluation window.",
            "- `single_source` means only one current artifact exposes that metric, so cross-check confidence is lower.",
        ]
    )
    md_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return result


def _calc_removed_trade_over_removal_count(base_dir: Path) -> float:
    """Count ``OVER_REMOVAL`` recommendations in the removed-trade attribution CSV.

    Returns NaN when the CSV is unavailable or has no ``recommendation`` column.
    """
    df = _load_csv(base_dir / "dragon_glued_refined_removed_trade_attribution.csv")
    if df.empty or "recommendation" not in df.columns:
        return float("nan")
    return float((df["recommendation"].astype(str) == "OVER_REMOVAL").sum())


def _calc_local_sensitivity_robust_case_count(base_dir: Path) -> float:
    """Count sensitivity cases that stay close to the refined-candidate baseline.

    A case counts as robust when its ``avg_return`` is within 0.0015,
    its ``profit_factor`` within 0.20, and each overlap count within 1 of the
    ``refined_candidate_baseline`` row (one-sided: only shortfalls matter).

    Returns NaN when the CSV, the baseline row, or any compared column or
    baseline value is unavailable.
    """
    df = _load_csv(base_dir / "dragon_glued_refined_sensitivity.csv")
    if df.empty or "label" not in df.columns:
        return float("nan")

    compared = ["avg_return", "profit_factor", "real_buy_overlap", "real_sell_overlap"]
    if any(col not in df.columns for col in compared):
        return float("nan")

    candidate = df[df["label"] == "refined_candidate_baseline"]
    if candidate.empty:
        return float("nan")
    baseline = candidate.iloc[0]
    if baseline[compared].isna().any():
        # int(NaN) would raise; an incomplete baseline cannot be compared against.
        return float("nan")

    neighborhood = df[~df["label"].isin(["current_alpha_control", "refined_candidate_baseline"])]
    robust = neighborhood[
        (neighborhood["avg_return"] >= float(baseline["avg_return"]) - 0.0015)
        & (neighborhood["profit_factor"] >= float(baseline["profit_factor"]) - 0.20)
        & (neighborhood["real_buy_overlap"] >= int(baseline["real_buy_overlap"]) - 1)
        & (neighborhood["real_sell_overlap"] >= int(baseline["real_sell_overlap"]) - 1)
    ]
    return float(len(robust))


def build_execution_monitor_review(base_dir: Path) -> None:
    """Write ``dragon_review_execution_monitor.md`` for the monitor/execution chain.

    The report compares two monitor metrics with values recomputed from their
    source CSVs, checks by substring search whether two scripts use NaN for
    missing next-bar prices, and checks for a ``system_monitor`` marker in the
    weekly summary CSV and HTML. Missing inputs make the affected check report
    ``mismatch`` / ``False`` rather than raising.
    """
    monitor = _load_csv(base_dir / "dragon_daily_monitor_snapshot.csv")
    weekly = _load_csv(base_dir / "dragon_forward_weekly_summary.csv")
    html_text = _read_text_if_exists(base_dir / "dragon_forward_weekly_review.html")
    daily_text = _read_text_if_exists(base_dir / "dragon_daily_signal_pipeline.py")
    exec_text = _read_text_if_exists(base_dir / "dragon_refined_execution_validation.py")

    actual_removed = _calc_removed_trade_over_removal_count(base_dir)
    actual_robust = _calc_local_sensitivity_robust_case_count(base_dir)

    monitor_map: dict[str, float] = {}
    if {"metric", "actual_value"} <= set(monitor.columns):
        actual_values = pd.to_numeric(monitor["actual_value"], errors="coerce")
        monitor_map = {
            str(metric): float(value)
            for metric, value in zip(monitor["metric"], actual_values)
            if pd.notna(value)
        }

    def matches_monitor(metric: str, actual: float) -> bool:
        # A metric absent from the monitor compares as NaN, which is never a match.
        return bool(pd.notna(actual) and abs(monitor_map.get(metric, float("nan")) - actual) < 1e-12)

    removed_match = matches_monitor("removed_trade_over_removal_count", actual_removed)
    robust_match = matches_monitor("local_sensitivity_robust_case_count", actual_robust)

    nan_markers = ('float("nan") if buy_next is None', 'float("nan") if sell_next is None')
    daily_uses_nan = all(marker in daily_text for marker in nan_markers)
    exec_uses_nan = all(marker in exec_text for marker in nan_markers)

    weekly_has_system_monitor = (
        not weekly.empty
        and "branch" in weekly.columns
        and "system_monitor" in set(weekly["branch"].astype(str))
    )
    weekly_html_has_system_monitor = "system_monitor" in html_text

    lines = [
        "# Dragon Review - Execution And Monitor Consistency",
        "",
        "## Governance Metrics",
        f"- `removed_trade_over_removal_count`: monitor `{monitor_map.get('removed_trade_over_removal_count')}` vs source-derived `{actual_removed}` -> `{'match' if removed_match else 'mismatch'}`",
        f"- `local_sensitivity_robust_case_count`: monitor `{monitor_map.get('local_sensitivity_robust_case_count')}` vs source-derived `{actual_robust}` -> `{'match' if robust_match else 'mismatch'}`",
        "",
        "## Execution Fallback Rule",
        f"- `dragon_daily_signal_pipeline.py` uses NaN on missing next-bar execution prices: `{daily_uses_nan}`",
        f"- `dragon_refined_execution_validation.py` uses NaN on missing next-bar execution prices: `{exec_uses_nan}`",
        "",
        "## Weekly Monitor Separation",
        f"- `dragon_forward_weekly_summary.csv` includes `system_monitor` row: `{weekly_has_system_monitor}`",
        f"- `dragon_forward_weekly_review.html` includes `system_monitor` text: `{weekly_html_has_system_monitor}`",
        "",
        "## Judgment",
        "- The monitor chain is trustworthy only if governance metrics are derived from current source artifacts rather than hard-coded constants.",
        "- The execution-aware chain is trustworthy only if missing next-bar prices do not silently fall back to same-bar close.",
        "- Weekly summary is cleaner now because branch rows and system-level monitor counts are separated.",
    ]
    (base_dir / "dragon_review_execution_monitor.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
def build_reporting_integrity_review(base_dir: Path) -> None:
    """Write ``dragon_review_reporting_integrity.md`` for the HTML report set.

    The report lists whether each expected HTML output exists and whether a
    set of link / feature markers is found in the root index, archive index,
    historical detail page and daily report. Missing files are reported as
    ``False`` (their text is treated as empty) instead of aborting the review,
    and the positive judgment is only stated when every check passed.

    Args:
        base_dir: Directory holding the HTML outputs and
            ``dragon_forward_observation_state.json``; the report is written there.
    """

    def read_optional(path: Path) -> str:
        # Existence is reported separately, so absence must not raise here.
        return path.read_text(encoding="utf-8") if path.exists() else ""

    state_text = read_optional(base_dir / "dragon_forward_observation_state.json")
    state = json.loads(state_text) if state_text else {}
    latest_bar = state.get("latest_bar_date", "latest")

    archive_dir = base_dir / "html_reports"
    files_to_check = [
        base_dir / "dragon_reports_index.html",
        base_dir / "dragon_daily_signal_report.html",
        base_dir / "dragon_forward_weekly_review.html",
        base_dir / "dragon_historical_trade_details.html",
        base_dir / "dragon_indicator_strategy_guide_cn.html",
        archive_dir / "index.html",
        archive_dir / f"dragon_daily_signal_report_{latest_bar}.html",
        archive_dir / f"dragon_forward_weekly_review_{latest_bar}.html",
        archive_dir / f"dragon_historical_trade_details_{latest_bar}.html",
    ]
    existence_rows = [
        {"path": str(path.relative_to(base_dir)), "exists": path.exists()} for path in files_to_check
    ]

    index_text = read_optional(base_dir / "dragon_reports_index.html")
    archive_index_text = read_optional(archive_dir / "index.html")
    detail_text = read_optional(base_dir / "dragon_historical_trade_details.html")
    daily_text = read_optional(base_dir / "dragon_daily_signal_report.html")

    checks = [
        ("root index links to root daily report", 'href="dragon_daily_signal_report.html"' in index_text),
        (
            "root index links to archived daily report",
            f'href="html_reports/dragon_daily_signal_report_{latest_bar}.html"' in index_text,
        ),
        (
            "archive index links locally inside html_reports",
            f'href="dragon_daily_signal_report_{latest_bar}.html"' in archive_index_text,
        ),
        ("detail page contains snapshot summary strip", "snapshot-summary" in detail_text),
        ("detail page contains event summary labels", "总事件" in detail_text and "前一条:" in detail_text),
        ("detail page contains query filters", "branch-filter" in detail_text and "keyword-filter" in detail_text),
        ("daily report links to historical detail page", 'href="dragon_historical_trade_details.html"' in daily_text),
    ]

    lines = [
        "# Dragon Review - Reporting Integrity",
        "",
        "## File Existence",
    ]
    lines.extend(f"- `{row['path']}` -> `{row['exists']}`" for row in existence_rows)
    lines.extend(["", "## Link / Feature Checks"])
    lines.extend(f"- {label}: `{passed}`" for label, passed in checks)

    all_ok = all(row["exists"] for row in existence_rows) and all(passed for _, passed in checks)
    lines.extend(["", "## Judgment"])
    if all_ok:
        lines.extend(
            [
                "- Root and archive HTML outputs are present and linked through the expected root-vs-archive relative paths.",
                "- Historical detail reporting currently includes the new indicator snapshot event-summary strip and deep-link filter controls.",
            ]
        )
    else:
        lines.append(
            "- At least one expected HTML output is missing or fails a link / feature check; see the entries marked `False` above."
        )
    lines.append(
        "- Terminal mojibake remains a shell-display issue; these checks only validate file presence and embedded text markers, not browser rendering fidelity."
    )
    (base_dir / "dragon_review_reporting_integrity.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def build_system_final(base_dir: Path, branch_consistency: pd.DataFrame) -> None:
    """Write ``dragon_system_review_final.md`` summarising trust tiers.

    ``dragon_strategy_overview.csv`` is listed as directly trusted only when
    the consistency table holds compared rows and none is a ``mismatch``.
    Otherwise it is listed as not recommended, and the closing section no
    longer claims it is aligned.

    Args:
        base_dir: Directory the Markdown report is written to.
        branch_consistency: Output of ``build_branch_metric_consistency``. A
            frame without a ``status`` column or without rows is treated as
            "nothing was compared" rather than raising ``KeyError``.
    """
    has_comparison = "status" in branch_consistency.columns and not branch_consistency.empty
    if has_comparison:
        mismatch_branches = branch_consistency[branch_consistency["status"] == "mismatch"].copy()
    else:
        mismatch_branches = branch_consistency.iloc[0:0]
    has_mismatch = not mismatch_branches.empty
    overview_verified = has_comparison and not has_mismatch

    trusted_direct = [
        "dragon_alpha_first_branch_summary.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_rc1_config_snapshot.json",
        "dragon_daily_monitor_snapshot.csv",
    ]
    trusted_labeled = [
        "dragon_refined_execution_stress.csv",
        "dragon_cost_stress_test.csv",
        "dragon_forward_weekly_summary.csv",
        "dragon_historical_trade_details.html",
    ]
    not_recommended: list[str] = []
    if overview_verified:
        trusted_direct.append("dragon_strategy_overview.csv")
    else:
        not_recommended.append("dragon_strategy_overview.csv")

    lines = [
        "# Dragon System Review Final",
        "",
        "## Overall Judgment",
        "- The current workspace is much closer to a trustworthy research pack after the window-consistency sweep and monitor-fallback fixes.",
    ]
    if overview_verified:
        lines.extend(
            [
                "- Cross-report headline metrics are currently aligned across the main branch-summary, release, and overview artifacts.",
                "- The remaining cautions are about report interpretation and forward monitoring, not internal metric drift.",
            ]
        )
    elif has_mismatch:
        lines.append(
            "- The main remaining risk is not strategy logic but metric-definition drift between a few report families."
        )
    else:
        lines.append(
            "- Cross-report metric consistency could not be verified because no branch metric sources were compared."
        )

    lines.extend(["", "## Trust Tiers", "- 可信可直接使用:"])
    lines.extend(f"- `{name}`" for name in trusted_direct)
    lines.append("- 可信但需标注口径:")
    lines.extend(f"- `{name}`" for name in trusted_labeled)
    lines.append("- 暂不建议直接引用:")
    if not_recommended:
        lines.extend(f"- `{name}`" for name in not_recommended)
    else:
        lines.append("- `none`")

    if has_mismatch:
        lines.extend(["", "## Remaining Review Findings"])
        for _, row in mismatch_branches.iterrows():
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` remains inconsistent across report families; see `dragon_review_branch_metric_consistency.csv`."
            )

    # The closing overview statement must agree with the trust tier assigned above.
    if overview_verified:
        overview_line = "- `dragon_strategy_overview.csv` is now aligned with the main branch artifacts and can be used as the compact comparison view."
    elif has_mismatch:
        overview_line = "- Treat `dragon_strategy_overview.csv` as unverified until the mismatches listed above are resolved."
    else:
        overview_line = "- Treat `dragon_strategy_overview.csv` as unverified until branch metric sources are available for cross-checking."
    lines.extend(
        [
            "",
            "## Practical Meaning",
            "- Use the branch-specific summary/release artifacts as the primary basis for governance decisions.",
            "- Use the consistency reports as an audit trail before external distribution of top-line metrics.",
            overview_line,
        ]
    )
    (base_dir / "dragon_system_review_final.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run every review builder against the directory containing this script."""
    base_dir = Path(__file__).resolve().parent
    build_window_consistency_report(base_dir)
    branch_consistency = build_branch_metric_consistency(base_dir)
    build_execution_monitor_review(base_dir)
    build_reporting_integrity_review(base_dir)
    build_system_final(base_dir, branch_consistency)


if __name__ == "__main__":
    main()