# openclaw / cyb50-quant
							from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable

import pandas as pd


@dataclass(frozen=True)
class WindowScript:
    """Registry entry for one script covered by the window-consistency review.

    Instances are immutable (``frozen=True``).
    """

    # File name of the script; joined onto the review's base directory when read.
    path: str
    # Free-text label for the window the script works on; copied into the report.
    scope: str
    # Reporting category copied into the result table (e.g. "evaluation").
    category: str
    # Expected filter rule; the value "live_exception" marks the script as exempt.
    expectation: str


# Registry of scripts inspected by the window-consistency review.
# Each entry is (path, scope, category, expectation); only the final entry uses
# the "live_exception" expectation, every other script is expected to be
# "dual_bound".
WINDOW_SCRIPTS = [
    WindowScript("dragon_backtest.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_cost_stress_test.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_deep_oversold_confirmation_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_deep_oversold_selective_veto_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_equity_curve_review.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_alpha_candidate.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_branch_review.py", "hybrid bounded window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refined_removed_trade_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_glued_refined_sensitivity.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_glued_refine_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_predictive_break_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_rc1_release.py", "fixed release window", "release", "dual_bound"),
    WindowScript("dragon_refined_alpha_attribution.py", "fixed release window", "attribution", "dual_bound"),
    WindowScript("dragon_refined_execution_validation.py", "fixed release window", "evaluation", "dual_bound"),
    WindowScript("dragon_rule_ablation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_short_holding_audit.py", "workbook window", "audit", "dual_bound"),
    WindowScript("dragon_short_holding_experiments.py", "workbook window", "experiment", "dual_bound"),
    WindowScript("dragon_strategy_overview.py", "fixed release window", "overview", "dual_bound"),
    WindowScript("dragon_threshold_perturbation.py", "workbook window", "evaluation", "dual_bound"),
    WindowScript("dragon_daily_signal_pipeline.py", "live / forward window", "live", "live_exception"),
]


def _load_csv(path: Path) -> pd.DataFrame:
    """Read a CSV file, returning an empty frame when there is nothing to load.

    Args:
        path: Location of the CSV file. It is decoded as ``utf-8-sig`` so a
            leading byte-order mark is tolerated.

    Returns:
        The parsed frame, or an empty ``DataFrame`` when the file does not
        exist or exists but contains no parsable header/data.
    """
    if not path.exists():
        return pd.DataFrame()
    try:
        return pd.read_csv(path, encoding="utf-8-sig")
    except pd.errors.EmptyDataError:
        # A zero-byte (or BOM-only) file has no columns to parse. Callers only
        # test ``.empty``, so treat it exactly like a missing file.
        return pd.DataFrame()


def _fmt_pct(value: object) -> str:
    """Render *value* as a percentage with two decimals, or ``"NA"`` if missing.

    ``None`` and anything ``pandas.isna`` reports as missing yield ``"NA"``;
    every other value is converted with ``float`` before formatting.
    """
    is_missing = value is None or pd.isna(value)
    return "NA" if is_missing else f"{float(value):.2%}"


def _fmt_num(value: object, digits: int = 2) -> str:
    """Render *value* as a fixed-point number, or ``"NA"`` if missing.

    Args:
        value: Number-like input; ``None`` or a ``pandas.isna`` value yields ``"NA"``.
        digits: Count of decimal places in the output.
    """
    is_missing = value is None or pd.isna(value)
    if is_missing:
        return "NA"
    return format(float(value), f".{digits}f")


def _window_filter_status(text: str, expectation: str) -> tuple[str, str]:
    """Classify a script's trade-window filter from its source text.

    The check is a plain substring search for ``trades["buy_date"]`` and
    ``trades["sell_date"]``; it does not parse the script.

    Args:
        text: Full source text of the script under review.
        expectation: Expected rule for the script. ``"live_exception"`` short
            circuits to an exempt verdict regardless of the text.

    Returns:
        A ``(status, note)`` pair where status is ``"EXEMPT"``, ``"PASS"`` or
        ``"FAIL"`` and note is a one-sentence explanation for the report.
    """
    if expectation == "live_exception":
        return "EXEMPT", "Live / forward pipeline intentionally keeps open-ended trade coverage."

    has_buy = 'trades["buy_date"]' in text
    has_sell = 'trades["sell_date"]' in text
    if has_buy and has_sell:
        return "PASS", "Trade filter constrains both buy_date and sell_date."
    if has_buy:
        return "FAIL", "Trade filter constrains buy_date only."
    if has_sell:
        # Previously reported as "no filter found", which hid the actual gap.
        return "FAIL", "Trade filter constrains sell_date only."
    return "FAIL", "No recognizable trade-window filter found."


def build_window_consistency_report(base_dir: Path) -> pd.DataFrame:
    """Audit every registered script's trade-window filter and write the report.

    Each entry of ``WINDOW_SCRIPTS`` is read from ``base_dir`` and classified
    with ``_window_filter_status``. A registered script that is missing from
    ``base_dir`` is recorded as ``FAIL`` instead of aborting the whole review.

    Side effects:
        Writes ``dragon_review_window_consistency.md`` into ``base_dir``.

    Args:
        base_dir: Directory holding the scripts and receiving the report.

    Returns:
        One row per registered script with the columns ``script``, ``scope``,
        ``category``, ``expectation``, ``status`` and ``note``.
    """
    columns = ["script", "scope", "category", "expectation", "status", "note"]
    rows: list[dict[str, object]] = []
    for item in WINDOW_SCRIPTS:
        script_path = base_dir / item.path
        if script_path.is_file():
            status, note = _window_filter_status(script_path.read_text(encoding="utf-8"), item.expectation)
        else:
            # A stale registry entry must surface in the report, not crash it.
            status, note = "FAIL", "Script file not found."
        rows.append(
            {
                "script": item.path,
                "scope": item.scope,
                "category": item.category,
                "expectation": item.expectation,
                "status": status,
                "note": note,
            }
        )
    # Explicit columns keep the frame well-formed even for an empty registry.
    df = pd.DataFrame(rows, columns=columns)
    status_counts = df["status"].value_counts()
    passed = int(status_counts.get("PASS", 0))
    exempt = int(status_counts.get("EXEMPT", 0))
    failed = int(status_counts.get("FAIL", 0))

    lines = [
        "# Dragon Review - Window Consistency",
        "",
        "## Scope",
        "- Goal: verify that in-sample / workbook-window trade statistics do not keep window-external exits.",
        "- Rule: for bounded research windows, trade filters must constrain both `buy_date` and `sell_date`.",
        "",
        "## Summary",
        f"- PASS: `{passed}`",
        f"- EXEMPT: `{exempt}`",
        f"- FAIL: `{failed}`",
        "",
        "## Result Table",
    ]
    lines.extend(
        f"- `{row['script']}` | {row['category']} | {row['scope']} | `{row['status']}` | {row['note']}"
        for row in rows
    )
    if failed == 0:
        # The positive judgment is only emitted when nothing failed.
        lines.extend(
            [
                "",
                "## Judgment",
                "- The bounded research/evaluation pack is now on a consistent dual-bound trade-window rule.",
                "- `dragon_daily_signal_pipeline.py` remains intentionally exempt because it serves the live / forward chain rather than workbook-window evaluation.",
            ]
        )
    (base_dir / "dragon_review_window_consistency.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
    return df


def _branch_source_frames(base_dir: Path) -> dict[str, pd.DataFrame]:
    """Load the branch-summary CSVs that are present and non-empty.

    Returns a mapping from CSV file name to its frame, in the fixed order
    overview, glued-refined summary, alpha-first summary. Files that load as
    an empty frame are left out.
    """
    candidate_names = (
        "dragon_strategy_overview.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_alpha_first_branch_summary.csv",
    )
    loaded = ((name, _load_csv(base_dir / name)) for name in candidate_names)
    return {name: frame for name, frame in loaded if not frame.empty}


def _iter_metric_values(base_dir: Path) -> Iterable[dict[str, object]]:
    """Yield one ``source`` / ``branch`` / ``metric`` / ``value`` record per observation.

    CSV sources come from ``_branch_source_frames``; for each row, only the
    tracked metrics that exist as columns are emitted. If
    ``dragon_rc1_config_snapshot.json`` exists, its headline metrics are
    emitted afterwards under the snapshot's ``branch_name``.
    """
    tracked_metrics = (
        "trades",
        "win_rate",
        "avg_return",
        "median_return",
        "profit_factor",
        "compounded_return",
        "cagr",
        "real_buy_overlap",
        "real_sell_overlap",
    )
    for source_name, frame in _branch_source_frames(base_dir).items():
        # A row's index is the frame's column index, so resolve this once per frame.
        available = [name for name in tracked_metrics if name in frame.columns]
        for _, record in frame.iterrows():
            branch_label = str(record["branch"])
            for name in available:
                yield {
                    "source": source_name,
                    "branch": branch_label,
                    "metric": name,
                    "value": record[name],
                }

    snapshot_path = base_dir / "dragon_rc1_config_snapshot.json"
    if not snapshot_path.exists():
        return
    snapshot = json.loads(snapshot_path.read_text(encoding="utf-8"))
    # Report metric name -> key used inside the snapshot payload.
    snapshot_keys = {
        "trades": "trade_count",
        "win_rate": "win_rate",
        "avg_return": "avg_return",
        "median_return": "median_return",
        "profit_factor": "profit_factor",
        "compounded_return": "compounded_return",
        "cagr": "cagr",
    }
    for name, payload_key in snapshot_keys.items():
        yield {
            "source": "dragon_rc1_config_snapshot.json",
            "branch": str(snapshot["branch_name"]),
            "metric": name,
            "value": snapshot[payload_key],
        }


def build_branch_metric_consistency(base_dir: Path) -> pd.DataFrame:
    """Cross-check branch metrics between report sources and write CSV + Markdown.

    Values from ``_iter_metric_values`` are coerced to numbers, pivoted to one
    row per ``(branch, metric)`` with one column per source, and labelled
    ``match``, ``mismatch`` or ``single_source`` based on the spread between
    the non-missing source values.

    Side effects:
        Writes ``dragon_review_branch_metric_consistency.csv`` and
        ``dragon_review_branch_metric_consistency.md`` into ``base_dir``.

    Args:
        base_dir: Directory holding the source artifacts and receiving output.

    Returns:
        The consistency table. It always carries the columns ``branch``,
        ``metric``, ``source_count``, ``min_value``, ``max_value``,
        ``max_abs_diff`` and ``status`` (plus one column per source when data
        exists), so callers can filter on ``status`` even when no source data
        was found.
    """
    csv_path = base_dir / "dragon_review_branch_metric_consistency.csv"
    md_path = base_dir / "dragon_review_branch_metric_consistency.md"
    base_columns = ["branch", "metric", "source_count", "min_value", "max_value", "max_abs_diff", "status"]

    raw = pd.DataFrame(list(_iter_metric_values(base_dir)))
    if raw.empty:
        # Keep the schema on the empty path: a column-less frame breaks
        # downstream ``["status"]`` lookups and yields a header-less CSV.
        empty = pd.DataFrame(columns=base_columns)
        empty.to_csv(csv_path, index=False, encoding="utf-8-sig")
        md_path.write_text(
            "# Dragon Review - Branch Metric Consistency\n\n- No source data found.\n",
            encoding="utf-8",
        )
        return empty

    raw["value"] = pd.to_numeric(raw["value"], errors="coerce")
    pivot = raw.pivot_table(index=["branch", "metric"], columns="source", values="value", aggfunc="last").reset_index()
    source_cols = [col for col in pivot.columns if col not in {"branch", "metric"}]
    # Count-like metrics get a tighter absolute tolerance than ratio metrics.
    count_metrics = {"trades", "real_buy_overlap", "real_sell_overlap"}
    records: list[dict[str, object]] = []
    for _, row in pivot.iterrows():
        values = [row[col] for col in source_cols if pd.notna(row[col])]
        lowest = float(min(values)) if values else float("nan")
        highest = float(max(values)) if values else float("nan")
        max_abs_diff = float(max(values) - min(values)) if values else float("nan")
        if len(values) <= 1:
            status = "single_source"
        else:
            tol = 1e-12 if row["metric"] in count_metrics else 1e-9
            status = "match" if max_abs_diff <= tol else "mismatch"
        rec = {
            "branch": row["branch"],
            "metric": row["metric"],
            "source_count": int(len(values)),
            "min_value": lowest,
            "max_value": highest,
            "max_abs_diff": max_abs_diff,
            "status": status,
        }
        for col in source_cols:
            rec[col] = row[col]
        records.append(rec)
    # Explicit columns keep ``sort_values`` valid even if no rows were produced
    # (e.g. every value was non-numeric and coerced to NaN).
    result = (
        pd.DataFrame(records, columns=base_columns + source_cols)
        .sort_values(["branch", "metric"])
        .reset_index(drop=True)
    )
    result.to_csv(csv_path, index=False, encoding="utf-8-sig")

    mismatches = result[result["status"] == "mismatch"].copy()
    lines = [
        "# Dragon Review - Branch Metric Consistency",
        "",
        "## Scope",
        "- Sources compared:",
        "- `dragon_strategy_overview.csv`",
        "- `dragon_glued_refined_branch_summary.csv`",
        "- `dragon_alpha_first_branch_summary.csv`",
        "- `dragon_rc1_config_snapshot.json`",
        "",
        f"- Compared rows: `{len(result)}`",
        f"- Mismatches: `{len(mismatches)}`",
        "",
    ]
    if mismatches.empty:
        lines.append("- All compared branch metrics are consistent across current outputs.")
    else:
        lines.append("## Mismatches")
        for _, row in mismatches.iterrows():
            parts = [f"{col}={row[col]}" for col in source_cols if pd.notna(row[col])]
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` | spread `{row['max_abs_diff']}` | " + "; ".join(parts)
            )
    lines.extend(
        [
            "",
            "## Judgment",
            "- `match` means the same branch/metric agrees across all currently available source files.",
            "- `mismatch` means at least one report family is using a different metric definition or evaluation window.",
            "- `single_source` means only one current artifact exposes that metric, so cross-check confidence is lower.",
        ]
    )
    md_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return result


def _calc_removed_trade_over_removal_count(base_dir: Path) -> float:
    """Count attribution rows whose ``recommendation`` is ``OVER_REMOVAL``.

    Reads ``dragon_glued_refined_removed_trade_attribution.csv`` from
    ``base_dir``. Returns NaN when the file yields no rows or has no
    ``recommendation`` column; otherwise the count as a float.
    """
    attribution = _load_csv(base_dir / "dragon_glued_refined_removed_trade_attribution.csv")
    if not attribution.empty and "recommendation" in attribution.columns:
        flagged = attribution["recommendation"].astype(str).eq("OVER_REMOVAL")
        return float(flagged.sum())
    return float("nan")


def _calc_local_sensitivity_robust_case_count(base_dir: Path) -> float:
    """Count sensitivity cases that stay close to the refined-candidate baseline.

    Reads ``dragon_glued_refined_sensitivity.csv`` from ``base_dir``. The
    baseline is the first row labelled ``refined_candidate_baseline``; the
    neighborhood is every row not labelled ``current_alpha_control`` or
    ``refined_candidate_baseline``. A neighbor counts as robust when its
    ``avg_return``, ``profit_factor``, ``real_buy_overlap`` and
    ``real_sell_overlap`` are each no lower than the baseline minus a fixed
    slack (0.0015, 0.20, 1 and 1 respectively).

    Returns:
        The robust-case count as a float, or NaN when the file is missing or
        empty, a required column is absent, there is no baseline row, or the
        baseline row has a missing value in a compared column.
    """
    df = _load_csv(base_dir / "dragon_glued_refined_sensitivity.csv")
    compared = ["avg_return", "profit_factor", "real_buy_overlap", "real_sell_overlap"]
    # Validate every column used below, not just ``label``, so a schema gap
    # degrades to NaN like the other guards instead of raising KeyError.
    if df.empty or any(col not in df.columns for col in ["label", *compared]):
        return float("nan")
    candidate = df[df["label"] == "refined_candidate_baseline"]
    if candidate.empty:
        return float("nan")
    candidate_row = candidate.iloc[0]
    if candidate_row[compared].isna().any():
        # ``int(NaN)`` would raise; an incomplete baseline cannot be compared.
        return float("nan")

    min_avg_return = float(candidate_row["avg_return"]) - 0.0015
    min_profit_factor = float(candidate_row["profit_factor"]) - 0.20
    min_buy_overlap = int(candidate_row["real_buy_overlap"]) - 1
    min_sell_overlap = int(candidate_row["real_sell_overlap"]) - 1

    neighborhood = df[~df["label"].isin(["current_alpha_control", "refined_candidate_baseline"])]
    robust = neighborhood[
        (neighborhood["avg_return"] >= min_avg_return)
        & (neighborhood["profit_factor"] >= min_profit_factor)
        & (neighborhood["real_buy_overlap"] >= min_buy_overlap)
        & (neighborhood["real_sell_overlap"] >= min_sell_overlap)
    ]
    return float(len(robust))


def build_execution_monitor_review(base_dir: Path) -> None:
    """Write the execution / monitor consistency review as Markdown.

    Three things are checked and reported:

    * governance metrics in ``dragon_daily_monitor_snapshot.csv`` against
      values re-derived from the current source CSVs;
    * whether the daily pipeline and the execution-validation script contain
      the ``float("nan") if buy_next is None`` / ``sell_next`` text markers;
    * whether the weekly summary CSV and weekly HTML mention ``system_monitor``.

    Missing text artifacts, or CSVs lacking the expected columns, make the
    affected check report ``False`` / ``mismatch`` rather than aborting.

    Side effects:
        Writes ``dragon_review_execution_monitor.md`` into ``base_dir``.

    Args:
        base_dir: Directory holding the artifacts and receiving the report.
    """

    def read_text_or_empty(path: Path) -> str:
        # An absent artifact simply fails its substring checks.
        return path.read_text(encoding="utf-8") if path.is_file() else ""

    monitor = _load_csv(base_dir / "dragon_daily_monitor_snapshot.csv")
    weekly = _load_csv(base_dir / "dragon_forward_weekly_summary.csv")
    html_text = read_text_or_empty(base_dir / "dragon_forward_weekly_review.html")
    daily_text = read_text_or_empty(base_dir / "dragon_daily_signal_pipeline.py")
    exec_text = read_text_or_empty(base_dir / "dragon_refined_execution_validation.py")

    actual_removed = _calc_removed_trade_over_removal_count(base_dir)
    actual_robust = _calc_local_sensitivity_robust_case_count(base_dir)

    # metric name -> numeric monitor value; rows without a usable number are skipped.
    monitor_map: dict[str, float] = {}
    if {"metric", "actual_value"}.issubset(monitor.columns):
        actual_values = pd.to_numeric(monitor["actual_value"], errors="coerce")
        for metric_name, actual in zip(monitor["metric"], actual_values):
            if pd.notna(actual):
                monitor_map[str(metric_name)] = float(actual)

    nan = float("nan")
    # Comparing against NaN is always False, so an absent monitor entry is a mismatch.
    removed_match = pd.notna(actual_removed) and abs(monitor_map.get("removed_trade_over_removal_count", nan) - actual_removed) < 1e-12
    robust_match = pd.notna(actual_robust) and abs(monitor_map.get("local_sensitivity_robust_case_count", nan) - actual_robust) < 1e-12
    daily_uses_nan = 'float("nan") if buy_next is None' in daily_text and 'float("nan") if sell_next is None' in daily_text
    exec_uses_nan = 'float("nan") if buy_next is None' in exec_text and 'float("nan") if sell_next is None' in exec_text
    weekly_has_system_monitor = (
        not weekly.empty
        and "branch" in weekly.columns
        and "system_monitor" in set(weekly["branch"].astype(str))
    )
    weekly_html_has_system_monitor = "system_monitor" in html_text

    lines = [
        "# Dragon Review - Execution And Monitor Consistency",
        "",
        "## Governance Metrics",
        f"- `removed_trade_over_removal_count`: monitor `{monitor_map.get('removed_trade_over_removal_count')}` vs source-derived `{actual_removed}` -> `{'match' if removed_match else 'mismatch'}`",
        f"- `local_sensitivity_robust_case_count`: monitor `{monitor_map.get('local_sensitivity_robust_case_count')}` vs source-derived `{actual_robust}` -> `{'match' if robust_match else 'mismatch'}`",
        "",
        "## Execution Fallback Rule",
        f"- `dragon_daily_signal_pipeline.py` uses NaN on missing next-bar execution prices: `{daily_uses_nan}`",
        f"- `dragon_refined_execution_validation.py` uses NaN on missing next-bar execution prices: `{exec_uses_nan}`",
        "",
        "## Weekly Monitor Separation",
        f"- `dragon_forward_weekly_summary.csv` includes `system_monitor` row: `{weekly_has_system_monitor}`",
        f"- `dragon_forward_weekly_review.html` includes `system_monitor` text: `{weekly_html_has_system_monitor}`",
        "",
        "## Judgment",
        "- The monitor chain is trustworthy only if governance metrics are derived from current source artifacts rather than hard-coded constants.",
        "- The execution-aware chain is trustworthy only if missing next-bar prices do not silently fall back to same-bar close.",
        "- Weekly summary is cleaner now because branch rows and system-level monitor counts are separated.",
    ]
    (base_dir / "dragon_review_execution_monitor.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def build_reporting_integrity_review(base_dir: Path) -> None:
    """Write the reporting-integrity review (file presence and link markers).

    The latest bar date is read from ``dragon_forward_observation_state.json``
    (falling back to ``"latest"`` when the file or key is absent) and used to
    build the expected archive file names. The report lists whether each
    expected HTML file exists and whether a set of substring markers is present
    in the root index, archive index, detail page and daily report.

    A missing file is reported as ``False`` for its existence row and for the
    checks that depend on its text; it does not abort the review.

    Side effects:
        Writes ``dragon_review_reporting_integrity.md`` into ``base_dir``.

    Args:
        base_dir: Directory holding the HTML outputs and receiving the report.
    """

    def read_text_or_empty(path: Path) -> str:
        # Absent files contribute empty text so their marker checks evaluate to False.
        return path.read_text(encoding="utf-8") if path.is_file() else ""

    state_path = base_dir / "dragon_forward_observation_state.json"
    if state_path.is_file():
        latest_bar = json.loads(state_path.read_text(encoding="utf-8")).get("latest_bar_date", "latest")
    else:
        latest_bar = "latest"

    archive_dir = base_dir / "html_reports"
    files_to_check = [
        base_dir / "dragon_reports_index.html",
        base_dir / "dragon_daily_signal_report.html",
        base_dir / "dragon_forward_weekly_review.html",
        base_dir / "dragon_historical_trade_details.html",
        base_dir / "dragon_indicator_strategy_guide_cn.html",
        archive_dir / "index.html",
        archive_dir / f"dragon_daily_signal_report_{latest_bar}.html",
        archive_dir / f"dragon_forward_weekly_review_{latest_bar}.html",
        archive_dir / f"dragon_historical_trade_details_{latest_bar}.html",
    ]
    existence_rows = [{"path": str(path.relative_to(base_dir)), "exists": path.exists()} for path in files_to_check]

    index_text = read_text_or_empty(base_dir / "dragon_reports_index.html")
    archive_index_text = read_text_or_empty(archive_dir / "index.html")
    detail_text = read_text_or_empty(base_dir / "dragon_historical_trade_details.html")
    daily_text = read_text_or_empty(base_dir / "dragon_daily_signal_report.html")

    checks = [
        ("root index links to root daily report", 'href="dragon_daily_signal_report.html"' in index_text),
        (
            "root index links to archived daily report",
            f'href="html_reports/dragon_daily_signal_report_{latest_bar}.html"' in index_text,
        ),
        (
            "archive index links locally inside html_reports",
            f'href="dragon_daily_signal_report_{latest_bar}.html"' in archive_index_text,
        ),
        ("detail page contains snapshot summary strip", "snapshot-summary" in detail_text),
        ("detail page contains event summary labels", "总事件" in detail_text and "前一条：" in detail_text),
        ("detail page contains query filters", "branch-filter" in detail_text and "keyword-filter" in detail_text),
        ("daily report links to historical detail page", 'href="dragon_historical_trade_details.html"' in daily_text),
    ]

    lines = [
        "# Dragon Review - Reporting Integrity",
        "",
        "## File Existence",
    ]
    lines.extend(f"- `{row['path']}` -> `{row['exists']}`" for row in existence_rows)
    lines.extend(["", "## Link / Feature Checks"])
    lines.extend(f"- {label}: `{passed}`" for label, passed in checks)

    all_passed = all(row["exists"] for row in existence_rows) and all(passed for _, passed in checks)
    lines.extend(["", "## Judgment"])
    if all_passed:
        lines.extend(
            [
                "- Root and archive HTML outputs are present and linked through the expected root-vs-archive relative paths.",
                "- Historical detail reporting currently includes the new indicator snapshot event-summary strip and deep-link filter controls.",
            ]
        )
    else:
        # Do not assert presence/linkage when the tables above show failures.
        lines.append("- One or more expected files or link / feature checks failed; see the `False` entries above.")
    lines.append(
        "- Terminal mojibake remains a shell-display issue; these checks only validate file presence and embedded text markers, not browser rendering fidelity."
    )
    (base_dir / "dragon_review_reporting_integrity.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def build_system_final(base_dir: Path, branch_consistency: pd.DataFrame) -> None:
    """Write the final system-review summary with trust tiers.

    Rows of ``branch_consistency`` whose ``status`` equals ``"mismatch"``
    decide the wording: without mismatches ``dragon_strategy_overview.csv`` is
    listed as directly trusted; with mismatches it is listed as not
    recommended and each mismatching branch/metric is enumerated.

    Side effects:
        Writes ``dragon_system_review_final.md`` into ``base_dir``.

    Args:
        base_dir: Directory receiving the report.
        branch_consistency: Consistency table with ``branch``, ``metric`` and
            ``status`` columns. A frame without a ``status`` column (e.g. a
            column-less empty frame) is treated as having no mismatches.
    """
    if "status" in branch_consistency.columns:
        mismatch_branches = branch_consistency[branch_consistency["status"] == "mismatch"]
    else:
        # Column-less input would raise KeyError on the status lookup.
        mismatch_branches = branch_consistency.iloc[0:0]
    aligned = mismatch_branches.empty

    overview_name = "dragon_strategy_overview.csv"
    trusted_direct = [
        "dragon_alpha_first_branch_summary.csv",
        "dragon_glued_refined_branch_summary.csv",
        "dragon_rc1_config_snapshot.json",
        "dragon_daily_monitor_snapshot.csv",
    ]
    trusted_labeled = [
        "dragon_refined_execution_stress.csv",
        "dragon_cost_stress_test.csv",
        "dragon_forward_weekly_summary.csv",
        "dragon_historical_trade_details.html",
    ]
    not_recommended: list[str] = []
    # The overview lands in exactly one tier depending on alignment.
    (trusted_direct if aligned else not_recommended).append(overview_name)

    lines = [
        "# Dragon System Review Final",
        "",
        "## Overall Judgment",
        "- The current workspace is much closer to a trustworthy research pack after the window-consistency sweep and monitor-fallback fixes.",
    ]
    if aligned:
        lines.extend(
            [
                "- Cross-report headline metrics are currently aligned across the main branch-summary, release, and overview artifacts.",
                "- The remaining cautions are about report interpretation and forward monitoring, not internal metric drift.",
            ]
        )
    else:
        lines.append(
            "- The main remaining risk is not strategy logic but metric-definition drift between a few report families."
        )
    lines.extend(["", "## Trust Tiers", "- 可信可直接使用:"])
    lines.extend(f"- `{name}`" for name in trusted_direct)
    lines.append("- 可信但需标注口径:")
    lines.extend(f"- `{name}`" for name in trusted_labeled)
    lines.append("- 暂不建议直接引用:")
    if not_recommended:
        lines.extend(f"- `{name}`" for name in not_recommended)
    else:
        lines.append("- `none`")

    if not aligned:
        lines.extend(["", "## Remaining Review Findings"])
        for _, row in mismatch_branches.iterrows():
            lines.append(
                f"- `{row['branch']}` / `{row['metric']}` remains inconsistent across report families; see `dragon_review_branch_metric_consistency.csv`."
            )

    lines.extend(
        [
            "",
            "## Practical Meaning",
            "- Use the branch-specific summary/release artifacts as the primary basis for governance decisions.",
            "- Use the consistency reports as an audit trail before external distribution of top-line metrics.",
        ]
    )
    if aligned:
        lines.append(
            "- `dragon_strategy_overview.csv` is now aligned with the main branch artifacts and can be used as the compact comparison view."
        )
    else:
        # Claiming alignment here would contradict the "not recommended" tier above.
        lines.append(
            "- `dragon_strategy_overview.csv` should not be used as the compact comparison view until the findings above are resolved."
        )
    (base_dir / "dragon_system_review_final.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    """Run every review builder against the directory containing this module.

    The branch-metric consistency table is computed once and handed to the
    final summary; the other builders only write their own report files.
    """
    workspace = Path(__file__).resolve().parent
    build_window_consistency_report(workspace)
    metric_consistency = build_branch_metric_consistency(workspace)
    for build_review in (build_execution_monitor_review, build_reporting_integrity_review):
        build_review(workspace)
    build_system_final(workspace, metric_consistency)


# Run the full review only when executed as a script, not when imported.
if __name__ == "__main__":
    main()