| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- from __future__ import annotations
- from pathlib import Path
- import pandas as pd
def _find_workbook(base_dir: Path) -> Path:
    """Return the first ``*.xlsx`` workbook in *base_dir*, by sorted path.

    Excel keeps an owner/lock file named ``~$<workbook>.xlsx`` beside a
    workbook that is currently open. Such files match the glob but are not
    workbooks, so they are skipped.

    Args:
        base_dir: Directory to search (top level only, not recursive).

    Returns:
        The first matching path in sorted order.

    Raises:
        FileNotFoundError: If *base_dir* contains no usable ``.xlsx`` file.
    """
    matches = sorted(
        path
        for path in base_dir.glob("*.xlsx")
        if not path.name.startswith("~$")
    )
    if not matches:
        raise FileNotFoundError(f"No workbook found in {base_dir}")
    return matches[0]
def _collect_gap_rows(
    workbook_df: pd.DataFrame,
    strategy_df: pd.DataFrame,
    indicators_df: pd.DataFrame,
    side: str,
    layer: str,
    gap_type: str,
) -> pd.DataFrame:
    """Return the events present on only one side of a workbook/strategy pair.

    Both inputs are first restricted to rows whose ``side`` and ``layer``
    columns equal the given values. Dates are then compared as sets:

    * ``gap_type == "missing_from_strategy"``: workbook rows whose date has
      no strategy row. ``source_note`` / ``source_reason`` are taken from the
      workbook's ``note`` / ``signal_reason`` columns ("" if absent).
    * any other ``gap_type``: strategy rows whose date has no workbook row.
      ``source_note`` is "" and ``source_reason`` comes from the strategy's
      ``reason`` column ("" if absent).

    The selected rows are left-joined to *indicators_df* on ``date``.

    Args:
        workbook_df: Frame with at least ``side``, ``layer`` and ``date``.
        strategy_df: Frame with at least ``side``, ``layer`` and ``date``.
        indicators_df: Frame with ``date`` plus the indicator columns
            ``close, a1, b1, c1, kdj_buy, kdj_sell, ql_buy, ql_sell``.
        side: Value matched against the ``side`` column.
        layer: Value matched against the ``layer`` column.
        gap_type: Label stored in ``diagnostic_type``; also picks direction.

    Returns:
        A frame with a fixed column set (diagnostic/target labels, date,
        source fields, indicator values). When there is no gap the frame is
        empty but still carries those columns, so callers can concatenate
        and group results without special-casing.
    """
    indicator_columns = [
        "close", "a1", "b1", "c1", "kdj_buy", "kdj_sell", "ql_buy", "ql_sell",
    ]
    output_columns = [
        "diagnostic_type",
        "target_layer",
        "target_side",
        "date",
        "source_reason",
        "source_note",
        *indicator_columns,
    ]

    wb = workbook_df[(workbook_df["side"] == side) & (workbook_df["layer"] == layer)]
    st = strategy_df[(strategy_df["side"] == side) & (strategy_df["layer"] == layer)]
    wb_dates = set(wb["date"])
    st_dates = set(st["date"])

    # ``isin`` does not care about ordering, so the date sets are not sorted;
    # row order in the result follows the source frame.
    if gap_type == "missing_from_strategy":
        base = wb[wb["date"].isin(list(wb_dates - st_dates))].copy()
        base["diagnostic_type"] = gap_type
        base["source_note"] = base.get("note", "")
        base["source_reason"] = base.get("signal_reason", "")
    else:
        base = st[st["date"].isin(list(st_dates - wb_dates))].copy()
        base["diagnostic_type"] = gap_type
        base["source_note"] = ""
        base["source_reason"] = base.get("reason", "")

    if base.empty:
        # Return the OUTPUT schema, not the input one: otherwise concatenating
        # results leaks source columns, and an all-empty run has no
        # ``target_*`` columns to group by.
        return pd.DataFrame(columns=output_columns)

    merged = base.merge(
        indicators_df[["date", *indicator_columns]],
        on="date",
        how="left",
        # On a name clash the source frame's column keeps its name.
        suffixes=("", "_ind"),
    )
    merged["target_side"] = side
    merged["target_layer"] = layer
    return merged[output_columns]
def main() -> None:
    """Write the event-gap CSV and Markdown report next to this script.

    Reads ``dragon_workbook_layers.csv``, ``dragon_strategy_events.csv`` and
    ``dragon_indicator_snapshot.csv`` from the script's directory, collects
    gap rows for every side/layer/direction combination, and writes
    ``dragon_event_gaps.csv`` plus ``dragon_event_gaps.md``.

    Raises:
        FileNotFoundError: If no workbook or one of the input CSVs is found.
    """
    base_dir = Path(__file__).resolve().parent
    workbook_path = _find_workbook(base_dir)
    workbook_layers = pd.read_csv(base_dir / "dragon_workbook_layers.csv", encoding="utf-8-sig")
    strategy_events = pd.read_csv(base_dir / "dragon_strategy_events.csv", encoding="utf-8-sig")
    indicators = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")

    # Same call order as nested loops: side, then layer, then direction.
    parts = [
        _collect_gap_rows(workbook_layers, strategy_events, indicators, side, layer, gap_type)
        for side in ("BUY", "SELL")
        for layer in ("real_trade", "aux_signal")
        for gap_type in ("missing_from_strategy", "extra_in_strategy")
    ]

    # Empty parts may not share the output schema; concatenating them would
    # add stray columns to the CSV, and an all-empty run would lack the
    # ``target_*`` columns grouped on below. Keep only parts that have rows.
    gap_columns = [
        "diagnostic_type",
        "target_layer",
        "target_side",
        "date",
        "source_reason",
        "source_note",
        "close",
        "a1",
        "b1",
        "c1",
        "kdj_buy",
        "kdj_sell",
        "ql_buy",
        "ql_sell",
    ]
    non_empty = [part for part in parts if not part.empty]
    if non_empty:
        gaps = pd.concat(non_empty, ignore_index=True)
    else:
        gaps = pd.DataFrame(columns=gap_columns)
    gaps.to_csv(base_dir / "dragon_event_gaps.csv", index=False, encoding="utf-8-sig")

    lines = [
        "# Dragon Event Gap Diagnostics",
        "",
        f"- Workbook: `{workbook_path.name}`",
        f"- Gap rows: `{len(gaps)}`",
        "",
        "## Counts",
    ]
    summary = (
        gaps.groupby(["diagnostic_type", "target_layer", "target_side"])
        .size()
        .reset_index(name="count")
        .sort_values(["diagnostic_type", "target_layer", "target_side"])
    )
    for _, row in summary.iterrows():
        lines.append(
            f"- {row['diagnostic_type']} / {row['target_layer']} / {row['target_side']}: `{int(row['count'])}`"
        )

    # Show at most 20 workbook SELL trades that the strategy did not produce.
    top_missing_real_sell = gaps[
        (gaps["diagnostic_type"] == "missing_from_strategy")
        & (gaps["target_layer"] == "real_trade")
        & (gaps["target_side"] == "SELL")
    ].head(20)
    lines.extend(["", "## Sample Missing Real SELL Rows"])
    for _, row in top_missing_real_sell.iterrows():
        lines.append(
            f"- {row['date']} reason `{row['source_reason']}` note `{row['source_note']}` a1 `{row['a1']:.4f}` b1 `{row['b1']:.4f}` c1 `{row['c1']:.2f}`"
        )

    (base_dir / "dragon_event_gaps.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Script entry point: run the diagnostics only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|