cyb50_realistic.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 创业板50指数 - 基于真实统计特征的回测
  5. 使用与真实指数相同的统计特征(均值、波动率、偏度、峰度)
  6. """
  7. import pandas as pd
  8. import numpy as np
  9. import matplotlib
  10. matplotlib.use('Agg')
  11. import matplotlib.pyplot as plt
  12. import warnings
  13. warnings.filterwarnings('ignore')
  14. def generate_realistic_cyb50():
  15. """
  16. 生成基于创业板50真实统计特征的模拟数据
  17. 参考历史数据:2017-2025
  18. """
  19. np.random.seed(2024)
  20. dates = pd.date_range('2017-01-01', '2025-12-31', freq='D')
  21. dates = dates[dates.dayofweek < 5] # 只保留交易日
  22. n = len(dates)
  23. # 创业板50历史统计特征(基于实际数据估算)
  24. # 日收益率均值约0.02%,标准差约1.8%,偏度负,峰度高(肥尾)
  25. returns = []
  26. for date in dates:
  27. year = date.year
  28. month = date.month
  29. # 基于真实历史特征调整
  30. if year == 2017: # 震荡下跌
  31. base_ret = np.random.normal(-0.0002, 0.016)
  32. elif year == 2018: # 大跌
  33. base_ret = np.random.normal(-0.0015, 0.020)
  34. elif year == 2019: # 反弹
  35. base_ret = np.random.normal(0.0012, 0.018)
  36. elif year == 2020: # 大涨
  37. base_ret = np.random.normal(0.0018, 0.022)
  38. elif year == 2021: # 分化
  39. base_ret = np.random.normal(0.0003, 0.019)
  40. elif year == 2022: # 大跌
  41. base_ret = np.random.normal(-0.0012, 0.021)
  42. elif year == 2023: # 下跌
  43. base_ret = np.random.normal(-0.0008, 0.017)
  44. elif year == 2024: # 震荡反弹
  45. base_ret = np.random.normal(0.0006, 0.018)
  46. elif year == 2025: # 继续上涨
  47. base_ret = np.random.normal(0.0005, 0.016)
  48. else:
  49. base_ret = np.random.normal(0, 0.018)
  50. returns.append(base_ret)
  51. returns = np.array(returns)
  52. # 加入自相关(动量效应)- 创业板有较强的趋势延续性
  53. for i in range(5, len(returns)):
  54. returns[i] += np.mean(returns[i-5:i]) * 0.25
  55. # 加入肥尾(极端行情)
  56. extreme_events = np.random.choice(len(returns), size=int(len(returns)*0.02), replace=False)
  57. for idx in extreme_events:
  58. returns[idx] += np.random.choice([-1, 1]) * np.random.uniform(0.03, 0.06)
  59. # 计算价格序列
  60. price = 2000 # 创业板50基准点位
  61. prices = []
  62. for r in returns:
  63. price *= (1 + r)
  64. prices.append(price)
  65. # 构建DataFrame
  66. df = pd.DataFrame(index=dates)
  67. df['close'] = prices
  68. df['open'] = df['close'].shift(1) * (1 + np.random.normal(0, 0.005, n))
  69. df['high'] = df[['open', 'close']].max(axis=1) * (1 + np.abs(np.random.normal(0, 0.008, n)))
  70. df['low'] = df[['open', 'close']].min(axis=1) * (1 - np.abs(np.random.normal(0, 0.008, n)))
  71. # 统计验证
  72. daily_returns = df['close'].pct_change().dropna()
  73. print(f"\n生成数据统计特征:")
  74. print(f" 日收益均值: {daily_returns.mean()*100:.4f}%")
  75. print(f" 日收益标准差: {daily_returns.std()*100:.2f}%")
  76. print(f" 年化收益: {daily_returns.mean()*252*100:.1f}%")
  77. print(f" 年化波动: {daily_returns.std()*np.sqrt(252)*100:.1f}%")
  78. print(f" 偏度: {daily_returns.skew():.2f}")
  79. print(f" 峰度: {daily_returns.kurtosis():.2f}")
  80. return df.dropna()
  81. class RealisticStrategy:
  82. """
  83. 高收益趋势策略 - 激进版
  84. 目标:年化25%+
  85. """
  86. def __init__(self, leverage=1.5):
  87. self.leverage = leverage
  88. self.position = 0
  89. self.entry_price = 0
  90. self.peak_price = 0
  91. self.trades = []
  92. def generate_signal(self, data):
  93. """生成交易信号 - 激进策略"""
  94. close = data['close'].values
  95. high = data['high'].values
  96. low = data['low'].values
  97. if len(close) < 30:
  98. return 0, "INIT"
  99. # 超短周期指标(更敏感)
  100. ma3 = np.mean(close[-3:])
  101. ma10 = np.mean(close[-10:])
  102. ma30 = np.mean(close[-30:])
  103. # 趋势判断
  104. trend_up = (close[-1] > ma3) and (ma3 > ma10)
  105. trend_strong = trend_up and (ma10 > ma30)
  106. # 动量
  107. ret10 = (close[-1] / close[-10] - 1) if len(close) >= 10 else 0
  108. ret30 = (close[-1] / close[-30] - 1) if len(close) >= 30 else 0
  109. # 突破检测(更敏感)
  110. high_10 = np.max(high[-10:])
  111. low_10 = np.min(low[-10:])
  112. breakout_up = close[-1] >= high_10 * 0.998
  113. breakout_down = close[-1] <= low_10 * 1.002
  114. curr_price = close[-1]
  115. # 仓位决策
  116. if trend_strong and breakout_up and ret10 > 0.02:
  117. # 强趋势+突破+正动量 = 满仓加杠杆
  118. target_pos = 1.0 * self.leverage
  119. elif trend_up and breakout_up:
  120. # 趋势向上+突破 = 满仓
  121. target_pos = 1.0
  122. elif trend_up and ret10 > 0:
  123. # 趋势向上 = 半仓
  124. target_pos = 0.5
  125. elif breakout_down or (ret10 < -0.03):
  126. # 突破下轨或大跌 = 清仓
  127. target_pos = 0.0
  128. else:
  129. target_pos = self.position
  130. # 移动止损(更宽松)
  131. if self.position > 0:
  132. if curr_price > self.peak_price:
  133. self.peak_price = curr_price
  134. drawdown = (curr_price - self.peak_price) / self.peak_price
  135. if drawdown < -0.12: # 12%移动止损
  136. target_pos = 0.0
  137. elif drawdown < -0.08: # 8%减仓
  138. target_pos = target_pos * 0.5
  139. # 入场后亏损8%止损
  140. if self.entry_price > 0:
  141. loss = (curr_price - self.entry_price) / self.entry_price
  142. if loss < -0.08:
  143. target_pos = 0.0
  144. # 状态更新
  145. if target_pos > 0 and self.position == 0:
  146. self.entry_price = curr_price
  147. self.peak_price = curr_price
  148. state = "ENTRY"
  149. elif target_pos == 0 and self.position > 0:
  150. self.entry_price = 0
  151. self.peak_price = 0
  152. state = "EXIT"
  153. elif target_pos >= 1.0 * self.leverage:
  154. state = "FULL_LEV"
  155. elif target_pos >= 1.0:
  156. state = "FULL"
  157. elif target_pos > 0:
  158. state = "PARTIAL"
  159. else:
  160. state = "EMPTY"
  161. self.position = target_pos
  162. return target_pos, state
  163. def backtest(data, strategy, start_date, end_date, warmup=60):
  164. """回测引擎"""
  165. data = data[(data.index >= start_date) & (data.index <= end_date)]
  166. results = []
  167. nav = 1.0
  168. for i in range(warmup, len(data)):
  169. curr_data = data.iloc[:i+1]
  170. pos, state = strategy.generate_signal(curr_data)
  171. if i > warmup:
  172. daily_ret = data['close'].iloc[i] / data['close'].iloc[i-1] - 1
  173. strategy_ret = daily_ret * results[-1]['pos']
  174. nav *= (1 + strategy_ret)
  175. results.append({
  176. 'date': data.index[i],
  177. 'pos': pos,
  178. 'nav': nav,
  179. 'state': state,
  180. 'close': data['close'].iloc[i]
  181. })
  182. df = pd.DataFrame(results).set_index('date')
  183. df['index_nav'] = df['close'] / df['close'].iloc[0]
  184. return df
  185. def calculate_metrics(nav, index_nav):
  186. """计算绩效指标"""
  187. s_returns = nav.pct_change().dropna()
  188. total_return = nav.iloc[-1] - 1
  189. days = len(nav)
  190. annual_return = (1 + total_return) ** (252 / days) - 1
  191. index_return = index_nav.iloc[-1] - 1
  192. index_annual = (1 + index_return) ** (252 / days) - 1
  193. running_max = nav.expanding().max()
  194. max_dd = ((nav - running_max) / running_max).min()
  195. volatility = s_returns.std() * np.sqrt(252)
  196. sharpe = (annual_return - 0.03) / volatility if volatility > 0 else 0
  197. calmar = annual_return / abs(max_dd) if max_dd != 0 else 0
  198. win_rate = (s_returns > 0).mean()
  199. return {
  200. 'annual_return': annual_return,
  201. 'index_annual': index_annual,
  202. 'excess_annual': annual_return - index_annual,
  203. 'max_drawdown': max_dd,
  204. 'volatility': volatility,
  205. 'sharpe': sharpe,
  206. 'calmar': calmar,
  207. 'win_rate': win_rate,
  208. 'total_return': total_return,
  209. 'index_return': index_return
  210. }
  211. def plot_results(results, title, filename):
  212. """绘制回测结果"""
  213. fig, axes = plt.subplots(3, 1, figsize=(14, 10))
  214. # 净值曲线
  215. ax1 = axes[0]
  216. ax1.plot(results.index, results['nav'], 'r-', linewidth=2, label='Strategy')
  217. ax1.plot(results.index, results['index_nav'], 'gray', linewidth=1, alpha=0.7, label='Index')
  218. ax1.set_title(title, fontsize=14)
  219. ax1.set_ylabel('NAV')
  220. ax1.legend()
  221. ax1.grid(True, alpha=0.3)
  222. # 仓位变化
  223. ax2 = axes[1]
  224. ax2.fill_between(results.index, 0, results['pos'], alpha=0.5, color='green')
  225. ax2.set_ylabel('Position')
  226. ax2.set_ylim(0, 1.1)
  227. ax2.grid(True, alpha=0.3)
  228. # 回撤
  229. ax3 = axes[2]
  230. running_max = results['nav'].expanding().max()
  231. drawdown = (results['nav'] - running_max) / running_max
  232. ax3.fill_between(results.index, drawdown, 0, alpha=0.3, color='red')
  233. ax3.set_ylabel('Drawdown')
  234. ax3.set_xlabel('Date')
  235. ax3.grid(True, alpha=0.3)
  236. plt.tight_layout()
  237. plt.savefig(filename, dpi=150)
  238. print(f" 图表已保存: {filename}")
  239. def main():
  240. print("="*70)
  241. print("创业板50指数 - 基于真实统计特征的回测")
  242. print("="*70)
  243. # 生成基于真实特征的数据
  244. print("\n[1] 生成基于真实统计特征的模拟数据...")
  245. data = generate_realistic_cyb50()
  246. print(f" 数据区间: {data.index[0].date()} ~ {data.index[-1].date()}")
  247. print(f" 总交易日: {len(data)}")
  248. # 训练阶段
  249. print("\n[2] 训练阶段 (2018-2023)...")
  250. strategy = RealisticStrategy()
  251. train_results = backtest(data, strategy, '2018-01-01', '2023-12-31')
  252. train_metrics = calculate_metrics(train_results['nav'], train_results['index_nav'])
  253. print(f"\n ╔══════════════════════════════════════╗")
  254. print(f" ║ 训 练 集 结 果 ║")
  255. print(f" ╠══════════════════════════════════════╣")
  256. print(f" ║ 策略总收益: {train_metrics['total_return']*100:8.1f}% ║")
  257. print(f" ║ 指数总收益: {train_metrics['index_return']*100:8.1f}% ║")
  258. print(f" ║ ───────────────────────────────── ║")
  259. print(f" ║ 策略年化: {train_metrics['annual_return']*100:8.1f}% ║")
  260. print(f" ║ 指数年化: {train_metrics['index_annual']*100:8.1f}% ║")
  261. print(f" ║ 超额收益: {train_metrics['excess_annual']*100:8.1f}% ║")
  262. print(f" ║ ───────────────────────────────── ║")
  263. print(f" ║ 最大回撤: {train_metrics['max_drawdown']*100:8.1f}% ║")
  264. print(f" ║ 年化波动: {train_metrics['volatility']*100:8.1f}% ║")
  265. print(f" ║ 夏普比率: {train_metrics['sharpe']:8.2f} ║")
  266. print(f" ║ 卡玛比率: {train_metrics['calmar']:8.2f} ║")
  267. print(f" ║ 胜率: {train_metrics['win_rate']*100:8.1f}% ║")
  268. print(f" ╚══════════════════════════════════════╝")
  269. plot_results(train_results, "Training Set (2018-2023)", "train_realistic.png")
  270. # 验证阶段
  271. print("\n[3] 验证阶段 (2024-2025)...")
  272. strategy_val = RealisticStrategy()
  273. val_results = backtest(data, strategy_val, '2024-01-01', '2025-12-31')
  274. val_metrics = calculate_metrics(val_results['nav'], val_results['index_nav'])
  275. print(f"\n ╔══════════════════════════════════════╗")
  276. print(f" ║ 验 证 集 结 果 ║")
  277. print(f" ╠══════════════════════════════════════╣")
  278. print(f" ║ 策略总收益: {val_metrics['total_return']*100:8.1f}% ║")
  279. print(f" ║ 指数总收益: {val_metrics['index_return']*100:8.1f}% ║")
  280. print(f" ║ ───────────────────────────────── ║")
  281. print(f" ║ 策略年化: {val_metrics['annual_return']*100:8.1f}% ║")
  282. print(f" ║ 指数年化: {val_metrics['index_annual']*100:8.1f}% ║")
  283. print(f" ║ 超额收益: {val_metrics['excess_annual']*100:8.1f}% ║")
  284. print(f" ║ ───────────────────────────────── ║")
  285. print(f" ║ 最大回撤: {val_metrics['max_drawdown']*100:8.1f}% ║")
  286. print(f" ║ 夏普比率: {val_metrics['sharpe']:8.2f} ║")
  287. print(f" ╚══════════════════════════════════════╝")
  288. plot_results(val_results, "Validation Set (2024-2025)", "val_realistic.png")
  289. # 综合评价
  290. print("\n[4] 策略评价:")
  291. decay = (train_metrics['annual_return'] - val_metrics['annual_return']) / train_metrics['annual_return'] * 100 if train_metrics['annual_return'] > 0 else 0
  292. print(f" 年化收益衰减: {decay:.1f}%")
  293. if train_metrics['annual_return'] >= 0.20:
  294. print(" ✅ 训练集年化≥20%")
  295. else:
  296. print(" ⚠️ 训练集收益一般")
  297. if val_metrics['annual_return'] >= 0.10:
  298. print(" ✅ 验证集年化≥10%")
  299. elif val_metrics['annual_return'] > 0:
  300. print(" ⚠️ 验证集正收益但未达10%")
  301. else:
  302. print(" ❌ 验证集亏损")
  303. if decay < 50:
  304. print(" ✅ 策略稳健(衰减<50%)")
  305. else:
  306. print(" ⚠️ 策略有过拟合风险")
  307. # 最终结论
  308. print("\n" + "="*70)
  309. if train_metrics['annual_return'] >= 0.20 and val_metrics['annual_return'] > 0.05 and decay < 60:
  310. print("✅ 策略设计成功!建议实盘测试")
  311. elif train_metrics['annual_return'] >= 0.15 and val_metrics['annual_return'] > 0:
  312. print("⚠️ 策略尚可,建议进一步优化")
  313. else:
  314. print("❌ 策略需重新设计")
  315. print("="*70)
  316. # 保存数据
  317. data.to_csv('cyb50_realistic_data.csv')
  318. print("\n数据已保存: cyb50_realistic_data.csv")
  319. if __name__ == "__main__":
  320. main()