micro.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. """
Micro-ecosystem identifier (extended version).
Built on a three-state HMM model + order-flow toxicity detection + smart-money identification.
  4. """
  5. from dataclasses import dataclass, field
  6. from typing import Dict, List, Optional, Tuple
  7. from enum import Enum
  8. import numpy as np
  9. import pandas as pd
  10. from hmmlearn import hmm
  11. class MicroState(Enum):
  12. """微观生态状态"""
  13. RANGING = "ranging" # 震荡
  14. TRENDING = "trending" # 趋势
  15. REVERSING = "reversing" # 反转
  16. class FlowToxicity(Enum):
  17. """订单流毒性等级"""
  18. NONE = "none" # 无毒性
  19. LOW = "low" # 轻度
  20. MEDIUM = "medium" # 中度
  21. HIGH = "high" # 高度毒性
  22. @dataclass
  23. class SmartMoneySignal:
  24. """主力资金信号"""
  25. detected: bool
  26. direction: str # "accumulate", "distribute", "neutral"
  27. confidence: float
  28. large_order_count: int
  29. avg_order_size: float
  30. @dataclass
  31. class MicroEcosystem:
  32. """微观生态数据结构"""
  33. state: MicroState
  34. state_probability: Dict[MicroState, float]
  35. flow_toxicity: FlowToxicity
  36. smart_money: SmartMoneySignal
  37. hmm_features: Dict[str, float]
  38. warnings: List[str] = field(default_factory=list)
  39. class MicroEcosystemIdentifier:
  40. """
  41. 微观生态识别器(扩展版)
  42. 功能:
  43. 1. HMM三态识别(震荡/趋势/反转)
  44. 2. 订单流毒性检测
  45. 3. 主力资金(Smart Money)识别
  46. """
  47. def __init__(
  48. self,
  49. n_components: int = 3,
  50. covariance_type: str = "full",
  51. n_iter: int = 100,
  52. # 有毒订单流检测参数
  53. toxic_price_reversal: float = 0.003,
  54. toxic_depth_ratio: float = 0.8,
  55. toxic_duration: int = 10,
  56. # 主力资金识别参数
  57. smart_money_threshold: float = 1_000_000,
  58. accumulation_period: int = 5
  59. ):
  60. self.n_components = n_components
  61. self.covariance_type = covariance_type
  62. self.n_iter = n_iter
  63. # 有毒订单流参数
  64. self.toxic_price_reversal = toxic_price_reversal
  65. self.toxic_depth_ratio = toxic_depth_ratio
  66. self.toxic_duration = toxic_duration
  67. # 主力资金参数
  68. self.smart_money_threshold = smart_money_threshold
  69. self.accumulation_period = accumulation_period
  70. # HMM模型
  71. self.hmm_model: Optional[hmm.GaussianHMM] = None
  72. self._is_fitted = False
  73. def fit(self, price_data: pd.DataFrame):
  74. """
  75. 训练HMM模型
  76. Args:
  77. price_data: 历史价格数据
  78. """
  79. features = self._extract_hmm_features(price_data)
  80. self.hmm_model = hmm.GaussianHMM(
  81. n_components=self.n_components,
  82. covariance_type=self.covariance_type,
  83. n_iter=self.n_iter,
  84. random_state=42
  85. )
  86. self.hmm_model.fit(features)
  87. self._is_fitted = True
  88. # 识别各状态对应的市场类型
  89. self._label_states(price_data, features)
  90. def identify(
  91. self,
  92. price_data: pd.DataFrame,
  93. trade_data: Optional[pd.DataFrame] = None,
  94. order_book_data: Optional[pd.DataFrame] = None
  95. ) -> MicroEcosystem:
  96. """
  97. 识别微观生态
  98. Args:
  99. price_data: 价格数据
  100. trade_data: 成交数据(可选)
  101. order_book_data: 订单簿数据(可选)
  102. Returns:
  103. MicroEcosystem: 微观生态识别结果
  104. """
  105. warnings = []
  106. # 1. HMM状态识别
  107. if not self._is_fitted:
  108. self.fit(price_data)
  109. hmm_state, state_probs, features = self._predict_hmm_state(price_data)
  110. # 2. 订单流毒性检测
  111. toxicity = self._detect_flow_toxicity(
  112. price_data, trade_data, order_book_data
  113. )
  114. if toxicity in [FlowToxicity.HIGH, FlowToxicity.MEDIUM]:
  115. warnings.append(f"Detected {toxicity.value} toxic order flow")
  116. # 3. 主力资金识别
  117. smart_money = self._detect_smart_money(price_data, trade_data)
  118. if smart_money.detected:
  119. warnings.append(
  120. f"Smart money {smart_money.direction} detected "
  121. f"(confidence: {smart_money.confidence:.2f})"
  122. )
  123. return MicroEcosystem(
  124. state=hmm_state,
  125. state_probability=state_probs,
  126. flow_toxicity=toxicity,
  127. smart_money=smart_money,
  128. hmm_features=features,
  129. warnings=warnings
  130. )
  131. def _extract_hmm_features(self, data: pd.DataFrame) -> np.ndarray:
  132. """提取HMM特征"""
  133. # 特征1: 对数收益率
  134. log_returns = np.log(data['close'] / data['close'].shift(1))
  135. # 特征2: 波动率(20日)
  136. volatility = log_returns.rolling(20).std()
  137. # 特征3: 价格位置(在20日区间中的位置)
  138. price_position = (data['close'] - data['low'].rolling(20).min()) / \
  139. (data['high'].rolling(20).max() - data['low'].rolling(20).min() + 1e-10)
  140. # 特征4: 成交量变化
  141. volume_change = data['volume'].pct_change()
  142. features = pd.DataFrame({
  143. 'log_returns': log_returns,
  144. 'volatility': volatility,
  145. 'price_position': price_position,
  146. 'volume_change': volume_change
  147. }).dropna()
  148. return features.values
  149. def _label_states(self, price_data: pd.DataFrame, features: np.ndarray):
  150. """根据历史表现标记HMM状态"""
  151. hidden_states = self.hmm_model.predict(features)
  152. # 计算各状态的统计特征来标记
  153. returns = pd.Series(features[:, 0])
  154. volatility = pd.Series(features[:, 1])
  155. state_stats = {}
  156. for state in range(self.n_components):
  157. mask = hidden_states == state
  158. if mask.sum() > 0:
  159. state_stats[state] = {
  160. 'volatility': volatility[mask].mean(),
  161. 'trend_strength': abs(returns[mask].mean())
  162. }
  163. # 标记状态:高波动+弱趋势=震荡,低波动+强趋势=趋势,高波动+强趋势=反转
  164. self.state_labels = {}
  165. for state, stats in state_stats.items():
  166. vol = stats['volatility']
  167. trend = stats['trend_strength']
  168. if vol > np.percentile(list(s['volatility'] for s in state_stats.values()), 66):
  169. if trend > np.percentile(list(s['trend_strength'] for s in state_stats.values()), 50):
  170. self.state_labels[state] = MicroState.REVERSING
  171. else:
  172. self.state_labels[state] = MicroState.RANGING
  173. else:
  174. if trend > np.percentile(list(s['trend_strength'] for s in state_stats.values()), 50):
  175. self.state_labels[state] = MicroState.TRENDING
  176. else:
  177. self.state_labels[state] = MicroState.RANGING
  178. def _predict_hmm_state(
  179. self,
  180. price_data: pd.DataFrame
  181. ) -> Tuple[MicroState, Dict[MicroState, float], Dict[str, float]]:
  182. """预测当前HMM状态"""
  183. features = self._extract_hmm_features(price_data)
  184. if len(features) == 0:
  185. return MicroState.RANGING, {s: 1/3 for s in MicroState}, {}
  186. # 预测状态概率
  187. log_prob, state_probs = self.hmm_model.score_samples(features[-1:])
  188. # 获取最新特征值
  189. latest_features = {
  190. 'log_returns': features[-1, 0],
  191. 'volatility': features[-1, 1],
  192. 'price_position': features[-1, 2],
  193. 'volume_change': features[-1, 3]
  194. }
  195. # 映射到标记的状态
  196. probs_by_state = {}
  197. for state_idx, prob in enumerate(state_probs[0]):
  198. labeled_state = self.state_labels.get(state_idx, MicroState.RANGING)
  199. probs_by_state[labeled_state] = probs_by_state.get(labeled_state, 0) + prob
  200. # 最可能状态
  201. current_state = max(probs_by_state, key=probs_by_state.get)
  202. return current_state, probs_by_state, latest_features
  203. def _detect_flow_toxicity(
  204. self,
  205. price_data: pd.DataFrame,
  206. trade_data: Optional[pd.DataFrame] = None,
  207. order_book_data: Optional[pd.DataFrame] = None
  208. ) -> FlowToxicity:
  209. """
  210. 检测订单流毒性
  211. 有毒订单流特征:
  212. 1. 大单成交后价格反向运动
  213. 2. 买卖盘深度持续失衡
  214. """
  215. toxicity_score = 0.0
  216. # 检测1: 价格反转
  217. if trade_data is not None and 'large_trade' in trade_data.columns:
  218. large_trades = trade_data[trade_data['large_trade'] == True]
  219. for idx in large_trades.index:
  220. if idx + 1 in price_data.index:
  221. trade_price = large_trades.loc[idx, 'price']
  222. future_price = price_data.loc[idx + 1, 'close']
  223. reversal = abs(future_price - trade_price) / trade_price
  224. if reversal > self.toxic_price_reversal:
  225. toxicity_score += 0.3
  226. # 简化检测:使用价格与成交量的关系
  227. returns = price_data['close'].pct_change().abs().iloc[-20:]
  228. volumes = price_data['volume'].iloc[-20:]
  229. # 高成交量但价格不动 = 有毒(对敲或洗盘)
  230. volume_ma = volumes.mean()
  231. high_volume_periods = volumes > volume_ma * 1.5
  232. low_volatility_periods = returns < returns.mean() * 0.5
  233. toxic_periods = (high_volume_periods & low_volatility_periods).sum()
  234. toxicity_score += min(0.4, toxic_periods * 0.1)
  235. # 检测2: 买卖盘深度失衡
  236. if order_book_data is not None:
  237. if 'bid_depth' in order_book_data and 'ask_depth' in order_book_data:
  238. depth_ratio = (
  239. order_book_data['bid_depth'] /
  240. (order_book_data['ask_depth'] + 1e-10)
  241. )
  242. imbalanced_periods = (
  243. (depth_ratio < self.toxic_depth_ratio) |
  244. (depth_ratio > 1 / self.toxic_depth_ratio)
  245. ).sum()
  246. if imbalanced_periods > self.toxic_duration:
  247. toxicity_score += 0.3
  248. # 判断毒性等级
  249. if toxicity_score >= 0.6:
  250. return FlowToxicity.HIGH
  251. elif toxicity_score >= 0.4:
  252. return FlowToxicity.MEDIUM
  253. elif toxicity_score >= 0.2:
  254. return FlowToxicity.LOW
  255. else:
  256. return FlowToxicity.NONE
  257. def _detect_smart_money(
  258. self,
  259. price_data: pd.DataFrame,
  260. trade_data: Optional[pd.DataFrame] = None
  261. ) -> SmartMoneySignal:
  262. """
  263. 识别主力资金行为
  264. 特征:
  265. 1. 连续大单买入/卖出
  266. 2. 价格维持横盘或缓慢运动(隐蔽建仓)
  267. 3. 委托簿买方/卖方深度持续增加
  268. """
  269. if trade_data is None or trade_data.empty:
  270. return SmartMoneySignal(
  271. detected=False,
  272. direction="neutral",
  273. confidence=0.0,
  274. large_order_count=0,
  275. avg_order_size=0.0
  276. )
  277. # 识别大单
  278. recent_data = trade_data.iloc[-self.accumulation_period * 10:]
  279. avg_volume = recent_data['volume'].mean()
  280. large_orders = recent_data[
  281. recent_data['volume'] > avg_volume * 3
  282. ]
  283. if len(large_orders) < 3:
  284. return SmartMoneySignal(
  285. detected=False,
  286. direction="neutral",
  287. confidence=0.0,
  288. large_order_count=len(large_orders),
  289. avg_order_size=large_orders['volume'].mean() if len(large_orders) > 0 else 0.0
  290. )
  291. # 分析大单方向
  292. if 'side' in large_orders.columns:
  293. buy_orders = large_orders[large_orders['side'] == 'buy']
  294. sell_orders = large_orders[large_orders['side'] == 'sell']
  295. else:
  296. # 通过价格变动推断方向
  297. price_change = price_data['close'].diff()
  298. buy_orders = large_orders[price_change.loc[large_orders.index] > 0]
  299. sell_orders = large_orders[price_change.loc[large_orders.index] < 0]
  300. buy_volume = buy_orders['volume'].sum() if len(buy_orders) > 0 else 0
  301. sell_volume = sell_orders['volume'].sum() if len(sell_orders) > 0 else 0
  302. # 检测建仓/出货
  303. total_large_volume = buy_volume + sell_volume
  304. if total_large_volume == 0:
  305. return SmartMoneySignal(
  306. detected=False,
  307. direction="neutral",
  308. confidence=0.0,
  309. large_order_count=len(large_orders),
  310. avg_order_size=0.0
  311. )
  312. buy_ratio = buy_volume / total_large_volume
  313. # 价格走势
  314. price_trend = (
  315. price_data['close'].iloc[-1] -
  316. price_data['close'].iloc[-self.accumulation_period]
  317. ) / price_data['close'].iloc[-self.accumulation_period]
  318. # 建仓特征:大单买入 + 价格横盘/微涨
  319. if buy_ratio > 0.6 and abs(price_trend) < 0.02:
  320. return SmartMoneySignal(
  321. detected=True,
  322. direction="accumulate",
  323. confidence=buy_ratio,
  324. large_order_count=len(large_orders),
  325. avg_order_size=large_orders['volume'].mean()
  326. )
  327. # 出货特征:大单卖出 + 价格横盘/微跌
  328. if buy_ratio < 0.4 and abs(price_trend) < 0.02:
  329. return SmartMoneySignal(
  330. detected=True,
  331. direction="distribute",
  332. confidence=1 - buy_ratio,
  333. large_order_count=len(large_orders),
  334. avg_order_size=large_orders['volume'].mean()
  335. )
  336. return SmartMoneySignal(
  337. detected=False,
  338. direction="neutral",
  339. confidence=0.0,
  340. large_order_count=len(large_orders),
  341. avg_order_size=large_orders['volume'].mean()
  342. )