├── .gitignore
├── .vscode
└── settings.json
├── CUSUMDECT
├── CUSUMDECT.py
└── bar5rb8888.csv
├── GeneticOptimizeforVNPYStrategy
├── .idea
│ ├── GeneticOptimizeforVNPYStrategy.iml
│ ├── encodings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── .vscode
│ └── settings.json
├── GeneticOptimize.py
├── GeneticOptimizev2.py
├── GeneticTrain.py
├── Knapsack.py
├── MultiTest.py
├── checkMutiple.py
└── file.txt
├── JDDataService
├── JQDataload.py
└── config.json
├── LSMT
├── .vscode
│ ├── launch.json
│ └── settings.json
├── LSTM.py
└── Stat1.py
├── MarketDataAnalyzer.py
├── PSOOptimize
└── PSOOptimize.py
├── README.md
├── Risk and Portfolio Knowledge Sharing
├── Basic and VaR.ipynb
├── README.md
├── RiskandPortfolio.pdf
└── Untitled.ipynb
├── TestData
└── rb1910.csv
└── TraderbySklearn
└── AnalyzebySklearn.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "C:\\Anaconda2\\python.exe",
3 | "python.linting.pylintEnabled": false,
4 | "python.linting.enabled": true,
5 | "python.linting.flake8Enabled": false,
6 | "python.linting.banditEnabled": true,
7 | "python.formatting.provider": "black"
8 | }
--------------------------------------------------------------------------------
/CUSUMDECT/CUSUMDECT.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import matplotlib.pyplot as plt
6 | import talib
7 |
8 |
9 |
def detect_via_cusum_lg(ts, istart=30, threshold_times=5):
    """
    Flag upward / downward shifts in a series with a two-sided CUSUM computed
    on the log ratio between each value and the previous rolling mean.

    :param ts: numpy array of values to scan (handed directly to talib)
    :param istart: window length of the rolling mean and rolling standard
        deviation; bars 0..istart are warm-up and always get signal 0
    :param threshold_times: a signal fires when a cumulative sum exceeds
        ``threshold_times`` times the previous bar's rolling standard deviation
    :return: numpy array with exactly one entry per element of ``ts``;
        entry ``i`` is 1 (upward signal at bar i), -1 (downward signal at
        bar i) or 0 (no signal)
    """
    n_bars = len(ts)
    # One pre-allocated slot per input bar keeps S_list[i] aligned with ts[i];
    # appending inside the loop produced an array two entries short whose
    # signals sat one position before the bar that triggered them.
    S_list = np.zeros(n_bars)

    S_h = 0
    S_l = 0

    meanArray = talib.SMA(ts, timeperiod=istart)
    stdArray = talib.STDDEV(np.log(ts / meanArray), timeperiod=istart)

    # Run through the final bar as well (the original stopped one bar early).
    for i in range(istart + 1, n_bars):
        prev_std = stdArray[i - 1]
        tslog = np.log(ts[i] / meanArray[i - 1])

        # The previous bar's std-dev acts as the slack term of both sums.
        S_h_ = max(0, S_h + tslog - prev_std)
        S_l_ = min(0, S_l + tslog + prev_std)

        limit = threshold_times * prev_std
        if S_h_ > limit:
            S_list[i] = 1
            S_h_ = 0  # restart the upper sum after a signal
        elif abs(S_l_) > limit:
            S_list[i] = -1
            S_l_ = 0  # restart the lower sum after a signal

        S_h = S_h_
        S_l = S_l_
    return S_list
42 |
43 |
# Load the bar data and use the close column as the series to scan
df5min = pd.read_csv("bar5rb8888.csv")
dt0 = np.array(df5min["close"])

# Run the detector, then collect the positions of the up / down signals
s_list = detect_via_cusum_lg(dt0, istart=30, threshold_times=5)
listup = [pos for pos, flag in enumerate(s_list) if flag == 1]
listdown = [pos for pos, flag in enumerate(s_list) if flag == -1]

# Upper panel: the series with signal markers
plt.subplot(2, 1, 1)
plt.plot(dt0, color='y', lw=2.)
plt.plot(dt0, '^', markersize=5, color='r', label='UP signal', markevery=listup)
plt.plot(dt0, 'v', markersize=5, color='g', label='DOWN signal', markevery=listdown)
plt.legend()

# Lower panel: the raw signal array
plt.subplot(2, 1, 2)
plt.title('s_list')
plt.plot(s_list, 'r-')

plt.show()
67 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/GeneticOptimizeforVNPYStrategy.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
93 |
94 |
95 |
96 | creator
97 | parameterlist
98 | evaluate
99 | object_func
100 | mid
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 | 1554100021821
291 |
292 |
293 | 1554100021821
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "C:\\ProgramData\\Anaconda2\\python.exe"
3 | }
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/GeneticOptimize.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | """
3 | 展示如何执行参数优化。
4 | """
5 | from __future__ import division
6 | from __future__ import print_function
7 | from vnpy.trader.app.ctaStrategy.ctaBacktesting import BacktestingEngine, MINUTE_DB_NAME, OptimizationSetting
8 | from vnpy.trader.app.ctaStrategy.strategy.strategyBollChannel import BollChannelStrategy
9 | import random
10 | import numpy as np
11 | from deap import creator, base, tools, algorithms
12 | import multiprocessing
13 | import time, datetime
14 | import pandas as pd
def object_func(strategy_avgTuple):
    """
    Optimisation objective: back-test one individual and return its scores.

    :param strategy_avgTuple: an individual, i.e. a sequence of
        ``{parameter_name: value}`` dicts that also exposes a
        ``parameterPackage`` attribute with the keys ``"symbol"`` (back-test
        settings) and ``"strategy"`` (strategy class)
    :return: tuple ``(capital, sharpeRatio, profitLossRatio)`` rounded to three
        decimals; all three are 0 if reading or rounding any of them fails
    """
    paraSet = strategy_avgTuple.parameterPackage
    symbol = paraSet["symbol"]
    strategy = paraSet["strategy"]

    # Create the back-testing engine
    engine = BacktestingEngine()

    # Data used by the back-test
    engine.setBacktestingMode(engine.BAR_MODE)  # bar (K-line) mode
    engine.setDatabase("VnTrader_1Min_Db", symbol["vtSymbol"])  # history database
    engine.setStartDate(symbol["StartDate"])  # first day of data
    engine.setEndDate(symbol["EndDate"])  # last day of data

    # Engine parameters
    engine.setSlippage(symbol["Slippage"])
    engine.setRate(symbol["Rate"])  # commission rate
    engine.setSize(symbol["Size"])  # contract size
    # NOTE(review): the price tick is taken from the "Slippage" entry; confirm
    # that this is intended rather than a separate tick-size setting.
    engine.setPriceTick(symbol["Slippage"])
    engine.setCapital(symbol["Capital"])

    # Merge the per-parameter dicts of the individual into one settings dict
    setting = {}
    for single_param in strategy_avgTuple:
        setting.update(single_param)

    engine.clearBacktestingResult()
    engine.initStrategy(strategy, setting)  # load the strategy
    engine.runBacktesting()  # run the back-test
    backresult = engine.calculateBacktestingResult()

    try:
        capital = round(backresult['capital'], 3)
        profitLossRatio = round(backresult['profitLossRatio'], 3)
        sharpeRatio = round(backresult['sharpeRatio'], 3)
    except Exception as e:
        # Deliberate best-effort: an individual whose statistics cannot be
        # read is scored 0 on every objective instead of aborting the run.
        print("Error: %s, %s" % (type(e).__name__, str(e)))
        capital = 0
        sharpeRatio = 0
        profitLossRatio = 0
    return capital, sharpeRatio, profitLossRatio
class GeneticOptimizeStrategy(object):
    """
    Genetic-algorithm (DEAP) parameter search for a strategy back-tested
    through ``object_func``.

    Typical use: ``GE = GeneticOptimizeStrategy(strategy, symbol, params)``
    followed by ``GE.poptoExcel(GE.optimize())``.
    """

    # Class-level defaults; instances work with the values given to __init__.
    Strategy = BollChannelStrategy
    Symbollist = {
        "vtSymbol": 'rb0000',
        "StartDate": "20140601",
        "EndDate": "20141101",
        "Slippage": 1,
        "Size": 10,
        "Rate": 2 / 10000.0,
        "Capital": 10000
    }
    Parameterlist = {
        'bollWindow': (10, 50, 1),  # Bollinger window
        'bollDev': (2, 10, 1),  # Bollinger channel threshold
        'slMultiplier': (3, 6),
        'barMins': [2, 3, 5, 10, 15, 20],
    }
    parameterPackage = {
        "symbol": Symbollist,
        "strategy": Strategy
    }

    # ------------------------------------------------------------------------
    def __init__(self, Strategy, Symbollist, Parameterlist):
        """
        :param Strategy: strategy class handed to the back-testing engine
        :param Symbollist: dict of back-test settings (the keys read by
            ``object_func``: vtSymbol, StartDate, EndDate, Slippage, Size,
            Rate, Capital)
        :param Parameterlist: dict mapping a parameter name to its search
            space, see ``parameter_generate``
        """
        self.strategy = Strategy
        self.symbol = Symbollist
        self.parameterlist = Parameterlist
        self.parameterPackage = {
            "strategy": self.strategy,
            "symbol": self.symbol
        }
        # Three objectives, all maximised (1.0 maximise; -1.0 minimise)
        creator.create("FitnessMulti", base.Fitness, weights=(1.0, 1.0, 1.0))
        # Attach this instance's package to every individual. The bare name
        # ``parameterPackage`` used before only resolved when a module-level
        # global of that name happened to exist.
        creator.create("Individual", list, fitness=creator.FitnessMulti,
                       parameterPackage=self.parameterPackage)

    # ------------------------------------------------------------------------
    def parameter_generate(self):
        """
        Draw one random value for every parameter in ``self.parameterlist``.

        Search-space encoding:
          * 3-tuple ``(start, stop, step)`` -> ``random.randrange``
          * 2-tuple ``(low, high)``         -> ``random.uniform``
          * list                            -> ``random.choice``
          * anything else                   -> used unchanged
        Tuples of any other length contribute no entry.

        :return: list of single-entry ``{name: value}`` dicts
        """
        parameter_list = []
        for key, value in self.parameterlist.items():
            if isinstance(value, tuple):
                if len(value) == 3:
                    parameter_list.append({key: random.randrange(value[0], value[1], value[2])})
                elif len(value) == 2:
                    parameter_list.append({key: random.uniform(value[0], value[1])})
            elif isinstance(value, list):
                parameter_list.append({key: random.choice(value)})
            else:
                parameter_list.append({key: value})
        return parameter_list

    def mutArrayGroup(self, individual, parameterlist, indpb):
        """
        Mutation operator: replace each gene, with probability ``indpb``, by
        the gene at the same position of a freshly generated parameter list.

        :param individual: individual to mutate in place
        :param parameterlist: zero-argument callable returning a new gene list
        :param indpb: per-gene replacement probability
        :return: 1-tuple holding the mutated individual
        """
        paralist = parameterlist()
        for i in range(len(individual)):
            if random.random() < indpb:
                individual[i] = paralist[i]
        return individual,

    def optimize(self):
        """
        Run the evolutionary search and return the population.

        :return: ``pop``, the population list that was handed to
            ``algorithms.eaMuPlusLambda`` (its own return value is not used)
        """
        toolbox = base.Toolbox()
        # Keep at least one worker: cpu_count() - 1 is 0 on a single-core host
        pool = multiprocessing.Pool(processes=max(1, multiprocessing.cpu_count() - 1))
        try:
            toolbox.register("map", pool.map)
            # Individuals are built from randomly generated strategy parameters
            toolbox.register("individual", tools.initIterate, creator.Individual,
                             self.parameter_generate)
            # A population is a list of individuals
            toolbox.register("population", tools.initRepeat, list,
                             toolbox.individual)
            toolbox.register("mate", tools.cxTwoPoint)  # two-point crossover
            toolbox.register("mutate", self.mutArrayGroup, parameterlist=self.parameter_generate,
                             indpb=0.6)  # mutation: redraw genes at random
            toolbox.register("evaluate", object_func)  # objective function
            toolbox.register("select", tools.selNSGA2)  # NSGA-II selection

            # Genetic-algorithm settings
            MU = 8  # individuals selected per generation
            LAMBDA = 5  # children produced per generation
            pop = toolbox.population(20)  # initial population size
            CXPB, MUTPB, NGEN = 0.5, 0.3, 10  # crossover prob., mutation prob., generations
            hof = tools.ParetoFront()  # non-dominated solutions

            # Per-generation statistics of the fitness values; their spread
            # indicates how far the population has converged.
            stats = tools.Statistics(lambda ind: ind.fitness.values)
            np.set_printoptions(suppress=True)  # no scientific notation in the log
            stats.register("mean", np.mean, axis=0)
            stats.register("std", np.std, axis=0)
            stats.register("min", np.min, axis=0)
            stats.register("max", np.max, axis=0)

            # Run the (mu + lambda) evolutionary algorithm
            algorithms.eaMuPlusLambda(pop, toolbox, MU, LAMBDA, CXPB, MUTPB, NGEN, stats,
                                      halloffame=hof, verbose=True)
        finally:
            # Release the worker processes even if the run fails
            pool.close()
            pool.join()
        return pop

    def poptoExcel(self, pop, number=1000, path="C:/data/"):
        """
        Write the best individuals and their fitness values to a spreadsheet.

        The file is created fresh each time (no append mode). Consecutive
        individuals with an identical string form are written only once.

        :param pop: population to rank
        :param number: how many individuals to request from ``tools.selBest``
        :param path: directory prefix; strategy name, symbol, today's date and
            ``.xls`` are appended to it
        """
        path = path + self.strategy.className + "_" + self.symbol["vtSymbol"] + str(datetime.date.today()) + ".xls"
        summayKey = ["StrategyParameter", "TestValues"]
        best_ind = tools.selBest(pop, number)

        # Collect plain rows first and build the frame once; this also covers
        # the last individual, which the previous ``range(0, len - 1)`` loop
        # never reached.
        rows = []
        previous = None
        for ind in best_ind:
            current = str(ind)
            if current != previous:
                rows.append({"StrategyParameter": self.complieString(ind),
                             "TestValues": ind.fitness.values})
            previous = current

        dft = pd.DataFrame(rows, columns=summayKey)
        dft.to_excel(path, index=False, header=True)
        print("回测统计结果输出到" + path)

    def complieString(self, individual):
        """
        Merge the per-parameter dicts of an individual into one dict and
        return its string form.
        """
        setting = {}
        for single_param in individual:
            setting.update(single_param)
        return str(setting)
if __name__ == "__main__":
    # Strategy under optimisation
    Strategy = BollChannelStrategy

    # Back-test settings for the instrument
    Symbollist = {
        "vtSymbol": "rb0000",
        "StartDate": "20140601",
        "EndDate": "20141101",
        "Slippage": 1,
        "Size": 10,
        "Rate": 2 / 10000.0,
        "Capital": 10000,
    }

    # Search space of every strategy parameter
    Parameterlist = {
        "bollWindow": (10, 50, 1),  # Bollinger window
        "bollDev": (2, 10, 1),  # Bollinger channel threshold
        "slMultiplier": (3, 6),
        "barMins": [2, 3, 5, 10, 15, 20],
    }

    # Module-level bundle of the three settings above
    parameterPackage = {
        "symbol": Symbollist,
        "parameterlist": Parameterlist,
        "strategy": Strategy,
    }

    GE = GeneticOptimizeStrategy(Strategy, Symbollist, Parameterlist)
    final_population = GE.optimize()
    GE.poptoExcel(final_population)
    print("-- End of (successful) evolution --")
196 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/GeneticOptimizev2.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | """
4 | 展示如何执行参数优化。
5 | """
6 |
7 | from __future__ import division
8 | from __future__ import print_function
9 | from vnpy.trader.app.ctaStrategy.ctaBacktesting import BacktestingEngine, MINUTE_DB_NAME, OptimizationSetting
10 | from vnpy.trader.app.ctaStrategy.strategy.strategyBBIBoll2V import BBIBoll2VStrategy
11 | from vnpy.trader.app.ctaStrategy.strategy.strategyBollChannel import BollChannelStrategy
12 | import random
13 | import numpy as np
14 | from deap import creator, base, tools, algorithms
15 | import multiprocessing
16 | import time,datetime
17 |
18 |
def parameter_generate():
    '''
    Randomly draw one value per strategy parameter from hard-coded ranges.

    Returns a 12-element list. Slots 4 and 5 are integer draws scaled by
    1/10 and 1/1000 respectively, slot 6 is picked from a fixed list of
    values, every other slot is an integer from ``random.randrange``.
    The slot names in the comments follow the index-to-key mapping used by
    ``object_func`` in this file.
    '''
    timerange = [2, 3, 5, 10, 15, 20]

    # The entries are evaluated top to bottom, so the random generator is
    # consumed in the same order as twelve separate draws would.
    return [
        random.randrange(10, 55, 1),            # slot 0: bollWindow
        random.randrange(1, 15, 1),             # slot 1: bollDev
        random.randrange(20, 55, 1),            # slot 2: bbibollWindow
        random.randrange(1, 12, 1),             # slot 3: bbibollDev
        random.randrange(20, 70, 1) / 10.0,     # slot 4: slMultiplier
        random.randrange(1, 30, 1) / 1000.0,    # slot 5: profitRate
        random.choice(timerange),               # slot 6: barMins
        random.randrange(0, 5, 1),              # slot 7: endsize
        random.randrange(2, 6, 1),              # slot 8: CDate
        random.randrange(0, 5, 1),              # slot 9: endplus
        random.randrange(0, 5, 1),              # slot 10: barsize
        random.randrange(0, 5, 1),              # slot 11: barplus
    ]
53 |
def object_func(strategy_avg_list):
    """
    Optimisation objective: back-test one parameter set with
    BBIBoll2VStrategy and return its scores.

    :param strategy_avg_list: container whose element 0 is the 12-slot
        parameter sequence (indices 0-11 are read below).
        NOTE(review): stock DEAP passes the individual itself to the
        evaluation function; presumably the customised algorithm used by
        ``optimize`` wraps it - confirm against that implementation.
    :return: tuple ``(capital, sharpeRatio, winningRate)`` rounded to three
        decimals; all three are 0 if any statistic cannot be read
    """
    strategy_avg = strategy_avg_list[0]

    # Create the back-testing engine
    engine = BacktestingEngine()

    # Data used by the back-test
    engine.setBacktestingMode(engine.BAR_MODE)  # bar (K-line) mode
    engine.setDatabase("VnTrader_1Min_Db", 'rb1901')  # history database
    engine.setStartDate('20180401')  # first day of data
    engine.setEndDate('20181215')  # last day of data

    # Engine parameters
    engine.setSlippage(1)
    engine.setRate(1/100)
    engine.setSize(10)
    engine.setPriceTick(1)
    engine.setCapital(10000)

    # Map the positional parameters onto the strategy's setting names
    setting = {
        'bollWindow': strategy_avg[0],  # Bollinger window
        'bollDev': strategy_avg[1],  # Bollinger channel threshold
        'bbibollWindow': strategy_avg[2],
        'bbibollDev': strategy_avg[3],
        'slMultiplier': strategy_avg[4],
        'profitRate': strategy_avg[5],
        'barMins': strategy_avg[6],
        'endsize': strategy_avg[7],
        'CDate': strategy_avg[8],
        'endplus': strategy_avg[9],
        'barsize': strategy_avg[10],
        'barplus': strategy_avg[11],
    }
    engine.clearBacktestingResult()
    engine.initStrategy(BBIBoll2VStrategy, setting)  # load the strategy
    engine.runBacktesting()  # run the back-test
    backresult = engine.calculateBacktestingResult()

    try:
        capital = round(backresult['capital'], 3)
        sharpeRatio = round(backresult['sharpeRatio'], 3)
        winningRate = round(backresult['winningRate'], 3)
        # Not returned, but still read so that a result without this
        # statistic keeps taking the fallback branch as before.
        round(backresult['totalResult'], 3)
    except Exception as e:
        # Deliberate best-effort: score the individual 0 on every objective.
        # winningRate must be set here too, otherwise the return statement
        # below fails with an unbound local variable.
        print("Error: %s, %s" % (type(e).__name__, str(e)))
        capital = 0
        sharpeRatio = 0
        winningRate = 0

    return capital, sharpeRatio, winningRate
132 |
# Optimisation direction: object_func returns three scores
# (capital, sharpeRatio, winningRate) and all of them are maximised, so the
# fitness needs one weight per returned value.
creator.create("FitnessMulti", base.Fitness, weights=(1.0, 1.0, 1.0))  # 1.0 maximise; -1.0 minimise
creator.create("Individual", list, fitness=creator.FitnessMulti)
136 |
def mutArrayGroup(individual, parameterlist, indpb):
    """
    Mutation operator: replace each gene, with probability ``indpb``, by the
    gene at the same position of a freshly generated parameter list.

    :param individual: sequence to mutate in place
    :param parameterlist: zero-argument callable returning a new gene list of
        at least the same length
    :param indpb: per-gene replacement probability
    :return: 1-tuple holding the (same) mutated individual
    """
    paralist = parameterlist()
    # ``range`` instead of ``xrange`` so the operator runs on Python 2 and 3
    for i in range(len(individual)):
        if random.random() < indpb:
            individual[i] = paralist[i]

    return individual,
145 |
def optimize():
    """
    Configure the DEAP toolbox, run the evolutionary search and return the
    population.

    :return: ``pop``, the population list that was handed to the algorithm
        (the algorithm's own return value is not used)
    """
    toolbox = base.Toolbox()
    # Keep at least one worker: cpu_count() - 1 is 0 on a single-core host
    pool = multiprocessing.Pool(processes=max(1, multiprocessing.cpu_count() - 1))
    try:
        toolbox.register("map", pool.map)
        # Individuals are built from randomly generated strategy parameters
        toolbox.register("individual", tools.initIterate, creator.Individual,
                         parameter_generate)
        # A population is a list of individuals
        toolbox.register("population", tools.initRepeat, list,
                         toolbox.individual)
        toolbox.register("mate", tools.cxTwoPoint)  # two-point crossover
        toolbox.register("mutate", mutArrayGroup, parameterlist=parameter_generate, indpb=0.6)  # mutation: redraw genes
        toolbox.register("evaluate", object_func)  # objective function
        toolbox.register("select", tools.selNSGA2)  # NSGA-II selection

        # Genetic-algorithm settings
        MU = 100  # individuals selected per generation
        LAMBDA = 100  # children produced per generation
        pop = toolbox.population(250)  # initial population size
        CXPB, MUTPB, NGEN = 0.5, 0.3, 10  # crossover prob., mutation prob., generations
        hof = tools.ParetoFront()  # non-dominated solutions

        # Per-generation statistics of the fitness values; their spread
        # indicates how far the population has converged.
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        np.set_printoptions(suppress=True)  # no scientific notation in the log
        stats.register("mean", np.mean, axis=0)
        stats.register("std", np.std, axis=0)
        stats.register("min", np.min, axis=0)
        stats.register("max", np.max, axis=0)

        # NOTE(review): ``eaMuPlusLambdv2`` is not a stock DEAP function;
        # presumably a locally added variant of eaMuPlusLambda - confirm.
        algorithms.eaMuPlusLambdv2(pop, toolbox, MU, LAMBDA, CXPB, MUTPB, NGEN, stats,
                                   halloffame=hof, verbose=True)
    finally:
        # Release the worker processes even if the run fails
        pool.close()
        pool.join()

    return pop
182 |
if __name__ == "__main__":
    pop = optimize()

    print("-- End of (successful) evolution --")
    best_ind = tools.selBest(pop, 1000)
    for i in best_ind:
        print("best_ind", i)
        print("best_value", i.fitness.values)

    # Write the same ranking to a text file. The raw literal yields the same
    # path as before but no longer contains the ``\U`` escape that Python 3
    # rejects.
    filepath = (r"C:\Users\shui0\OneDrive\Documents\Optimization\BBIBoll2V_1Strategy"
                + time.strftime("%m%d_%H_%M") + ".txt")

    # ``with`` closes (and flushes) the file even if a write fails
    with open(filepath, 'a') as f:
        for i in best_ind:
            f.write("best_ind:" + str(i) + '\t')
            f.write("best_value:" + str(i.fitness.values) + '\n')
198 |
199 | # strategy_avg1 = parameter_generate()
200 | #
201 | # print(strategy_avg1)
202 | # return1, return2 =object_func( strategy_avg1)
203 | # print(return1, return2)
204 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/GeneticTrain.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | # -*- coding:utf-8 _*-
3 | """
4 | https://deap.readthedocs.io/en/master/overview.html
5 | """
6 |
# Types
# A fitness with the single weight -1.0 and a list-based individual carrying it.
from deap import base, creator
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

# Initialization
import random
from deap import tools

# Number of genes per individual
IND_SIZE = 1000000

toolbox = base.Toolbox()
# A gene is one random.random() draw; an individual repeats it IND_SIZE times
toolbox.register("attribute", random.random)
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attribute, n=IND_SIZE)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
import multiprocessing

# NOTE(review): the pool is created at import time, outside the
# ``__main__`` guard - confirm this is safe on the target platform.
pool = multiprocessing.Pool()
toolbox.register("map", pool.map)
27 |
28 | # Operators
def evaluate(individual):
    """Return the fitness of ``individual``: a 1-tuple holding the sum of its genes."""
    total = sum(individual)
    return (total,)
31 |
# Genetic operators used by main(): two-point crossover, Gaussian mutation
# (mu=0, sigma=1, per-gene probability 0.1), tournament selection of size 3,
# and the evaluate() function above as the objective.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)
# NOTE(review): multiprocessing is already imported earlier in this file
import multiprocessing
37 |
38 |
39 | # Algorithms
def main():
    """
    Evolve a population of 50 individuals for 40 generations and return it.

    Each generation: select, clone, cross over neighbouring pairs with
    probability CXPB, mutate each individual with probability MUTPB,
    re-evaluate every individual whose fitness was invalidated, then replace
    the whole population with the offspring.
    """
    CXPB, MUTPB, NGEN = 0.5, 0.2, 40
    pop = toolbox.population(n=50)

    # Score the initial population
    for member, score in zip(pop, toolbox.map(toolbox.evaluate, pop)):
        member.fitness.values = score

    for _generation in range(NGEN):
        # Select, then clone so the operators never touch the parents
        chosen = toolbox.select(pop, len(pop))
        offspring = toolbox.map(toolbox.clone, chosen)

        # Crossover on pairs (0, 1), (2, 3), ...; an unpaired last element
        # is left alone
        for left in range(0, len(offspring) - 1, 2):
            first, second = offspring[left], offspring[left + 1]
            if random.random() < CXPB:
                toolbox.mate(first, second)
                del first.fitness.values
                del second.fitness.values

        # Mutation
        for candidate in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(candidate)
                del candidate.fitness.values

        # Only individuals changed above need a new fitness
        stale = [member for member in offspring if not member.fitness.valid]
        for member, score in zip(stale, toolbox.map(toolbox.evaluate, stale)):
            member.fitness.values = score

        # The offspring replace the population entirely
        pop[:] = offspring

    return pop
78 |
79 |
if __name__ == "__main__":
    pop = main()

    # Report the three best individuals with their fitness
    best_ind = tools.selBest(pop, 3)
    for i in best_ind:
        print("best_ind", i)
        print("best_value", i.fitness.values)

    # ``best_ind`` is a list, so it has no ``fitness`` attribute of its own;
    # summarise the fitness of its members instead.
    print("best_values", [ind.fitness.values for ind in best_ind])
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/Knapsack.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 |
4 |
5 | import numpy
6 |
7 |
8 |
9 | from deap import algorithms
10 |
11 | from deap import base
12 |
13 | from deap import creator
14 |
15 | from deap import tools
16 |
17 |
18 |
# Problem constants
IND_INIT_SIZE = 5   # number of draws used to build an initial individual
MAX_ITEM = 50       # evalKnapsack penalises bags holding more items than this
MAX_WEIGHT = 50     # evalKnapsack penalises bags heavier than this
NBR_ITEMS = 20      # number of distinct items available

# To assure reproducibility, the RNG seed is set prior to the items
# dict initialization. It is also seeded in main().
random.seed(64)

# Create the item dictionary: item name is an integer, and value is
# a (weight, value) 2-tuple.
items = {}
# Create random items and store them in the items' dictionary.
for i in range(NBR_ITEMS):
    items[i] = (random.randint(1, 10), random.uniform(0, 100))

# Two objectives matching evalKnapsack's (weight, value) return:
# weight has weight -1.0, value has weight 1.0.
creator.create("Fitness", base.Fitness, weights=(-1.0, 1.0))
# An individual is a set of item names
creator.create("Individual", set, fitness=creator.Fitness)

toolbox = base.Toolbox()

# Attribute generator: a random item name in [0, NBR_ITEMS)
toolbox.register("attr_item", random.randrange, NBR_ITEMS)

# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_item, IND_INIT_SIZE)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
74 |
75 |
76 |
def evalKnapsack(individual):
    """
    Score a bag of items.

    Returns ``(total weight, total value)`` of the items named in
    ``individual``, or ``(10000, 0)`` when the bag holds more than MAX_ITEM
    items or weighs more than MAX_WEIGHT.
    """
    total_weight = 0.0
    total_value = 0.0
    for name in individual:
        item_weight, item_value = items[name]
        total_weight += item_weight
        total_value += item_value

    overloaded = len(individual) > MAX_ITEM or total_weight > MAX_WEIGHT
    if overloaded:
        # Ensure overweighted bags are dominated
        return 10000, 0
    return total_weight, total_value
94 |
95 |
96 |
def cxSet(ind1, ind2):
    """Crossover for set individuals, applied in place.

    ``ind1`` becomes the intersection of both inputs; ``ind2`` becomes the
    symmetric difference of both inputs. The same two objects are returned.
    """
    # Snapshot of the first parent before it is overwritten
    original_first = set(ind1)
    ind1.intersection_update(ind2)
    ind2.symmetric_difference_update(original_first)
    return ind1, ind2
114 |
115 |
116 |
def mutSet(individual):
    """Mutate a set individual in place by removing or adding one element.

    With probability 0.5 a random member is removed (nothing happens when the
    set is empty); otherwise a random item name below NBR_ITEMS is added.
    Returns a 1-tuple holding the individual.
    """
    remove_one = random.random() < 0.5
    if not remove_one:
        individual.add(random.randrange(NBR_ITEMS))
    elif len(individual) > 0:
        # Sorting gives random.choice a stable sequence to pick from
        members = sorted(tuple(individual))
        individual.remove(random.choice(members))
    return (individual,)
132 |
133 |
134 |
# Wire the problem-specific operators into the toolbox: evalKnapsack as the
# objective, cxSet as crossover, mutSet as mutation, NSGA-II as selection.
toolbox.register("evaluate", evalKnapsack)
toolbox.register("mate", cxSet)
toolbox.register("mutate", mutSet)
toolbox.register("select", tools.selNSGA2)
142 |
143 |
144 |
def main():
    """
    Run the (mu + lambda) evolution for the knapsack problem.

    Returns the tuple ``(pop, stats, hof)``: the population, the statistics
    object and the Pareto-front hall of fame.
    """
    random.seed(64)
    NGEN, MU, LAMBDA = 50, 50, 100
    CXPB, MUTPB = 0.7, 0.2

    pop = toolbox.population(n=MU)
    hof = tools.ParetoFront()

    # Column-wise statistics over the fitness tuples
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (("avg", numpy.mean), ("std", numpy.std),
                           ("min", numpy.min), ("max", numpy.max)):
        stats.register(label, reducer, axis=0)

    algorithms.eaMuPlusLambda(pop, toolbox, MU, LAMBDA, CXPB, MUTPB, NGEN,
                              stats, halloffame=hof)

    return pop, stats, hof
184 |
185 |
186 |
if __name__ == "__main__":
    # Only the final population (first element of main()'s result) is used.
    pop = main()[0]
    # NOTE(review): k=-1 is passed straight to selBest; if selBest slices its
    # sorted result with [:k] this leaves out one individual -- confirm that
    # this is intended rather than "return everything".
    best_ind = tools.selBest(pop,-1)
    for i in best_ind:
        print("best_ind",i)
        print("best_value",i.fitness.values)
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/MultiTest.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Pool
2 | import time
3 |
4 |
def task(msg):
    """Greet ``msg`` on stdout, sleep for one second and return a tagged string.

    :param msg: any value; it is formatted with ``%s``.
    :return: the string ``'msg: <msg>'``.
    """
    # The parenthesised form prints the same text on Python 2 and Python 3;
    # the bare ``print 'x'`` statement is a SyntaxError on Python 3.
    print('hello, %s' % msg)
    time.sleep(1)  # stands in for one second of real work
    return 'msg: %s' % msg
9 |
10 |
11 |
if __name__ == '__main__':
    # On platforms that spawn workers instead of forking (e.g. Windows) every
    # worker re-imports this module; without the guard each import would try
    # to create another pool.
    pool = Pool(processes=4)
    msgs = [x for x in range(2)]
    try:
        # map() blocks until every task has returned and keeps input order.
        results = pool.map(task, msgs)
    finally:
        # Release the worker processes even when a task raised.
        pool.close()
        pool.join()
18 |
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/checkMutiple.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 |
3 |
def func(y,x):
    """Return the product ``y * x``."""
    product = y * x
    return product
def _apply_packed(packed):
    """Call ``packed[0]`` with the positional arguments in ``packed[1]``.

    Lives at module level so that it can be pickled and sent to pool workers.
    """
    target, args = packed
    return target(*args)


class someClass(object):
    """Hold a two-argument callable and evaluate it on a process pool."""

    def __init__(self,func):
        """Store ``func``; it must be picklable to be usable from ``go``."""
        self.f = func

    def go(self):
        """Print ``self.f(y, x)`` for the squares 0..81 paired with 0..9.

        ``Pool.map`` takes a single iterable and passes one argument per
        call; its third positional parameter is ``chunksize``.  The pairs are
        therefore zipped up front and unpacked inside the worker by
        ``_apply_packed``.
        """
        pool = multiprocessing.Pool(processes=4)
        lista = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
        jobs = [(self.f, pair) for pair in zip(lista, range(10))]
        try:
            print(pool.map(_apply_packed, jobs))
        finally:
            # Always release the worker processes.
            pool.close()
            pool.join()
14 |
# Script entry point: wrap the module-level ``func`` and run it via ``go``.
if __name__== '__main__' :
    c = someClass(func)
    c.go()
--------------------------------------------------------------------------------
/GeneticOptimizeforVNPYStrategy/file.txt:
--------------------------------------------------------------------------------
1 | hello world!
--------------------------------------------------------------------------------
/JDDataService/JQDataload.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | from __future__ import print_function
4 | import sys
5 | import json
6 | from datetime import datetime,date
7 | from time import time, sleep
8 |
9 | from pymongo import MongoClient, ASCENDING
10 | import pandas as pd
11 |
12 | from vnpy.trader.vtObject import VtBarData, VtTickData
13 | from vnpy.trader.app.ctaStrategy.ctaBase import (MINUTE_DB_NAME,
14 | DAILY_DB_NAME,
15 | TICK_DB_NAME)
16 |
17 | import jqdatasdk as jq
18 |
# Load the configuration (credentials plus the symbol -> data-source code
# mapping read through ``setting[symbol]`` further down).  The context manager
# closes the file handle as soon as it has been parsed.
with open('config.json') as config:
    setting = json.load(config)

mc = MongoClient()  # Mongo connection
dbMinute = mc[MINUTE_DB_NAME]  # minute-bar database
# dbDaily = mc[DAILY_DB_NAME]
# dbTick = mc[TICK_DB_NAME]

# Authenticate against the data service with the configured account.
USERNAME = setting['Username']
PASSWORD = setting['Password']
jq.auth(USERNAME, PASSWORD)

# Price fields requested for every bar.
FIELDS = ['open', 'high', 'low', 'close', 'volume']
33 |
34 |
35 | # ----------------------------------------------------------------------
def generateVtBar(row, symbol):
    """Build a VtBarData from one row of a minute-bar DataFrame.

    :param row: a row whose ``name`` is the bar timestamp and which has the
        fields 'open', 'high', 'low', 'close' and 'volume'.
    :param symbol: stored as both ``symbol`` and ``vtSymbol`` of the bar.
    :return: the populated bar; its ``date``/``time``/``datetime`` are one
        minute earlier than the row timestamp.
    """
    # Local import: the file only imports ``datetime`` and ``date``.
    from datetime import timedelta

    bar = VtBarData()

    bar.symbol = symbol
    bar.vtSymbol = symbol
    bar.open = row['open']
    bar.high = row['high']
    bar.low = row['low']
    bar.close = row['close']
    bar.volume = row['volume']

    # Move the bar time one minute earlier.  Doing the arithmetic on the
    # timestamp (instead of on the "HHMMSS" string) also rolls the date back
    # when the row is stamped 00:00, so the result is 23:59 of the previous
    # day rather than 23:59 of the same day.
    shifted = row.name - timedelta(minutes=1)
    bar.date = shifted.strftime("%Y%m%d")
    bar.time = shifted.strftime("%H%M%S")

    bar.datetime = datetime.strptime(' '.join([bar.date, bar.time]), '%Y%m%d %H%M%S')
    return bar
69 |
70 |
71 | # ----------------------------------------------------------------------
def jqdownloadMinuteBarBySymbol(symbol,startDate,endDate):
    """Download the 1-minute bars of one contract and upsert them into MongoDB.

    :param symbol: key into ``setting`` (mapped to the data-source code) and
        name of the target collection in the minute database.
    :param startDate: start date forwarded to ``jq.get_price``.
    :param endDate: end date forwarded to ``jq.get_price``.
    """
    start = time()

    cl = dbMinute[symbol]
    # Unique index on the bar time.  create_index replaces ensure_index,
    # which PyMongo deprecated and later removed.
    cl.create_index([('datetime', ASCENDING)], unique=True)

    df = jq.get_price(setting[symbol], start_date=startDate, end_date=endDate,
                      frequency='1m', fields=FIELDS, skip_paused=True)
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        # Upsert so that re-running the download overwrites instead of failing
        # on the unique index.
        cl.replace_one(flt, d, upsert=True)

    end = time()
    cost = (end - start) * 1000

    # An empty result has no first/last timestamp to report.
    if len(df) > 0:
        first, last = df.index[0], df.index[-1]
    else:
        first, last = None, None
    print(u'合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒' % (symbol, first, last, cost))
    print(jq.get_query_count())
91 |
def jqdownloadMappingExcel(exportpath = "C:\\Project\\"):
    """Export the securities list of type 'futures' to an Excel workbook.

    :param exportpath: directory prefix including the trailing separator; the
        workbook is written to ``exportpath + "Mapping<today>.xls"``.
    """
    # The default is spelled with escaped backslashes: "\P" is not a valid
    # escape sequence and triggers a warning on recent Python versions.
    getfuture = jq.get_all_securities(types=['futures'], date=None)
    target = exportpath + "Mapping" + str(date.today()) + ".xls"
    getfuture.to_excel(target, index=True, header=True)
if __name__ == '__main__':
    jqdownloadMappingExcel()
    # Download the main (continuous) contract
    jqdownloadMinuteBarBySymbol('rb0000', '2018-1-1', '2019-5-1')
    # Download a single contract
    jqdownloadMinuteBarBySymbol('zn1807','2018-6-2','2018-6-8')
103 |
--------------------------------------------------------------------------------
/JDDataService/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "Username": "请在聚宽申请",
3 | "Password": "请在聚宽申请",
4 | "rb1910":"RB1910.XSGE",
5 | "zn1807": "ZN1807.XSGE",
6 | "rb0000": "RB9999.XSGE"
7 | }
--------------------------------------------------------------------------------
/LSMT/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Python: Current File (Integrated Terminal)",
9 | "type": "python",
10 | "request": "launch",
11 | "program": "${file}",
12 | "console": "integratedTerminal"
13 | },
14 | {
15 | "name": "Python: Remote Attach",
16 | "type": "python",
17 | "request": "attach",
18 | "port": 5678,
19 | "host": "localhost",
20 | "pathMappings": [
21 | {
22 | "localRoot": "${workspaceFolder}",
23 | "remoteRoot": "."
24 | }
25 | ]
26 | },
27 | {
28 | "name": "Python: Module",
29 | "type": "python",
30 | "request": "launch",
31 | "module": "enter-your-module-name-here",
32 | "console": "integratedTerminal"
33 | },
34 | {
35 | "name": "Python: Django",
36 | "type": "python",
37 | "request": "launch",
38 | "program": "${workspaceFolder}/manage.py",
39 | "console": "integratedTerminal",
40 | "args": [
41 | "runserver",
42 | "--noreload",
43 | "--nothreading"
44 | ],
45 | "django": true
46 | },
47 | {
48 | "name": "Python: Flask",
49 | "type": "python",
50 | "request": "launch",
51 | "module": "flask",
52 | "env": {
53 | "FLASK_APP": "app.py"
54 | },
55 | "args": [
56 | "run",
57 | "--no-debugger",
58 | "--no-reload"
59 | ],
60 | "jinja": true
61 | },
62 | {
63 | "name": "Python: Current File (External Terminal)",
64 | "type": "python",
65 | "request": "launch",
66 | "program": "${file}",
67 | "console": "externalTerminal"
68 | }
69 | ]
70 | }
--------------------------------------------------------------------------------
/LSMT/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "C:\\ProgramData\\Anaconda2\\python.exe"
3 | }
--------------------------------------------------------------------------------
/LSMT/LSTM.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | from pymongo import MongoClient, ASCENDING
3 | import pandas as pd
4 | import numpy as np
5 | from datetime import datetime
6 | import talib
7 | import matplotlib.pyplot as plt
8 | import scipy.stats as scs
9 |
class DataAnalyzer(object):
    """Load minute bars from MongoDB or CSV and derive simple statistics.

    The analysis methods address rows by integer label, so the DataFrames
    passed in are expected to use the default 0..n-1 index.
    """

    def __init__(self, exportpath="C:\\Project\\", datformat=None):
        """Create an analyzer.

        :param exportpath: directory prefix (including the trailing
            separator) that is prepended to every exported CSV name.
        :param datformat: column names to keep, in order.  Defaults to
            ['datetime', 'high', 'low', 'open', 'close', 'volume'].
        """
        self.mongohost = None
        self.mongoport = None
        self.db = None
        self.collection = None
        self.df = pd.DataFrame()
        self.exportpath = exportpath
        # Build the default per instance; a list literal in the signature
        # would be one object shared by all instances.
        if datformat is None:
            datformat = ['datetime', 'high', 'low', 'open', 'close', 'volume']
        self.datformat = datformat

    def db2df(self, db, collection, start, end, mongohost="localhost", mongoport=27017, export2csv=False):
        """Read bars with ``start <= datetime < end`` from MongoDB into a DataFrame.

        The rows are sorted by 'datetime' ascending and reduced to the
        ``datformat`` columns.  The result is stored in ``self.df`` and
        returned; with ``export2csv`` it is also written to
        ``exportpath + collection + ".csv"``.
        """
        self.mongohost = mongohost
        self.mongoport = mongoport
        self.db = db
        self.collection = collection
        dbClient = MongoClient(self.mongohost, self.mongoport, connectTimeoutMS=500)
        try:
            cursor = dbClient[self.db][self.collection].find(
                {'datetime': {'$gte': start, '$lt': end}}).sort("datetime", ASCENDING)
            records = list(cursor)
        finally:
            # The records are fully materialised, the connection can go.
            dbClient.close()
        if records:
            self.df = pd.DataFrame(records)[self.datformat]
        else:
            # No match: return an empty frame with the expected columns
            # instead of failing on the column selection.
            self.df = pd.DataFrame(columns=self.datformat)
        self.df = self.df.reset_index(drop=True)
        if export2csv:
            self.df.to_csv(self.exportpath + self.collection + ".csv", index=True, header=True)
        return self.df

    def csv2df(self, csvpath, dataname="csv_data", export2csv=False):
        """Read bars from a CSV file into a DataFrame.

        Keeps the ``datformat`` columns and parses 'datetime'.  The result is
        stored in ``self.df`` and returned; with ``export2csv`` it is also
        written to ``exportpath + dataname + ".csv"``.
        """
        csv_df = pd.read_csv(csvpath)
        # Copy so that the datetime assignment below writes to an independent
        # frame rather than to a selection of csv_df.
        self.df = csv_df[self.datformat].copy()
        self.df["datetime"] = pd.to_datetime(self.df['datetime'])
        self.df = self.df.reset_index(drop=True)
        if export2csv:
            self.df.to_csv(self.exportpath + dataname + ".csv", index=True, header=True)
        return self.df

    def df2Barmin(self, inputdf, barmins, crossmin=1, export2csv=False):
        """Merge 1-minute bars into ``barmins``-minute bars.

        A merged bar is closed on every row for which
        ``(minute + crossmin) % barmins == 0``.  Use ``crossmin=0`` when the
        session's first bar is stamped 9:01 and ``crossmin=1`` when it is
        stamped 9:00.

        :return: DataFrame with the columns datetime, high, low, open, close,
            volume; 'datetime' is the stamp of the last merged row.
        """
        columns = ['datetime', 'high', 'low', 'open', 'close', 'volume']
        merged = []
        # None marks "no row merged into the current bar yet".
        openBarMin = None
        highBarMin = None
        lowBarMin = None
        volumeBarmin = 0
        # NOTE(review): the last input row is never merged (range stops at
        # len - 1), as in the original code -- confirm whether that is wanted.
        for i in range(0, len(inputdf) - 1):
            bar = inputdf.iloc[i, :].to_dict()
            if openBarMin is None:
                # Open of the merged bar is the open of its first minute.
                openBarMin = bar["open"]
            highBarMin = bar["high"] if highBarMin is None else max(bar["high"], highBarMin)
            lowBarMin = bar["low"] if lowBarMin is None else min(bar["low"], lowBarMin)
            volumeBarmin += int(bar["volume"])
            # The X-minute window is complete
            if not (bar["datetime"].minute + crossmin) % barmins:
                merged.append({'datetime': bar["datetime"], 'high': highBarMin, 'low': lowBarMin,
                               'open': openBarMin, 'close': bar["close"], 'volume': volumeBarmin})
                openBarMin = None
                highBarMin = None
                lowBarMin = None
                volumeBarmin = 0
        dfbarmin = pd.DataFrame(merged, columns=columns)
        if export2csv:
            dfbarmin.to_csv(self.exportpath + "bar" + str(barmins) + str(self.collection) + ".csv", index=True, header=True)
        return dfbarmin

    #--------------------------------------------------------------
    def Percentage(self, inputdf, export2csv=True):
        """Add the close-to-close change in percent as column 'Perentage'.

        The column is written into ``inputdf`` itself.  Rows without a
        previous close, or whose previous close is 0, get 0.  The returned
        frame additionally has every NaN and +inf replaced by 0.
        """
        dfPercentage = inputdf
        previous = dfPercentage["close"].shift(1)
        change = (dfPercentage["close"] - previous) / previous * 100.0
        # A previous close of exactly 0 yields 0 instead of inf/NaN.
        dfPercentage["Perentage"] = change.where(previous != 0.0, 0)

        dfPercentage = dfPercentage.fillna(0)
        dfPercentage = dfPercentage.replace(np.inf, 0)
        if export2csv:
            dfPercentage.to_csv(self.exportpath + "Percentage_" + str(self.collection) + ".csv", index=True, header=True)
        return dfPercentage

    def resultValuate(self,inputdf, nextBar, export2csv=True):
        """Placeholder: builds an empty summary frame and returns None."""
        summayKey = ["Percentage","TestValues"]
        dft = pd.DataFrame(columns=summayKey)

    def addResultBar(self, inputdf, export2csv = False):
        """Label each bar with the close movement 2, 4 and 6 bars later.

        For rows 1 .. len-7 the columns ``next{k}BarPercentage`` hold
        ``close[i + k] - close[i]`` and ``next{k}BarClose`` its sign
        (1 up, -1 down, 0 unchanged); every other row gets 0.  The columns
        are written into ``inputdf`` itself; the returned frame has NaN
        replaced by 0.
        """
        dfaddResultBar = inputdf
        horizons = (2, 4, 6)
        total = len(dfaddResultBar)
        # Same rows as range(1, total - 6): they all have a bar 6 steps ahead.
        labelled = np.zeros(total, dtype=bool)
        labelled[1:max(total - 6, 1)] = True
        close = dfaddResultBar["close"]
        differ = dict((k, close.shift(-k) - close) for k in horizons)

        for k in horizons:
            dfaddResultBar["next%sBarClose" % k] = np.sign(differ[k]).where(labelled)
        dfaddResultBar["next5BarCloseMakrup"] = None
        for k in horizons:
            dfaddResultBar["next%sBarPercentage" % k] = differ[k].where(labelled)

        dfaddResultBar = dfaddResultBar.fillna(0)
        if export2csv:
            dfaddResultBar.to_csv(self.exportpath + "addResultBar" + str(self.collection) + ".csv", index=True, header=True)
        return dfaddResultBar
141 |
142 |
def PrecentAnalysis(inputdf):
    """Plot the 'Perentage' distribution and print tail statistics.

    ``inputdf`` must contain the columns 'Perentage',
    'next{2,4,6}BarClose' and 'next{2,4,6}BarPercentage'.  For each tail
    size p in 1..4 the rows at or above the (100 - p)th percentile and the
    rows at or below the p-th percentile are selected, and for the bars 2,
    4 and 6 steps ahead the share of rows that rose (upper tail) or fell
    (lower tail) is printed together with the summed price difference.
    Nothing is returned.
    """
    dfPercentage = inputdf
    # ---------------- distribution of the percentage change ----------------
    plt.figure(figsize=(10,3))
    plt.hist(dfPercentage['Perentage'],bins=300,histtype='bar',align='mid',orientation='vertical',color='r')
    plt.show()



    for Perentagekey in range(1,5):
        # Thresholds of the upper and lower tail.
        lpHigh = np.percentile(dfPercentage['Perentage'], 100-Perentagekey)
        lpLow = np.percentile(dfPercentage['Perentage'], Perentagekey)

        de_anaylsisH = dfPercentage.loc[(dfPercentage["Perentage"]>= lpHigh)]
        HCount = de_anaylsisH['Perentage'].count()
        de_anaylsisL = dfPercentage.loc[(dfPercentage["Perentage"] <= lpLow)]
        LCount = de_anaylsisL['Perentage'].count()


        # --- 2 bars ahead ---
        percebtage = de_anaylsisH[de_anaylsisH["next2BarClose"]>0]["next2BarClose"].count()*100.000/HCount
        de_anaylsisHsum = de_anaylsisH["next2BarPercentage"].sum()
        de_anaylsisLsum = de_anaylsisL["next2BarPercentage"].sum()
        print('Precent 大于 %s, %s时候,k线数量为 %s,第二根K线结束价格上涨概率为 %s%%;' %(lpHigh,100-Perentagekey,HCount , percebtage))
        print('和值 %s' %(de_anaylsisHsum))

        # Same selection as de_anaylsisL above, recomputed.
        de_anaylsisL = dfPercentage.loc[(dfPercentage["Perentage"]<= lpLow)]
        percebtage = de_anaylsisL[de_anaylsisL["next2BarClose"]<0]["next2BarClose"].count()*100.000/LCount
        print('Precent 小于于 %s, %s时候,k线数量为 %s, 第二根K线结束价格下跌概率为 %s%%' %(lpLow,Perentagekey,LCount, percebtage))
        print('和值 %s' %(de_anaylsisLsum))

        # --- 4 bars ahead ---
        # The rows are filtered on next4BarClose; .count() is then taken on
        # the next2BarClose column of the filtered rows.
        de_anaylsisHsum = de_anaylsisH["next4BarPercentage"].sum()
        de_anaylsisLsum = de_anaylsisL["next4BarPercentage"].sum()
        percebtage = de_anaylsisH[de_anaylsisH["next4BarClose"] > 0]["next2BarClose"].count() * 100.000 / HCount
        print('Precent 大于 %s, %s时候,第四根K线结束价格上涨概率为 %s%%' % (lpHigh, 100 - Perentagekey, percebtage))
        # print('和值 %s' % (de_anaylsisHsum))
        percebtage = de_anaylsisL[de_anaylsisL["next4BarClose"] < 0]["next2BarClose"].count() * 100.000 / LCount
        print('Precent 小于于 %s, %s时候,第四根K线结束价格下跌概率为 %s%%' % (lpLow, Perentagekey, percebtage))
        print('和值 %s' % (de_anaylsisLsum))

        # --- 6 bars ahead ---
        de_anaylsisHsum = de_anaylsisH["next6BarPercentage"].sum()
        de_anaylsisLsum = de_anaylsisL["next6BarPercentage"].sum()
        percebtage = de_anaylsisH[de_anaylsisH["next6BarClose"] > 0]["next2BarClose"].count() * 100.000 / HCount
        print('Precent 大于 %s, %s时候,第六根K线结束价格上涨概率为 %s%%' % (lpHigh, 100 - Perentagekey, percebtage))
        print('和值 %s' % (de_anaylsisHsum))
        percebtage = de_anaylsisL[de_anaylsisL["next6BarClose"] < 0]["next2BarClose"].count() * 100.000 /LCount
        print('Precent 小于于 %s, %s时候,第六根K线结束价格下跌概率为 %s%%' % (lpLow, Perentagekey, percebtage))
        print('和值 %s' % (de_anaylsisLsum))
190 |
191 |
192 |
if __name__ == '__main__':
    DA = DataAnalyzer()
    # Load from the database
    start = datetime.strptime("20180901", '%Y%m%d')
    end = datetime.today()
    df = DA.db2df(db="VnTrader_1Min_Db", collection="m1905", start = start, end = end)
    # Load from CSV instead
    # df = DA.csv2df("rb1905.csv")
    # Pipeline: 10-minute bars -> percentage change -> forward labels -> report
    df10min = DA.df2Barmin(df,10)
    dfPercentage = DA.Percentage(df10min)
    dfPercentage = DA.addResultBar(dfPercentage)
    PrecentAnalysis(dfPercentage)
205 |
206 |
--------------------------------------------------------------------------------
/LSMT/Stat1.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | import pandas as pd
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from sklearn.preprocessing import MinMaxScaler
6 |
# Load the bars from CSV and parse the timestamps.
df = pd.read_csv("C:\\Project\\rb1905.csv")
df["datetime"] = pd.to_datetime(df['datetime'])
df = df.reset_index(drop=True)
# print(df)

# plt.figure(figsize = (18,9))
# plt.plot(range(df.shape[0]),(df['low']+df['high'])/2.0)
# plt.xticks(range(0,df.shape[0],500),df['datetime'].loc[::500],rotation=45)
# plt.xlabel('datetime',fontsize=18)
# plt.ylabel('Mid Price',fontsize=18)
# plt.show()

# Mid price = average of high and low of each bar.
high_prices = df.loc[:,'high'].values
low_prices = df.loc[:,'low'].values
mid_prices = (high_prices+low_prices)/2.

# The first 55000 mid prices are the training set, the rest the test set.
# NOTE(review): the split and the loop bounds below are hard-coded -- confirm
# that the CSV actually has more than 55000 rows.
train_data = mid_prices[:55000]
test_data = mid_prices[55000:]

scaler = MinMaxScaler()
# MinMaxScaler works on 2-D input, hence one column per series.
train_data = train_data.reshape(-1,1)
test_data = test_data.reshape(-1,1)
print(test_data)

# Normalise the training data window by window: each block of 2500 rows in
# [0, 50000) is fitted and scaled on its own.
smoothing_window_size = 2500
for di in range(0,50000,smoothing_window_size):
    print(di)
    scaler.fit(train_data[di:di+smoothing_window_size,:])
    train_data[di:di+smoothing_window_size,:] = scaler.transform(train_data[di:di+smoothing_window_size,:])

# You normalize the last bit of remaining data
# (di is 47500 after the loop, so this covers rows 50000 onwards.)
scaler.fit(train_data[di+smoothing_window_size:,:])
train_data[di+smoothing_window_size:,:] = scaler.transform(train_data[di+smoothing_window_size:,:])

train_data = train_data.reshape(-1)
# The test set is scaled with the scaler as last fitted above (train tail).
test_data = scaler.transform(test_data).reshape(-1)

# Exponential moving average smoothing of the training data, in place.
# NOTE(review): only the first 52500 entries are smoothed although the
# training slice is taken up to 55000 -- confirm this is intended.
EMA = 0.0
gamma = 0.1
for ti in range(52500):
    EMA = gamma*train_data[ti] + (1-gamma)*EMA
    train_data[ti] = EMA

all_mid_data = np.concatenate([train_data,test_data],axis=0)

window_size = 100
N = train_data.size

run_avg_predictions = []
run_avg_x = []

mse_errors = []

running_mean = 0.0
run_avg_predictions.append(running_mean)

decay = 0.5

# One-step-ahead prediction: the prediction for pred_idx is the exponentially
# decayed mean of the values up to pred_idx - 1.
for pred_idx in range(1,N):

    running_mean = running_mean*decay + (1.0-decay)*train_data[pred_idx-1]
    run_avg_predictions.append(running_mean)
    mse_errors.append((run_avg_predictions[-1]-train_data[pred_idx])**2)
    run_avg_x.append(pred_idx)

print('MSE error for EMA averaging: %.5f'%(0.5*np.mean(mse_errors)))
--------------------------------------------------------------------------------
/MarketDataAnalyzer.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | from pymongo import MongoClient, ASCENDING
3 | import pandas as pd
4 | import numpy as np
5 | from datetime import datetime
6 | import talib
7 | import matplotlib.pyplot as plt
8 | import scipy.stats as scs
9 |
10 |
class DataAnalyzer(object):
    """Load minute bars from MongoDB or CSV and analyse indicator signals.

    The analysis methods address rows by integer label, so the DataFrames
    passed in are expected to use the default 0..n-1 index.
    """

    def __init__(self, exportpath="C:\\Project\\", datformat=None):
        """Create an analyzer.

        :param exportpath: directory prefix (including the trailing
            separator) that is prepended to every exported CSV name.
        :param datformat: column names to keep, in order.  Defaults to
            ['datetime', 'high', 'low', 'open', 'close', 'volume'].
        """
        self.mongohost = None
        self.mongoport = None
        self.db = None
        self.collection = None
        self.df = pd.DataFrame()
        self.exportpath = exportpath
        # Build the default per instance; a list literal in the signature
        # would be one object shared by all instances.
        if datformat is None:
            datformat = ['datetime', 'high', 'low', 'open', 'close', 'volume']
        self.datformat = datformat

    def db2df(self, db, collection, start, end, mongohost="localhost", mongoport=27017, export2csv=False):
        """Read bars with ``start <= datetime < end`` from MongoDB into a DataFrame.

        The rows are sorted by 'datetime' ascending and reduced to the
        ``datformat`` columns.  The result is stored in ``self.df`` and
        returned; with ``export2csv`` it is also written to
        ``exportpath + collection + ".csv"``.
        """
        self.mongohost = mongohost
        self.mongoport = mongoport
        self.db = db
        self.collection = collection
        dbClient = MongoClient(self.mongohost, self.mongoport, connectTimeoutMS=500)
        try:
            cursor = dbClient[self.db][self.collection].find(
                {'datetime': {'$gte': start, '$lt': end}}).sort("datetime", ASCENDING)
            records = list(cursor)
        finally:
            # The records are fully materialised, the connection can go.
            dbClient.close()
        if records:
            self.df = pd.DataFrame(records)[self.datformat]
        else:
            # No match: return an empty frame with the expected columns
            # instead of failing on the column selection.
            self.df = pd.DataFrame(columns=self.datformat)
        self.df = self.df.reset_index(drop=True)
        if export2csv:
            self.df.to_csv(self.exportpath + self.collection + ".csv", index=True, header=True)
        return self.df

    def csv2df(self, csvpath, dataname="csv_data", export2csv=False):
        """Read bars from a CSV file into a DataFrame.

        Keeps the ``datformat`` columns and parses 'datetime'.  The result is
        stored in ``self.df`` and returned; with ``export2csv`` it is also
        written to ``exportpath + dataname + ".csv"``.
        """
        csv_df = pd.read_csv(csvpath)
        # Copy so that the datetime assignment below writes to an independent
        # frame rather than to a selection of csv_df.
        self.df = csv_df[self.datformat].copy()
        self.df["datetime"] = pd.to_datetime(self.df['datetime'])
        self.df = self.df.reset_index(drop=True)
        if export2csv:
            self.df.to_csv(self.exportpath + dataname + ".csv", index=True, header=True)
        return self.df

    def df2Barmin(self, inputdf, barmins, crossmin=1, export2csv=False):
        """Merge 1-minute bars into ``barmins``-minute bars.

        A merged bar is closed on every row for which
        ``(minute + crossmin) % barmins == 0``.  Use ``crossmin=0`` when the
        session's first bar is stamped 9:01 and ``crossmin=1`` when it is
        stamped 9:00.

        :return: DataFrame with the columns datetime, high, low, open, close,
            volume; 'datetime' is the stamp of the last merged row.
        """
        columns = ['datetime', 'high', 'low', 'open', 'close', 'volume']
        merged = []
        # None marks "no row merged into the current bar yet".
        openBarMin = None
        highBarMin = None
        lowBarMin = None
        volumeBarmin = 0
        # NOTE(review): the last input row is never merged (range stops at
        # len - 1), as in the original code -- confirm whether that is wanted.
        for i in range(0, len(inputdf) - 1):
            bar = inputdf.iloc[i, :].to_dict()
            if openBarMin is None:
                # Open of the merged bar is the open of its first minute.
                openBarMin = bar["open"]
            highBarMin = bar["high"] if highBarMin is None else max(bar["high"], highBarMin)
            lowBarMin = bar["low"] if lowBarMin is None else min(bar["low"], lowBarMin)
            volumeBarmin += int(bar["volume"])
            # The X-minute window is complete
            if not (bar["datetime"].minute + crossmin) % barmins:
                merged.append({'datetime': bar["datetime"], 'high': highBarMin, 'low': lowBarMin,
                               'open': openBarMin, 'close': bar["close"], 'volume': volumeBarmin})
                openBarMin = None
                highBarMin = None
                lowBarMin = None
                volumeBarmin = 0
        dfbarmin = pd.DataFrame(merged, columns=columns)
        if export2csv:
            dfbarmin.to_csv(self.exportpath + "bar" + str(barmins) + str(self.collection) + ".csv", index=True,
                            header=True)
        return dfbarmin

    def dfcci(self, inputdf, n, export2csv=True):
        """Add a 'cci' column computed with ``talib.CCI``.

        For every row ``i >= n`` the indicator is evaluated on the ``n`` rows
        ending at ``i`` and its last value is stored.  The column is written
        into ``inputdf`` itself; the returned frame has NaN and +inf
        replaced by 0.
        """
        dfcci = inputdf
        dfcci["cci"] = None
        for i in range(n, len(inputdf)):
            # .loc slices are inclusive on both ends: exactly n rows.
            df_ne = inputdf.loc[i - n + 1:i, :]
            cci = talib.CCI(np.array(df_ne["high"]), np.array(df_ne["low"]), np.array(df_ne["close"]), n)
            dfcci.loc[i, "cci"] = cci[-1]

        dfcci = dfcci.fillna(0)
        dfcci = dfcci.replace(np.inf, 0)
        if export2csv:
            dfcci.to_csv(self.exportpath + "dfcci" + str(self.collection) + ".csv", index=True, header=True)
        return dfcci

    # --------------------------------------------------------------
    def Percentage(self, inputdf, export2csv=True):
        """Add the close-to-close change in percent as column 'Perentage'.

        The column is written into ``inputdf`` itself.  Rows without a
        previous close, or whose previous close is 0, get 0.  The returned
        frame additionally has every NaN and +inf replaced by 0.
        """
        dfPercentage = inputdf
        previous = dfPercentage["close"].shift(1)
        change = (dfPercentage["close"] - previous) / previous * 100.0
        # A previous close of exactly 0 yields 0 instead of inf/NaN.
        dfPercentage["Perentage"] = change.where(previous != 0.0, 0)

        dfPercentage = dfPercentage.fillna(0)
        dfPercentage = dfPercentage.replace(np.inf, 0)
        if export2csv:
            dfPercentage.to_csv(self.exportpath + "Percentage_" + str(self.collection) + ".csv", index=True,
                                header=True)
        return dfPercentage

    def resultValuate(self, inputdf, nextBar, export2csv=True):
        """Placeholder: builds an empty summary frame and returns None."""
        summayKey = ["Percentage", "TestValues"]
        dft = pd.DataFrame(columns=summayKey)

    def dfMACD(self, inputdf, n, export2csv=False):
        """Add MACD columns computed with ``talib.MACD(close, 12, 26, 9)``.

        For every row ``i >= n`` the indicator is evaluated on the ``n`` rows
        ending at ``i``; the last macd/signal/hist values are stored.
        'histIndictor' is set to 1 when hist turns positive and to -1 when it
        turns negative relative to the previous row; the column only comes
        into existence once such a crossing is found.  The columns are
        written into ``inputdf`` itself; the returned frame has NaN and +inf
        replaced by 0.
        """
        dfMACD = inputdf
        for i in range(n, len(inputdf)):
            df_ne = inputdf.loc[i - n + 1:i, :]
            macd, signal, hist = talib.MACD(np.array(df_ne["close"]), 12, 26, 9)

            dfMACD.loc[i, "macd"] = macd[-1]
            dfMACD.loc[i, "signal"] = signal[-1]
            dfMACD.loc[i, "hist"] = hist[-1]
            if dfMACD.loc[i, "hist"] > 0 and dfMACD.loc[i - 1, "hist"] <= 0:
                dfMACD.loc[i, "histIndictor"] = 1
            elif dfMACD.loc[i, "hist"] < 0 and dfMACD.loc[i - 1, "hist"] >= 0:
                dfMACD.loc[i, "histIndictor"] = -1

        dfMACD = dfMACD.fillna(0)
        dfMACD = dfMACD.replace(np.inf, 0)
        if export2csv:
            dfMACD.to_csv(self.exportpath + "macd" + str(self.collection) + ".csv", index=True, header=True)
        return dfMACD

    def dfBOLL(self, inputdf, n, dev, export2csv=False):
        """Add Bollinger-band columns computed with talib.

        For every row ``i >= 100`` the bands are evaluated on the 100 rows
        ending at ``i``: ``mid = SMA(close, n)``, ``up``/``down`` =
        ``mid +/- dev * STDDEV(close, n)``.  'BuyPoint' holds ``high - up``
        when the high is above the upper band; otherwise 'ShortPoint' holds
        ``low - down`` when the low is below the lower band.  The columns are
        written into ``inputdf`` itself; the returned frame has NaN and +inf
        replaced by 0.
        """
        dfBil = inputdf
        for i in range(100, len(inputdf)):
            df_ne = inputdf.loc[i - 100 + 1:i, :]
            mid = talib.SMA(np.array(df_ne["close"]), n)
            std = talib.STDDEV(np.array(df_ne["close"]), n)
            up = mid[-1] + std[-1] * dev
            down = mid[-1] - std[-1] * dev
            dfBil.loc[i, "mid"] = mid[-1]
            dfBil.loc[i, "up"] = up
            dfBil.loc[i, "down"] = down
            if dfBil.loc[i, "up"] != np.inf and dfBil.loc[i, "high"] > dfBil.loc[i, "up"]:
                dfBil.loc[i, "BuyPoint"] = dfBil.loc[i, "high"] - dfBil.loc[i, "up"]
            elif dfBil.loc[i, "down"] != np.inf and dfBil.loc[i, "low"] < dfBil.loc[i, "down"]:
                dfBil.loc[i, "ShortPoint"] = dfBil.loc[i, "low"] - dfBil.loc[i, "down"]

        dfBil = dfBil.fillna(0)
        dfBil = dfBil.replace(np.inf, 0)
        if export2csv:
            dfBil.to_csv(self.exportpath + "BILLBOLL" + str(self.collection) + ".csv", index=True, header=True)
        return dfBil

    def addResultBar(self, inputdf, startBar=2, endBar=12, step=2, export2csv=False):
        """Label each bar with the close movement a few bars later.

        For every horizon ``k`` in ``range(startBar, endBar, step)`` and the
        rows 1 .. len - endBar - step - 1, ``next{k}BarDiffer`` holds
        ``close[i + k] - close[i]`` and ``next{k}BarClose`` its sign
        (1 up, -1 down, 0 unchanged); every other row gets 0.  The columns
        are written into ``inputdf`` itself; the returned frame has NaN and
        +inf replaced by 0.
        """
        dfaddResultBar = inputdf
        total = len(dfaddResultBar)
        # Same rows as range(1, total - endBar - step).
        labelled = np.zeros(total, dtype=bool)
        labelled[1:max(total - endBar - step, 1)] = True
        close = dfaddResultBar["close"]
        for nextbar in range(startBar, endBar, step):
            differ = close.shift(-nextbar) - close
            dfaddResultBar["next" + str(nextbar) + "BarDiffer"] = differ.where(labelled)
            dfaddResultBar["next" + str(nextbar) + "BarClose"] = np.sign(differ).where(labelled)

        dfaddResultBar = dfaddResultBar.fillna(0)
        dfaddResultBar = dfaddResultBar.replace(np.inf, 0)
        if export2csv:
            dfaddResultBar.to_csv(self.exportpath + "addResultBar" + str(self.collection) + ".csv", index=True,
                                  header=True)
        return dfaddResultBar

    def resultOutput(self, de_anaylsisH, startBar=2, endBar=12, step=2, export2csv=False):
        """Summarise the forward labels of the selected rows.

        For every horizon in ``range(startBar, endBar, step)`` one result
        row is produced with the number and share of rising/falling cases
        and sum/mean/std/max/min of ``next{k}BarDiffer``.  The figures are
        also printed.  An empty selection yields rows of zeros.

        :return: DataFrame with one row per horizon.
        """
        HCount = len(de_anaylsisH)
        print ("CheckPoint : %s" % (HCount))
        columns = ["Bar Count", "TotalCount", "Upcount", "Upprecent", "Downcount", "Downprecent",
                   "closesum", "closemean", "closestd", "closemax", "closemin"]
        rows = []

        for bar in range(startBar, endBar, step):
            closes = de_anaylsisH["next" + str(bar) + "BarClose"]
            differs = de_anaylsisH["next" + str(bar) + "BarDiffer"]
            Upcount = int((closes > 0).sum())
            Downcount = int((closes < 0).sum())
            if HCount:
                Upprecent = Upcount * 100.000 / HCount
                Downprecent = Downcount * 100.000 / HCount
                closemean = np.mean(differs)
                closesum = np.sum(differs)
                closestd = np.std(differs)
                closemax = np.max(differs)
                closemin = np.min(differs)
            else:
                # Nothing selected: report zeros instead of dividing by zero.
                Upprecent = Downprecent = 0.0
                closemean = closesum = closestd = closemax = closemin = 0.0
            print("k线数量为 %s, ,第%s根K线结束, 上涨k线为%s 价格上涨概率为 %s%%;" % (HCount, bar, Upcount, Upprecent))
            print("k线数量为 %s, ,第%s根K线结束, 下跌k线为%s 价格下跌概率为 %s%%;" % (HCount, bar, Downcount, Downprecent))
            print('和值 %s, 均值 %s, std %s, max: %s, min: %s' % (closesum, closemean, closestd, closemax, closemin))
            rows.append({"Bar Count": bar, "TotalCount": HCount, "Upcount": Upcount, "Upprecent": Upprecent,
                         "Downcount": Downcount, "Downprecent": Downprecent, "closesum": closesum,
                         "closemean": closemean, "closestd": closestd, "closemax": closemax,
                         "closemin": closemin})

        dfResult = pd.DataFrame(rows, columns=columns)
        dfResult = dfResult.fillna(0)
        dfResult = dfResult.replace(np.inf, 0)
        if export2csv:
            dfResult.to_csv(self.exportpath + "addResultBar" + str(self.collection) + ".csv", index=True, header=True)
        return dfResult

    def macdAnalysis(self, inputdf, export2csv=True):
        """Evaluate MACD histogram crossings in the tails of the macd distribution.

        For each tail size p in (10, 15, 20): rows with macd at or above the
        (100 - p)th percentile and ``histIndictor == 1``, and rows with macd
        at or below the p-th percentile and ``histIndictor == -1``, are
        summarised with ``resultOutput`` for the horizons 2..10.  Each result
        row is tagged with the percentile threshold that selected it ('hist')
        and the crossing direction ('histIndictor').
        """
        dfMACD = inputdf
        frames = []

        for hist in range(10, 25, 5):
            lpHigh = np.percentile(dfMACD['macd'], 100 - hist)
            lpLow = np.percentile(dfMACD['macd'], hist)

            selections = (
                (lpHigh, 1, dfMACD["macd"] >= lpHigh),
                # The lower tail is tagged with its own threshold, lpLow.
                (lpLow, -1, dfMACD["macd"] <= lpLow),
            )
            for threshold, direction, in_tail in selections:
                subset = dfMACD.loc[in_tail & (dfMACD["histIndictor"] == direction)]
                df = self.resultOutput(subset, 2, 12, 2)
                df["hist"] = threshold
                df["histIndictor"] = direction
                frames.append(df)

        dfAnalysis = pd.concat(frames)
        dfAnalysis = dfAnalysis.fillna(0)
        dfAnalysis = dfAnalysis.replace(np.inf, 0)
        if export2csv:
            dfAnalysis.to_csv(self.exportpath + "_Anaylsis" + str(self.collection) + ".csv", index=False, header=True)
        return dfAnalysis
270 |
271 |
272 |
if __name__ == '__main__':
    DA = DataAnalyzer()
    # Load from the database
    start = datetime.strptime("20180801", '%Y%m%d')
    end = datetime.strptime("20190501", '%Y%m%d')
    df = DA.db2df(db="VnTrader_1Min_Db", collection="CF905", start=start, end=end)
    # Load from CSV instead
    # df = DA.csv2df("rb1905.csv")
    # NOTE: despite the variable name, the bars are merged with barmins=10.
    df5min = DA.df2Barmin(df, 10)


    # print ("Dev is %s-------------------" %dev)
    # Pipeline: forward labels -> MACD (100-row window) -> tail analysis
    df5minAdd = DA.addResultBar(df5min, export2csv=True)
    dfMACD = DA.dfMACD(df5minAdd, 100, export2csv=True)
    DA.macdAnalysis(dfMACD, export2csv=True)
288 |
289 |
290 |
--------------------------------------------------------------------------------
/PSOOptimize/PSOOptimize.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 |
3 | """
4 | 展示如何实现PSO粒子群优化VNPY策略参数
5 | 1、静态函数object_func,本函数为优化目标函数,根据随机生成的策略参数,运行回测后自动返回1个结果指标:夏普比率。这个是直接抄GeneticOptimizev2的。这个独立出来是为了之后多进程实现。这里传入的是一个[{key1:para},{key2:para}] 这样结构的参数队列。
6 |
7 | 2、类PSOOptimize,放置主要函数如下:
8 | 2.1、__init__ 对象初始化,需要传入的参数是Strategy量化策略, Symbollist回测品种, Parameterlist参数范围。
9 | 2.2、creator.create("FitnessMax", base.Fitness, weights=(1.0,)) 定义优化方向
10 | creator.create("Particle", list, fitness=creator.FitnessMax, speed=list, pmin = list, pmax = list,smin=list, smax=list, parameterPackage = dict, best=None)
11 | 定义粒子类,其中包括参数list,回测返回结果,速度list,所在范围上下限队列,和速度 上下限 队列;还有一个打包的参数字典,主要为了之后多线程调用。和示例代码对比最大区别就是多了所在范围上线,因为如果没有,会出现负参数的情况报错。
12 | 2.3、particle_generate, 生成粒子,包含随机位置,和速度;这里面只支持(start,end,pace) 这样一种参数范围赋值。start和end就是位置范围上下限。步长pace就是速度上限,负pace就是速度下限。
13 | 2.4、updateParticle,更新粒子信息,根据粒子群最佳位置best,去更新粒子part的位置和速度。如果速度或位置在上下限,就去上下限值。如果位置值是整数,比是MAwindow这样,就会更新值也是整数。
14 | 速度公式:
15 | v[] = v[] + c1 * rand() * (pbest[] - present[]) + c2 * rand() * (gbest[] - present[])
16 | 位置公式:
17 | present[] = persent[] + v[]
18 | 2.5、optimize,这个没什么好说,主要就是绑定函数到toolbox,然后定义粒子群的粒子数量,和寻觅次数。优化出最优结果。
19 | 使用pool.map实现了多线程
20 | 2.6、mutated, 自变异函数
21 | """
22 | from __future__ import division
23 | from __future__ import print_function
24 | import operator
25 | import random
26 | import numpy
27 | from deap import base
28 | from deap import creator
29 | from deap import tools
30 | from vnpy.trader.app.ctaStrategy.ctaBacktesting import BacktestingEngine, MINUTE_DB_NAME, OptimizationSetting
31 | from vnpy.trader.app.ctaStrategy.strategy.strategyBollChannel import BollChannelStrategy
32 | import multiprocessing
33 | import pandas as pd
34 | import datetime
35 |
def object_func(strategy_avgTuple):
    """
    Objective function: backtest one particle and return its Sharpe ratio.

    :param strategy_avgTuple: a particle, i.e. a sequence of single-entry
        dicts ``{parameter_name: value}`` that also carries a
        ``parameterPackage`` attribute with the keys ``"symbol"`` (backtest
        settings) and ``"strategy"`` (the strategy class).
    :return: one-element tuple ``(sharpeRatio,)``; the ratio is rounded to
        3 decimals, or 0 when the backtest result has no usable
        ``'sharpeRatio'`` entry.
    """
    paraSet = strategy_avgTuple.parameterPackage
    symbol = paraSet["symbol"]
    strategy = paraSet["strategy"]

    # Create the backtesting engine and point it at the historical data.
    engine = BacktestingEngine()
    engine.setBacktestingMode(engine.BAR_MODE)  # backtest on bars
    engine.setDatabase("VnTrader_1Min_Db", symbol["vtSymbol"])  # history database
    engine.setStartDate(symbol["StartDate"])  # first day of data
    engine.setEndDate(symbol["EndDate"])  # last day of data

    # Trading-cost and contract settings.
    engine.setSlippage(symbol["Slippage"])  # slippage
    engine.setRate(symbol["Rate"])  # commission rate
    engine.setSize(symbol["Size"])  # contract size
    # NOTE(review): the price tick reuses the "Slippage" entry; confirm that
    # this is intended rather than a dedicated price-tick setting.
    engine.setPriceTick(symbol["Slippage"])
    engine.setCapital(symbol["Capital"])

    # Flatten the particle's single-entry dicts into one strategy setting dict.
    setting = {}
    for item in strategy_avgTuple:
        setting.update(item)

    engine.clearBacktestingResult()
    engine.initStrategy(strategy, setting)  # load the strategy
    engine.runBacktesting()
    backresult = engine.calculateBacktestingResult()

    try:
        sharpeRatio = round(backresult['sharpeRatio'], 3)
    except Exception as e:
        # Best effort: a result without a usable Sharpe ratio scores 0
        # instead of aborting the whole optimization run.
        print("Error: %s, %s" % (type(e).__name__, str(e)))
        sharpeRatio = 0

    # deap expects fitness values as a tuple.
    return sharpeRatio,
87 |
88 |
89 |
90 |
class PSOOptimize(object):
    """
    Particle-swarm optimizer for the parameters of a trading strategy.

    A particle is a ``creator.Particle`` (a list subclass) holding one
    single-entry dict ``{parameter_name: value}`` per parameter, plus
    parallel lists for the speed and the position/speed bounds.
    """
    # Class-level defaults; __init__ overwrites all of them per instance.
    strategy = None
    symbol = {}
    parameterlist = {}
    parameterPackage = {}

    # ------------------------------------------------------------------------
    def __init__(self, Strategy, Symbollist, Parameterlist):
        """
        :param Strategy: strategy class handed to the objective function.
        :param Symbollist: dict of backtest settings for the instrument.
        :param Parameterlist: dict mapping a parameter name to either a
            ``(start, end, pace)`` tuple or a fixed number.
        """
        self.strategy = Strategy
        self.symbol = Symbollist
        self.parameterlist = Parameterlist
        # Bundled so that each particle carries what object_func needs.
        self.parameterPackage = {
            "strategy": self.strategy,
            "symbol": self.symbol
        }

        # Maximize a single fitness value.
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        creator.create("Particle", list, fitness=creator.FitnessMax, speed=list,
                       pmin=list, pmax=list, smin=list, smax=list,
                       parameterPackage=dict, best=None)

    def particle_generate(self):
        """
        Build one random particle from ``self.parameterlist``.

        Two parameter forms are supported:
        * ``(start, end, pace)``: position drawn between start and end
          (``random.randrange`` when start is an int, so end is excluded;
          ``random.uniform`` otherwise), speed drawn in ``[-pace, pace]``;
          start/end become the position bounds and -pace/pace the speed bounds.
        * a single int/float: a fixed value with zero speed.
        Anything else is reported on stdout and left out of the particle.
        """
        position_list = []
        speed_list = []
        pmin_list = []
        pmax_list = []
        smin_list = []
        smax_list = []
        for key, value in self.parameterlist.items():
            if isinstance(value, tuple):
                if len(value) == 3:
                    start, end, pace = value
                    if isinstance(start, int):
                        position_list.append({key: random.randrange(start, end)})
                    else:
                        position_list.append({key: random.uniform(start, end)})
                    pmin_list.append(start)
                    pmax_list.append(end)
                    speed_list.append(random.uniform(-pace, pace))
                    smin_list.append(-pace)
                    smax_list.append(pace)
                else:
                    print("Paramerte list incorrect")
            elif isinstance(value, (int, float)):
                position_list.append({key: value})
                pmin_list.append(value)
                pmax_list.append(value)
                speed_list.append(0)
                smin_list.append(0)
                smax_list.append(0)
            else:
                print("Paramerte list incorrect")
        particle = creator.Particle(position_list)
        particle.speed = speed_list
        particle.pmin = pmin_list
        particle.pmax = pmax_list
        particle.smin = smin_list
        particle.smax = smax_list
        particle.parameterPackage = self.parameterPackage
        return particle

    def updateParticle(self, part, best, phi1, phi2):
        """
        Move ``part`` towards its own best and the swarm best, in place.

        Speed:    v = v + rand(0, phi1) * (pbest - x) + rand(0, phi2) * (gbest - x)
        Position: x = x + v

        Each speed is clamped to ``[smin[i], smax[i]]`` and each position to
        ``[pmin[i], pmax[i]]``. A parameter whose current value is an int
        stays an int (the new position is rounded).

        :param part: particle to update; needs ``best``, ``speed``, ``smin``,
            ``smax``, ``pmin`` and ``pmax``.
        :param best: best particle of the whole swarm.
        :param phi1: upper bound of the random weight on the personal best.
        :param phi2: upper bound of the random weight on the swarm best.
        """
        count = len(part)
        u1 = [random.uniform(0, phi1) for _ in range(count)]
        u2 = [random.uniform(0, phi2) for _ in range(count)]

        new_speed = []
        for i in range(count):
            cognitive = u1[i] * self.sub(part.best[i], part[i])
            social = u2[i] * self.sub(best[i], part[i])
            speed = part.speed[i] + (cognitive + social)
            # Clamp against the bound of THIS parameter, not the whole list.
            speed = min(max(speed, part.smin[i]), part.smax[i])
            new_speed.append(speed)
        part.speed = new_speed

        for i, item in enumerate(part):
            key, current = next(iter(item.items()))
            moved = current + part.speed[i]
            if isinstance(current, int):
                # int(...) because round() returns a float on Python 2.
                moved = int(round(moved))
            if moved <= part.pmin[i]:
                moved = part.pmin[i]
            elif moved >= part.pmax[i]:
                moved = part.pmax[i]
            # Store a NEW dict so copies of this particle that still share the
            # old dict (e.g. a recorded best) are not changed behind their back.
            part[i] = {key: moved}

    def sub(self, a, b):
        """Return the difference between the values of two single-entry dicts."""
        return next(iter(a.values())) - next(iter(b.values()))

    def mutated(self, particle, MUTPB):
        """
        Mutate a particle in place.

        :param particle: particle to mutate.
        :param MUTPB: per-parameter probability of replacing the value with
            the one from a freshly generated random particle.
        :return: the same (mutated) particle.
        """
        newPart = self.particle_generate()
        for i in range(len(particle)):
            if random.random() < MUTPB:
                particle[i] = newPart[i]
        return particle

    def _snapshot(self, part):
        """Return an independent copy of a particle's position and fitness."""
        clone = creator.Particle(dict(item) for item in part)
        clone.fitness.values = part.fitness.values
        return clone

    def optimize(self):
        """
        Run the swarm and return ``(pop, best)``.

        Fitness evaluation is spread over a process pool through ``pool.map``.

        :return: the final population and the best particle seen.
        """
        toolbox = base.Toolbox()
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        toolbox.register("map", pool.map)
        toolbox.register("particle", self.particle_generate)
        toolbox.register("population", tools.initRepeat, list, toolbox.particle)
        toolbox.register("update", self.updateParticle, phi1=2.0, phi2=2.0)
        toolbox.register("evaluate", object_func)

        pop = toolbox.population(n=20)  # swarm of 20 particles
        GEN = 20  # number of generations
        MUTPB = 0.1  # mutation probability
        best = None
        try:
            for g in range(GEN):
                # Evaluate the whole swarm in parallel.
                fitnesses = toolbox.map(toolbox.evaluate, pop)
                for part, fit in zip(pop, fitnesses):
                    part.fitness.values = fit
                    # Record personal and swarm bests as independent copies.
                    if part.best is None or part.best.fitness < part.fitness:
                        part.best = self._snapshot(part)
                    if best is None or best.fitness < part.fitness:
                        best = self._snapshot(part)

                if g < GEN - 1:
                    # Before the last generation: move and possibly mutate.
                    for part in pop:
                        toolbox.update(part, best)
                        if random.random() < MUTPB:
                            self.mutated(part, MUTPB)
        finally:
            # Release the worker processes even if a backtest fails.
            pool.close()
            pool.join()

        return pop, best
246 |
247 |
248 |
249 |
if __name__ == "__main__":

    # Strategy class to optimize.
    Strategy = BollChannelStrategy

    # Backtest settings for the instrument.
    Symbol = {
        "vtSymbol": 'rb1905',
        "StartDate": "20181001",
        "EndDate": "20190301",
        "Slippage": 1,
        "Size": 10,
        "Rate": 2 / 10000,
        "Capital": 10000
    }

    # Search space: (start, end, pace) tuples, or a fixed number.
    Parameterlist = {
        'bollWindow': (10, 50, 5),
        'bollDev': (3.0, 6.0, 0.6),
        'cciWindow': (10, 50, 5),
        'atrWindow': (10, 50, 5),
        'slMultiplier': (3.5, 5.5, 0.5),
        'fixedSize': 1
    }

    PSO = PSOOptimize(Strategy, Symbol, Parameterlist)
    pop, best = PSO.optimize()
    print("best para: %s, result:%s" % (best, best.fitness.values))
    print(pop[:20])
    # Interpolate the symbol; passing it as a second print argument would
    # leave a literal "%s" in the output.
    print("-- End of (successful) %s evolution --" % Symbol["vtSymbol"])
283 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MarketDataAnaylzerbyDataFrame
2 | ----------------------------------------------4/24/2019--------------------------------------------------------------------
3 | 1. 新增方法addResultBar,可以在输入参数中定义需要对比的范围,主要对比当前close和以后close的上涨下跌,和差值。
4 | 2. 新增方法resultOutput,根据对比值,输出检查点之后的走势,支持导出csv分析。
5 | 3. 修复一些比较机械的写法
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | ----------------------------------------------------------------------------------------------------------------------------------------
18 | A tool as Data Analyzer for commodity or stock by DataFrame
19 |
20 | 1. 定义了一个类DataAnalyzer初始化的时候需要输入生成csv文件导出文件夹地址,和数据格式
21 |
22 | 1.1 方法db2df,输入vnpy使用的行情数据库信息和读取品种信息,程序读取vnpy的指定品种从指定开始到结束时间段的分钟K线数据;按照初始化格式返回生成DataFrame供分析;如果export2csv为True的话,会生成一个csv文件到指定地址
23 |
24 | 1.2 方法csv2df,输入指定路径的csv行情文件;程序读取csv文件;按照格式返回生成DataFrame供分析;如果export2csv为True的话,会生成一个csv文件到指定地址。把导入的字符串转换为datetime格式,此时可能会有warning信息。
25 |
26 | 1.3 方法df2Barmin,输入DataFrame格式1分钟行情数据,和指定输出分钟K线,程序整合出对应分钟K线数据。比如输入1分钟行情数据csv,要求输出5分钟K线数据;程序输出5分钟K线信息DataFrame供分析。这里有个地方要注意,如果数据中一天开始第一个bar是9点,那么crossmin为1; 如果第一个是9点1分,此处为0。如果export2csv为True的话,会生成一个csv文件到指定地址。
27 |
28 | 1.4 方法dfcci,其实是一个示例方法,输入DataFrame格式分钟行情数据,和参数cciWindows,程序调用talib的cci方法,进行计算。返回带有新的一列cci数据的DataFrame。用来分析。如果export2csv为True的话,会生成一个csv文件到指定地址。打开就是这样一个东西。
29 |
30 | 2. 通过类的方法,读取一个rb1905行情数据,按照聚合出5分钟K线,在按照cci周期为15条K线计算cci值
31 |
32 | 2.1 画出cci的柱状分布图,CCI(Commodity Channel Index)顺势指标是测量股价是否已超出常态分布范围的一个指数,波动于正无穷大和负无穷大之间。如下图x轴是cci值,y轴是出现次数。从图中可以看出cci数据是两个正态分布叠加,波峰在-80和+80两个值,正负200之后,cci出现就变得很少,此时可以用DataFrame的数字分析功能找到更多数据。
33 |
34 | 2.2 计算每个时间点的当前价格,和之后第5根K线结束价格差,和cci的值做成散点图,
35 |
36 | 2.3 cci值在正负(100-200)区间,和(200-300)区间算是出现比较少,计算在这个区间出现时,之后第2,第4,和第6根K线结束价格增多还是减少概率
37 |
--------------------------------------------------------------------------------
/Risk and Portfolio Knowledge Sharing/README.md:
--------------------------------------------------------------------------------
1 | 主要内容标题:
2 |
3 | 风险的分类
4 | 使用收益率标准差(或叫波动率)度量风险
5 | 波动率的获得
6 | 为什么使用对数来计算收益率
7 | 收益率,标准差,方差的计算
8 | VaR风险资产(Value-at-Risk)的定义和方程
9 | 使用历史数据模拟计算VaR
10 | 使用加权历史数据模拟计算VaR
11 | 使用均值方差模型计算VaR
12 | 正态分布累积逆函数介绍
13 | 自回归模型AR介绍
14 | 自回归条件异方差模型ARCH介绍
15 | GARCH介绍
16 | 如何使用GARCH和JP Morgan’s RiskMetrics计算VAR
17 | 蒙特卡洛模拟计算VAR
18 | 投资组合的波动率计算和相关计算
19 | 投资组合的最小方差模型
20 | 利用夏普模型选取投资组合
21 | 利用GARCH模型计算投资组合的VaR
22 |
23 | 参考文档和链接
24 |
--------------------------------------------------------------------------------
/Risk and Portfolio Knowledge Sharing/RiskandPortfolio.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BillyZhangGuoping/MarketDataAnaylzerbyDataFrame/530e0fb90e3b84f146adb56df890fe59415d1afe/Risk and Portfolio Knowledge Sharing/RiskandPortfolio.pdf
--------------------------------------------------------------------------------
/Risk and Portfolio Knowledge Sharing/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 17,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/html": [
11 | "
\n",
12 | "\n",
25 | "
\n",
26 | " \n",
27 | " \n",
28 | " | \n",
29 | " High | \n",
30 | " Low | \n",
31 | " Open | \n",
32 | " Close | \n",
33 | " Volume | \n",
34 | " Adj Close | \n",
35 | "
\n",
36 | " \n",
37 | " Date | \n",
38 | " | \n",
39 | " | \n",
40 | " | \n",
41 | " | \n",
42 | " | \n",
43 | " | \n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " \n",
48 | " 2019-09-10 | \n",
49 | " 35.740002 | \n",
50 | " 34.750000 | \n",
51 | " 35.709999 | \n",
52 | " 35.000000 | \n",
53 | " 67533066.0 | \n",
54 | " 35.000000 | \n",
55 | "
\n",
56 | " \n",
57 | " 2019-09-11 | \n",
58 | " 35.240002 | \n",
59 | " 34.709999 | \n",
60 | " 35.189999 | \n",
61 | " 35.049999 | \n",
62 | " 60908940.0 | \n",
63 | " 35.049999 | \n",
64 | "
\n",
65 | " \n",
66 | " 2019-09-12 | \n",
67 | " 35.430000 | \n",
68 | " 34.970001 | \n",
69 | " 35.400002 | \n",
70 | " 35.290001 | \n",
71 | " 52582171.0 | \n",
72 | " 35.290001 | \n",
73 | "
\n",
74 | " \n",
75 | " 2019-09-16 | \n",
76 | " 35.540001 | \n",
77 | " 34.820000 | \n",
78 | " 35.500000 | \n",
79 | " 35.000000 | \n",
80 | " 46166805.0 | \n",
81 | " 35.000000 | \n",
82 | "
\n",
83 | " \n",
84 | " 2019-09-17 | \n",
85 | " 35.180000 | \n",
86 | " 34.619999 | \n",
87 | " 35.070000 | \n",
88 | " 34.700001 | \n",
89 | " 40907776.0 | \n",
90 | " 34.700001 | \n",
91 | "
\n",
92 | " \n",
93 | "
\n",
94 | "
"
95 | ],
96 | "text/plain": [
97 | " High Low Open Close Volume Adj Close\n",
98 | "Date \n",
99 | "2019-09-10 35.740002 34.750000 35.709999 35.000000 67533066.0 35.000000\n",
100 | "2019-09-11 35.240002 34.709999 35.189999 35.049999 60908940.0 35.049999\n",
101 | "2019-09-12 35.430000 34.970001 35.400002 35.290001 52582171.0 35.290001\n",
102 | "2019-09-16 35.540001 34.820000 35.500000 35.000000 46166805.0 35.000000\n",
103 | "2019-09-17 35.180000 34.619999 35.070000 34.700001 40907776.0 34.700001"
104 | ]
105 | },
106 | "execution_count": 17,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "import pandas_datareader.data as web\n",
113 | "import datetime\n",
114 | "import yfinance as yf\n",
115 | "# yf.pdr_override()\n",
116 | "\n",
117 | "start=datetime.datetime(2008, 1, 1)\n",
118 | "end=datetime.datetime.today()\n",
119 | "apple=web.get_data_yahoo('600036.SS',start,end)\n",
120 | "apple.tail()"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 14,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "ename": "NameError",
130 | "evalue": "name 'NDX' is not defined",
131 | "output_type": "error",
132 | "traceback": [
133 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
134 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
135 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mNDX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtail\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
136 | "\u001b[1;31mNameError\u001b[0m: name 'NDX' is not defined"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "apple.tail()"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 15,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "data": {
151 | "image/png": "\n",
152 | "text/plain": [
153 | ""
154 | ]
155 | },
156 | "metadata": {},
157 | "output_type": "display_data"
158 | }
159 | ],
160 | "source": [
161 | "import matplotlib.pyplot as plt\n",
162 | "%matplotlib inline\n",
163 | "apple['Close'].plot()\n",
164 | "plt.show()"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 22,
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "SAP.to_csv(\"REALLONGTERM.CSV\",index=True, header=True)"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 21,
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "data": {
183 | "image/png": "\n",
184 | "text/plain": [
185 | ""
186 | ]
187 | },
188 | "metadata": {},
189 | "output_type": "display_data"
190 | },
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "Iteration: 1, Func. Count: 6, Neg. LLF: 5206.19238728\n",
196 | "Iteration: 2, Func. Count: 15, Neg. LLF: 5205.44782196\n",
197 | "Iteration: 3, Func. Count: 24, Neg. LLF: 5205.16609339\n",
198 | "Iteration: 4, Func. Count: 32, Neg. LLF: 5203.4457523\n",
199 | "Iteration: 5, Func. Count: 41, Neg. LLF: 5202.94521095\n",
200 | "Iteration: 6, Func. Count: 48, Neg. LLF: 5202.45218084\n",
201 | "Iteration: 7, Func. Count: 54, Neg. LLF: 5202.28164924\n",
202 | "Iteration: 8, Func. Count: 60, Neg. LLF: 5202.25645484\n",
203 | "Iteration: 9, Func. Count: 66, Neg. LLF: 5202.25632277\n",
204 | "Iteration: 10, Func. Count: 72, Neg. LLF: 5202.25631508\n",
205 | "Iteration: 11, Func. Count: 78, Neg. LLF: 5202.25631385\n",
206 | "Optimization terminated successfully. (Exit mode 0)\n",
207 | " Current function value: 5202.25631385\n",
208 | " Iterations: 11\n",
209 | " Function evaluations: 78\n",
210 | " Gradient evaluations: 11\n",
211 | " Constant Mean - GARCH Model Results \n",
212 | "==============================================================================\n",
213 | "Dep. Variable: Close R-squared: -0.001\n",
214 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n",
215 | "Vol Model: GARCH Log-Likelihood: -5202.26\n",
216 | "Distribution: Normal AIC: 10412.5\n",
217 | "Method: Maximum Likelihood BIC: 10436.5\n",
218 | " No. Observations: 2987\n",
219 | "Date: Thu, Sep 12 2019 Df Residuals: 2983\n",
220 | "Time: 13:40:30 Df Model: 4\n",
221 | " Mean Model \n",
222 | "==========================================================================\n",
223 | " coef std err t P>|t| 95.0% Conf. Int.\n",
224 | "--------------------------------------------------------------------------\n",
225 | "mu 0.0833 2.508e-02 3.320 9.003e-04 [3.411e-02, 0.132]\n",
226 | " Volatility Model \n",
227 | "==========================================================================\n",
228 | " coef std err t P>|t| 95.0% Conf. Int.\n",
229 | "--------------------------------------------------------------------------\n",
230 | "omega 0.0636 2.750e-02 2.311 2.084e-02 [9.651e-03, 0.117]\n",
231 | "alpha[1] 0.0721 2.666e-02 2.705 6.837e-03 [1.986e-02, 0.124]\n",
232 | "beta[1] 0.9004 3.140e-02 28.678 7.095e-181 [ 0.839, 0.962]\n",
233 | "==========================================================================\n",
234 | "\n",
235 | "Covariance estimator: robust\n",
236 | "-5202.25631385\n"
237 | ]
238 | }
239 | ],
240 | "source": [
241 | "import datetime as dt\n",
242 | "import sys\n",
243 | "import numpy as np\n",
244 | "import pandas as pd\n",
245 | "import pandas_datareader.data as web\n",
246 | "import matplotlib.pyplot as plt\n",
247 | "from arch import arch_model\n",
248 | "\n",
249 | "# start = dt.datetime(2015,1,1)\n",
250 | "# end = dt.datetime(2018,1,1)\n",
251 | "\n",
252 | "# NDX['close'] = NDX['Index Value']\n",
253 | "\n",
254 | "returns = 100 * NDX['Close'].pct_change().dropna()\n",
255 | "returns.plot()\n",
256 | "plt.show()\n",
257 | "\n",
258 | "from arch import arch_model\n",
259 | "model=arch_model(returns, vol='Garch', p=1, o=0, q=1, dist='Normal')\n",
260 | "results=model.fit()\n",
261 | "print(results.summary())\n",
262 | "\n",
263 | "# forecasts = results.forecast(horizon=30, method='simulation', simulations=100)\n",
264 | "# sims = forecasts.simulations\n",
265 | "\n",
266 | "# print(np.percentile(sims.values[-1,:,-1].T,5))\n",
267 | "# plt.hist(sims.values[-1, :,-1],bins=50)\n",
268 | "# plt.title('Distribution of Returns')\n",
269 | "# plt.show()\n",
270 | "print(results.loglikelihood)"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 28,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "name": "stdout",
280 | "output_type": "stream",
281 | "text": [
282 | "\n",
283 | "\n",
284 | "Stock: SAP\n",
285 | "('Mu over 1 days:', 0.049117474655217444)\n",
286 | "('Sigma over 1 days:', 1.5176955642513446)\n",
287 | "('Monte Carlo VaR for SAP with confidence 0.99 over period of 1 days: ', 99.02716961589906)\n"
288 | ]
289 | }
290 | ],
291 | "source": [
292 | "def VaR_Monte_FHS(num,p,sigma_first,ndays,omega,alpha,beta,theta):\n",
293 | " np.random.seed(4)\n",
294 | " MC = num\n",
295 | " sigma2_FHS = sigma_first\n",
296 | " data_FHS = pd.DataFrame(index = range(MC))\n",
297 | " data_FHS_R = pd.DataFrame(index = range(MC))\n",
298 | " data_FHS['z_day1'] = data.loc[np.random.randint(0,data.shape[0]-1,MC),'z_Garch'].values\n",
299 | " data_FHS_R['R_day1'] = sigma2_Garch**0.5 * data_FHS['z_day1']\n",
300 | " data_FHS['sigma2_day1'] = omega + alpha*(data_FHS_R['R_day1'] - theta*sigma2_FHS**0.5)**2 + beta*sigma2_FHS\n",
301 | "\n",
302 | " # 低2-10天公式一样,循环\n",
303 | " for i in range(2,ndays +1):\n",
304 | " exec(\"data_FHS['z_day\" + str(i) + \"'] = data.loc[np.random.randint(0,data.shape[0]-1,MC),'z_Garch'].values\")\n",
305 | " exec(\"data_FHS_R['R_day\" + str(i) + \"'] = data_FHS['sigma2_day\" + str(i-1) + \"']**0.5 * data_FHS['z_day\" + str(i) + \"']\")\n",
306 | " exec(\"data_FHS['sigma2_day\" + str(i) + \"'] = omega + alpha*(data_FHS_R['R_day\" + str(i) + \"'] - theta*data_FHS['sigma2_day\" + str(i -1) + \"']**0.5)**2 + beta*data_FHS['sigma2_day\" + str(i-1) + \"']\")\n",
307 | "\n",
308 | " VaR = pd.DataFrame(index = range(ndays))\n",
309 | " VaR['ndays'] = np.arange(1,ndays+1)\n",
310 | " VaR['VaR'] = 0\n",
311 | " for i in range(ndays):\n",
312 | " R_ndays = data_FHS_R.iloc[:,:i+1].sum(axis = 1)\n",
313 | " VaR.loc[i,'VaR'] = - np.percentile(R_ndays,p) \n",
314 | " return VaR\n",
315 | "\n",
316 | "def VaR_MC(returns, S, start, end,c ,period, iterations):\n",
317 | " \n",
318 | " \n",
319 | " mu = returns.mean()\n",
320 | " sigma =returns.std()\n",
321 | " \n",
322 | " z = np.random.normal(0, 1, [1, iterations])\n",
323 | " \n",
324 | " ST = S*np.exp(period*(mu - 0.5*sigma**2)+sigma*np.sqrt(period)*z)\n",
325 | " \n",
326 | " ST = np.sort(ST)\n",
327 | " \n",
328 | " Spc = np.percentile(ST,(1-c)*100)\n",
329 | " \n",
330 | " var = S - Spc\n",
331 | " \n",
332 | " \n",
333 | " print('\\n')\n",
334 | " print('Stock: {}'.format(stock[0]))\n",
335 | " print('Mu over {} days:'.format(period), mu*period)\n",
336 | " print('Sigma over {} days:'.format(period), sigma*np.sqrt(period))\n",
337 | " \n",
338 | " print('Monte Carlo VaR for {} with confidence {} over period of {} days: '.format(stock[0],c,period), var)\n",
339 | "stock = ['SAP']\n",
340 | "s=100\n",
341 | "c=0.99\n",
342 | "period =1\n",
343 | "iterations = 10000000\n",
344 | "\n",
345 | "\n",
346 | "VaR_MC(returns, s, start, end,c ,period, iterations)"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": []
355 | }
356 | ],
357 | "metadata": {
358 | "kernelspec": {
359 | "display_name": "Python 2",
360 | "language": "python",
361 | "name": "python2"
362 | },
363 | "language_info": {
364 | "codemirror_mode": {
365 | "name": "ipython",
366 | "version": 2
367 | },
368 | "file_extension": ".py",
369 | "mimetype": "text/x-python",
370 | "name": "python",
371 | "nbconvert_exporter": "python",
372 | "pygments_lexer": "ipython2",
373 | "version": "2.7.15"
374 | }
375 | },
376 | "nbformat": 4,
377 | "nbformat_minor": 2
378 | }
379 |
--------------------------------------------------------------------------------
/TraderbySklearn/AnalyzebySklearn.py:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | import warnings
3 | warnings.filterwarnings("ignore")
4 | from pymongo import MongoClient, ASCENDING
5 | import pandas as pd
6 | import numpy as np
7 | from datetime import datetime
8 | import talib
9 | import matplotlib.pyplot as plt
10 | import scipy.stats as st
11 | from sklearn.model_selection import train_test_split
12 | # LogisticRegression 逻辑回归
13 | from sklearn.linear_model import LogisticRegression
14 | # DecisionTreeClassifier 决策树
15 | from sklearn.tree import DecisionTreeClassifier
16 | # SVC 支持向量分类
17 | from sklearn.svm import SVC
18 | # MLP 神经网络
19 | from sklearn.neural_network import MLPClassifier
20 | from sklearn.model_selection import GridSearchCV
21 | class DataAnalyzerforSklearn(object):
22 | """
23 | 这个类是为了SVM做归纳分析数据,以未来6个bar的斜率线性回归为判断分类是否正确。
24 | 不是直接分析HLOC,而且用下列分非线性参数(就是和具体点位无关)
25 | 1.Percentage
26 | 2.std
27 | 4.MACD
28 | 5.CCI
29 | 6.ATR
30 | 7. 该bar之前的均线斜率
31 | 8. RSI
32 | """
33 | def __init__(self, exportpath="C:\\Project\\", datformat=['datetime', 'high', 'low', 'open', 'close','volume']):
34 | self.mongohost = None
35 | self.mongoport = None
36 | self.db = None
37 | self.collection = None
38 | self.df = pd.DataFrame()
39 | self.exportpath = exportpath
40 | self.datformat = datformat
41 | self.startBar = 2
42 | self.endBar = 12
43 | self.step = 2
44 | self.pValue = 0.015
45 | #-----------------------------------------导入数据-------------------------------------------------
46 | def db2df(self, db, collection, start, end, mongohost="localhost", mongoport=27017, export2csv=False):
47 | """读取MongoDB数据库行情记录,输出到Dataframe中"""
48 | self.mongohost = mongohost
49 | self.mongoport = mongoport
50 | self.db = db
51 | self.collection = collection
52 | dbClient = MongoClient(self.mongohost, self.mongoport, connectTimeoutMS=500)
53 | db = dbClient[self.db]
54 | cursor = db[self.collection].find({'datetime':{'$gte':start, '$lt':end}}).sort("datetime",ASCENDING)
55 | self.df = pd.DataFrame(list(cursor))
56 | self.df = self.df[self.datformat]
57 | self.df = self.df.reset_index(drop=True)
58 | path = self.exportpath + self.collection + ".csv"
59 | if export2csv == True:
60 | self.df.to_csv(path, index=True, header=True)
61 | return self.df
62 | def csv2df(self, csvpath, dataname="csv_data", export2csv=False):
63 | """读取csv行情数据,输入到Dataframe中"""
64 | csv_df = pd.read_csv(csvpath)
65 | self.df = csv_df[self.datformat]
66 | self.df["datetime"] = pd.to_datetime(self.df['datetime'])
67 | self.df = self.df.reset_index(drop=True)
68 | path = self.exportpath + dataname + ".csv"
69 | if export2csv == True:
70 | self.df.to_csv(path, index=True, header=True)
71 | return self.df
72 | def df2Barmin(self, inputdf, barmins, crossmin=1, export2csv=False):
73 | """输入分钟k线dataframe数据,合并多多种数据,例如三分钟/5分钟等,如果开始时间是9点1分,crossmin = 0;如果是9点0分,crossmin为1"""
74 | dfbarmin = pd.DataFrame()
75 | highBarMin = 0
76 | lowBarMin = 0
77 | openBarMin = 0
78 | volumeBarmin = 0
79 | datetime = 0
80 | for i in range(0, len(inputdf) - 1):
81 | bar = inputdf.iloc[i, :].to_dict()
82 | if openBarMin == 0:
83 | openBarmin = bar["open"]
84 | if highBarMin == 0:
85 | highBarMin = bar["high"]
86 | else:
87 | highBarMin = max(bar["high"], highBarMin)
88 | if lowBarMin == 0:
89 | lowBarMin = bar["low"]
90 | else:
91 | lowBarMin = min(bar["low"], lowBarMin)
92 | closeBarMin = bar["close"]
93 | datetime = bar["datetime"]
94 | volumeBarmin += int(bar["volume"])
95 | # X分钟已经走完
96 | if not (bar["datetime"].minute + crossmin) % barmins: # 可以用X整除
97 | # 生成上一X分钟K线的时间戳
98 | barMin = {'datetime': datetime, 'high': highBarMin, 'low': lowBarMin, 'open': openBarmin,
99 | 'close': closeBarMin, 'volume' : volumeBarmin}
100 | dfbarmin = dfbarmin.append(barMin, ignore_index=True)
101 | highBarMin = 0
102 | lowBarMin = 0
103 | openBarMin = 0
104 | volumeBarmin = 0
105 | if export2csv == True:
106 | dfbarmin.to_csv(self.exportpath + "bar" + str(barmins)+ str(self.collection) + ".csv", index=True, header=True)
107 | return dfbarmin
108 | #-----------------------------------------开始计算指标-------------------------------------------------
109 | def dfcci(self, inputdf, n, export2csv=True):
110 | """调用talib方法计算CCI指标,写入到df并输出"""
111 | dfcci = inputdf
112 | dfcci["cci"] = None
113 | for i in range(n, len(inputdf)):
114 | df_ne = inputdf.loc[i - n + 1:i, :]
115 | cci = talib.CCI(np.array(df_ne["high"]), np.array(df_ne["low"]), np.array(df_ne["close"]), n)
116 | dfcci.loc[i, "cci"] = cci[-1]
117 | dfcci = dfcci.fillna(0)
118 | dfcci = dfcci.replace(np.inf, 0)
119 | if export2csv == True:
120 | dfcci.to_csv(self.exportpath + "dfcci" + str(self.collection) + ".csv", index=True, header=True)
121 | return dfcci
122 | def dfatr(self, inputdf, n, export2csv=True):
123 | """调用talib方法计算ATR指标,写入到df并输出"""
124 | dfatr = inputdf
125 | for i in range((n+1), len(inputdf)):
126 | df_ne = inputdf.loc[i - n :i, :]
127 | atr = talib.ATR(np.array(df_ne["high"]), np.array(df_ne["low"]), np.array(df_ne["close"]), n)
128 | dfatr.loc[i, "atr"] = atr[-1]
129 | dfatr = dfatr.fillna(0)
130 | dfatr = dfatr.replace(np.inf, 0)
131 | if export2csv == True:
132 | dfatr.to_csv(self.exportpath + "dfatr" + str(self.collection) + ".csv", index=True, header=True)
133 | return dfatr
134 | def dfrsi(self, inputdf, n, export2csv=True):
135 | """调用talib方法计算ATR指标,写入到df并输出"""
136 | dfrsi = inputdf
137 | dfrsi["rsi"] = None
138 | for i in range(n+1, len(inputdf)):
139 | df_ne = inputdf.loc[i - n :i, :]
140 | rsi = talib.RSI(np.array(df_ne["close"]), n)
141 | dfrsi.loc[i, "rsi"] = rsi[-1]
142 | dfrsi = dfrsi.fillna(0)
143 | dfrsi = dfrsi.replace(np.inf, 0)
144 | if export2csv == True:
145 | dfrsi.to_csv(self.exportpath + "dfrsi" + str(self.collection) + ".csv", index=True, header=True)
146 | return dfrsi
147 | def Percentage(self, inputdf, export2csv=True):
148 | """调用talib方法计算CCI指标,写入到df并输出"""
149 | dfPercentage = inputdf
150 | # dfPercentage["Percentage"] = None
151 | for i in range(1, len(inputdf)):
152 | # if dfPercentage.loc[i,"close"]>dfPercentage.loc[i,"open"]:
153 | # percentage = ((dfPercentage.loc[i,"high"] - dfPercentage.loc[i-1,"close"])/ dfPercentage.loc[i-1,"close"])*100
154 | # else:
155 | # percentage = (( dfPercentage.loc[i,"low"] - dfPercentage.loc[i-1,"close"] )/ dfPercentage.loc[i-1,"close"])*100
156 | if dfPercentage.loc[ i - 1, "close"] == 0.0:
157 | percentage = 0
158 | else:
159 | percentage = ((dfPercentage.loc[i, "close"] - dfPercentage.loc[i - 1, "close"]) / dfPercentage.loc[ i - 1, "close"]) * 100.0
160 | dfPercentage.loc[i, "Perentage"] = percentage
161 | dfPercentage = dfPercentage.fillna(0)
162 | dfPercentage = dfPercentage.replace(np.inf, 0)
163 | if export2csv == True:
164 | dfPercentage.to_csv(self.exportpath + "Percentage_" + str(self.collection) + ".csv", index=True, header=True)
165 | return dfPercentage
166 | def dfMACD(self, inputdf, n, export2csv=False):
167 | """调用talib方法计算MACD指标,写入到df并输出"""
168 | dfMACD = inputdf
169 | for i in range(n, len(inputdf)):
170 | df_ne = inputdf.loc[i - n + 1:i, :]
171 | macd,signal,hist = talib.MACD(np.array(df_ne["close"]),12,26,9)
172 | dfMACD.loc[i, "macd"] = macd[-1]
173 | dfMACD.loc[i, "signal"] = signal[-1]
174 | dfMACD.loc[i, "hist"] = hist[-1]
175 | dfMACD = dfMACD.fillna(0)
176 | dfMACD = dfMACD.replace(np.inf, 0)
177 | if export2csv == True:
178 | dfMACD.to_csv(self.exportpath + "macd" + str(self.collection) + ".csv", index=True, header=True)
179 | return dfMACD
def dfSTD(self, inputdf, n, export2csv=False):
    """Append a rolling standard deviation of ``close`` in column ``std``.

    For each row label ``i`` from ``n`` to ``len(inputdf) - 1`` the ``close``
    values of rows ``i - n + 1 .. i`` are passed to ``talib.STDDEV`` with
    time period ``n``, and the last element of the returned array is stored
    on row ``i``.  Rows the loop does not write end up as 0 through the
    final NaN fill.  When ``n >= len(inputdf)`` no ``std`` column is added.

    Args:
        inputdf: DataFrame with a ``close`` column.  Rows are addressed with
            integer labels through ``.loc``, so a 0..len-1 integer index is
            assumed.  The ``std`` column is also written into this frame in
            place.
        n: Window length, used both for the slice and as talib's period.
        export2csv: When true, write the result to
            ``<exportpath>dfSTD<collection>.csv``.

    Returns:
        A new DataFrame with the ``std`` column added and every NaN, +inf
        and -inf in the whole frame replaced by 0.
    """
    result = inputdf
    for i in range(n, len(inputdf)):
        # Label-based slice: both ends inclusive, i.e. n rows ending at i.
        window = inputdf.loc[i - n + 1:i, :]
        std = talib.STDDEV(np.array(window["close"]), n)
        result.loc[i, "std"] = std[-1]
    result = result.fillna(0)
    # Replace both signs of infinity; np.inf alone would leave -inf behind.
    result = result.replace([np.inf, -np.inf], 0)
    if export2csv:
        result.to_csv(self.exportpath + "dfSTD" + str(self.collection) + ".csv", index=True, header=True)
    return result
192 | #-----------------------------------------加入趋势分类-------------------------------------------------
def addTrend(self, inputdf, trendsetp=6, export2csv=False):
    """Label bars with a trend class from a linear regression of closes.

    For each row ``i`` in ``1 .. len(inputdf) - trendsetp - 2`` the
    ``trendsetp`` closes starting at row ``i`` (bar ``i`` itself included)
    are regressed on ``1..trendsetp`` with ``st.linregress`` (presumably
    ``scipy.stats``).  When the p-value is below ``self.pValue + 0.01`` the
    column ``tradeindictor`` is set to 1 for a slope above 0.5 and to -1 for
    a slope below -0.5.  Every other row ends up as 0.

    Args:
        inputdf: DataFrame with a ``close`` column.  Rows are addressed with
            integer labels through ``.loc``, so a 0..len-1 integer index is
            assumed.  The label column is also written into this frame in
            place.
        trendsetp: Number of closes in each regression window.
        export2csv: When true, write the result to
            ``<exportpath>addTrend<collection>.csv``.

    Returns:
        A new DataFrame with the ``tradeindictor`` column present and every
        NaN, +inf and -inf in the whole frame replaced by 0.
    """
    dfTrend = inputdf
    # Create the label column up front so it exists even when no window
    # qualifies; otherwise callers reading it would hit a KeyError.
    if "tradeindictor" not in dfTrend.columns:
        dfTrend["tradeindictor"] = np.nan
    # Loop invariants: the loop only writes the label column, never "close".
    closes = np.array(dfTrend["close"])
    xAixs = np.arange(trendsetp) + 1
    for i in range(1, len(dfTrend) - trendsetp - 1):
        res = st.linregress(y=closes[i:i + trendsetp], x=xAixs)
        if res.pvalue < self.pValue + 0.01:
            if res.slope > 0.5:
                dfTrend.loc[i, "tradeindictor"] = 1
            elif res.slope < -0.5:
                dfTrend.loc[i, "tradeindictor"] = -1
    dfTrend = dfTrend.fillna(0)
    # Replace both signs of infinity; np.inf alone would leave -inf behind.
    dfTrend = dfTrend.replace([np.inf, -np.inf], 0)
    if export2csv:
        dfTrend.to_csv(self.exportpath + "addTrend" + str(self.collection) + ".csv", index=True, header=True)
    return dfTrend
def GirdValuate(X_train, y_train):
    """Grid-search four classifier families and print each best result.

    Candidates, searched in this order:
      1) DecisionTreeClassifier - decision tree
      2) LogisticRegression     - logistic regression
      3) MLPClassifier          - neural network
      4) SVC                    - support vector classification

    Every search scores by accuracy on a shared 10-fold stratified,
    shuffled split (random_state=7) with ``n_jobs=-1``.  For each candidate
    the best parameter set and its score are printed; nothing is returned.
    """
    from sklearn.model_selection import StratifiedKFold  # cross-validation

    # (estimator, parameter grid) pairs, listed in evaluation order.
    candidates = [
        (
            DecisionTreeClassifier(),
            {'max_depth': [1, 2, 3, 4, 5, 6]},
        ),
        (
            LogisticRegression(),
            {'solver': ['liblinear', 'lbfgs', 'newton-cg', 'sag']},
        ),
        (
            MLPClassifier(),
            {
                "hidden_layer_sizes": [(100,), (100, 30)],
                "solver": ['adam', 'sgd', 'lbfgs'],
                "max_iter": [20],
                "verbose": [False],
            },
        ),
        (
            SVC(),
            {
                'kernel': ('linear', 'poly', 'rbf', 'sigmoid'),
                'C': [1, 2, 4],
                'gamma': [0.125, 0.25, 0.5, 1, 2, 4],
            },
        ),
    ]

    # Split the data into 10 mutually exclusive folds, shared by all searches.
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)

    # Run the grid search for each candidate.
    for estimator, grid_params in candidates:
        search = GridSearchCV(estimator, grid_params, scoring='accuracy', n_jobs=-1, cv=folds)
        search.fit(X_train, y_train)
        print(search.best_params_, ': ', search.best_score_)
if __name__ == '__main__':
    # Load the data.
    # exportpath = "C:\\Users\shui0\OneDrive\Documents\Project\\"
    # Backslashes are escaped explicitly; the value is still C:\Project\ but
    # no longer relies on the invalid escape sequence "\P".
    exportpath = "C:\\Project\\"
    DA = DataAnalyzerforSklearn(exportpath)

    # Import 1-minute bars from the database.
    start = datetime.strptime("20180501", '%Y%m%d')
    end = datetime.strptime("20190501", '%Y%m%d')
    df = DA.db2df(db="VnTrader_1Min_Db", collection="rb8888", start=start, end=end)
    df5min = DA.df2Barmin(df, 5)

    # Add the trend label and the indicator feature columns.
    df5minAdd = DA.addTrend(df5min, export2csv=True)
    df5minAdd = DA.dfMACD(df5minAdd, n=34, export2csv=True)
    df5minAdd = DA.dfatr(df5minAdd, n=25, export2csv=True)
    df5minAdd = DA.dfrsi(df5minAdd, n=35, export2csv=True)
    df5minAdd = DA.dfcci(df5minAdd, n=30, export2csv=True)
    df5minAdd = DA.dfSTD(df5minAdd, n=30, export2csv=True)
    df5minAdd = DA.Percentage(df5minAdd, export2csv=True)

    # Split into training and test sets.
    # Analyse only from row 60 on, because many earlier values are empty.
    df_test = df5minAdd.loc[60:, :]
    # Keep only the trend label as the target, converted to an array.
    y = np.array(df_test["tradeindictor"])
    # Do not analyse OHLC directly: keep only the feature columns, as an array.
    X = df_test.drop(["tradeindictor", "close", "datetime", "high", "low", "open", "volume"], axis=1).values
    # 70/30 train/test split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    print("训练集长度: %s, 测试集长度: %s" % (len(X_train), len(X_test)))

    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import SelectPercentile
    from sklearn.feature_selection import mutual_info_classif

    # Feature selection: keep the best-scoring 70% of the features.
    # SelectKBest can be used instead to ask for a fixed number of features.
    print(X_train.shape)
    selectPer = SelectPercentile(mutual_info_classif, percentile=70)
    # selectPer = SelectKBest(mutual_info_classif, k=7)
    X_train = selectPer.fit_transform(X_train, y_train)
    print(X_train.shape)
    X_test = selectPer.transform(X_test)
    # SelectFpr can be used for the selection as well:
    # selectFea=SelectFpr(alpha=0.01)
    # X_train_new = selectFea.fit_transform(X_train, y_train)
    # X_test_new = selectFea.transform(X_test)

    # Evaluate the candidate models and tune them by grid search.
    GirdValuate(X_train, y_train)

    # Take the best model and check it on the test set.
    # - prediction: model.predict()
    # - accuracy:   metrics.accuracy_score()
    # - precision:  metrics.precision_score()
    # - recall:     metrics.recall_score()
    from sklearn import metrics

    # Fill in the best model and parameters reported by the grid search.
    clf_selected = MLPClassifier(hidden_layer_sizes=(100, 30), max_iter=20, solver='adam')
    # {'hidden_layer_sizes': (100, 30), 'max_iter': 20, 'solver': 'adam', 'verbose': False} : 0.9897016507648039
    clf_selected.fit(X_train, y_train)
    y_pred = clf_selected.predict(X_test)

    # accuracy
    accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
    print('accuracy:', accuracy)
    # precision
    precision = metrics.precision_score(y_true=y_test, y_pred=y_pred, average="micro")
    print('precision:', precision)
    # recall
    recall = metrics.recall_score(y_true=y_test, y_pred=y_pred, average="micro")
    print('recall:', recall)

    # Actual versus predicted values.
    print(y_test)
    print(y_pred)
    dfresult = pd.DataFrame({'Actual': y_test, 'Predict': y_pred})
    dfresult.to_csv(exportpath + "result" + ".csv", index=True, header=True)

    # sklearn.externals.joblib was removed from scikit-learn; prefer the
    # standalone package and fall back to the bundled copy on old installs.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib

    # Persist the model locally.
    joblib.dump(clf_selected, 'clf_selected.m')
    # Restore the model.
    clf_tmp = joblib.load('clf_selected.m')
--------------------------------------------------------------------------------