├── code_monitor
│   └── memory_monitor.py
├── conjugate_gradient
│   └── conjugate_gradient.py
├── forecast_auto_adjustment
│   ├── README.md
│   ├── README.pdf
│   ├── data
│   │   ├── alibaba_stock.csv
│   │   ├── amazon_stock.csv
│   │   ├── google_stock.csv
│   │   └── jd_stock.csv
│   ├── images
│   │   ├── alibaba_stock_adjust_forecast.png
│   │   ├── alibaba_stock_adjust_trendy_forecast.png
│   │   ├── alibaba_stock_normal_forecast.png
│   │   ├── amazon_stock_adjust_forecast.png
│   │   ├── amazon_stock_adjust_trendy_forecast.png
│   │   ├── amazon_stock_normal_forecast.png
│   │   ├── error_adjust_s1.png
│   │   ├── error_adjust_s2.png
│   │   ├── google_stock_adjust_forecast.png
│   │   ├── google_stock_adjust_trendy_forecast.png
│   │   ├── google_stock_normal_forecast.png
│   │   ├── jd_stock_adjust_forecast.png
│   │   ├── jd_stock_adjust_trendy_forecast.png
│   │   └── jd_stock_normal_forecast.png
│   ├── ts_features.py
│   ├── util.py
│   └── validation.py
├── forecast_reconcilation
│   ├── README.md
│   ├── data
│   │   └── reconcilation_test.csv
│   ├── data_structure.py
│   └── reconcilation.py
├── forecastability
│   ├── README.md
│   ├── build
│   │   └── lib
│   │       └── forecastability
│   │           ├── __init__.py
│   │           ├── forecastability.py
│   │           ├── period_detect.py
│   │           └── util.py
│   ├── dist
│   │   ├── forecastability-0.0.2-py3-none-any.whl
│   │   ├── forecastability-0.0.2-py3.6.egg
│   │   └── forecastability-0.0.2.tar.gz
│   ├── forecastability.egg-info
│   │   ├── PKG-INFO
│   │   ├── SOURCES.txt
│   │   ├── dependency_links.txt
│   │   ├── requires.txt
│   │   └── top_level.txt
│   ├── forecastability
│   │   ├── __init__.py
│   │   ├── forecastability.py
│   │   ├── period_detect.py
│   │   └── util.py
│   ├── requirements.txt
│   └── setup.py
├── km
│   ├── README.md
│   ├── dfs vs bfs.png
│   └── km.py
├── nmf
│   ├── README.md
│   └── nmf.py
├── period_detection
│   ├── README.md
│   ├── confidence.gif
│   ├── p1.png
│   ├── p2.png
│   └── period_detect.py
└── psoco
    ├── LICENSE
    ├── README.md
    ├── build
    │   └── lib
    │       └── psoco
    │           ├── __init__.py
    │           └── psoco.py
    ├── dist
    │   ├── psoco-0.0.0.tar.gz
    │   ├── psoco-0.0.7.tar.gz
    │   └── psoco-0.0.8.tar.gz
    ├── psoco.egg-info
    │   ├── PKG-INFO
    │   ├── SOURCES.txt
    │   ├── dependency_links.txt
    │   └── top_level.txt
    ├── psoco
    │   ├── __init__.py
    │   └── psoco.py
    ├── setup.py
    └── tests
        └── tests.py
/code_monitor/memory_monitor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*-coding:utf-8-*-
3 |
4 | import subprocess
5 | import psutil
6 | import matplotlib.pyplot as plt
7 | import time
8 |
9 | cmd = "python ./slot_allocation/slot_allocation_app.py --params {\"horizon\":30,\"warehouse_id\":\"65c0eb0a5c113609bbba19e5246c5ed2\",\"customer_id\":\"5df83c373bde3c002cc4b4c3\",\"pick_zones\":[\"A\"],\"storage_zones\":[\"A\"],\"end_time\":\"20200518\",\"initialize_dist_matrix\":false,\"input_path\":\"./data/input\",\"output_path\":\"./data/output\",\"strategy_type\":1,\"dist_matrix_path\":\"./data/output\"}"
10 | process = subprocess.Popen(cmd.split(" "))
11 |
12 | pid = process.pid
13 | print("process id: ", pid)
14 |
15 | def get_memory_list():
16 | process = psutil.Process(pid)
17 | memory_list = []
18 | while process_running(process):
19 | try:
20 | memo = process.memory_info().rss / 1024 / 1024  # resident set size in MB
21 | except psutil.NoSuchProcess:  # the monitored process has exited
22 | break
23 | memory_list.append(memo)
24 | time.sleep(2)
25 | return memory_list
26 |
27 | def process_running(process):
28 | try:
29 | process.memory_info()  # raises NoSuchProcess once the process exits
30 | return True
31 | except psutil.NoSuchProcess:
32 | return False
33 |
34 | def plot():
35 | start = time.time()
36 | memory_list = get_memory_list()
37 | end = time.time()
38 | print("Time spent to run {}s".format(round(end-start, 2)))
39 | plt.plot([x for x in range(len(memory_list))], memory_list)
40 | plt.xlabel("record point")
41 | plt.ylabel("memory (MB)")
42 | plt.show()
43 |
44 | if __name__ == "__main__":
45 | plot()
46 |
--------------------------------------------------------------------------------
/conjugate_gradient/conjugate_gradient.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*-coding:utf-8-*-
3 |
4 | '''
5 | Conjugate Gradient Method
6 |
7 | Reference link:
8 | https://en.wikipedia.org/wiki/Conjugate_gradient_method
9 |
10 | Author: Jing Wang
11 | '''
12 |
13 | import numpy as np
14 | from copy import deepcopy
15 | import random
16 |
17 | random.seed(123)
18 |
19 | def solve(A, b, max_iter):
20 | '''
21 | Args:
22 | A (array): symmetric positive definite matrix
23 | b (array): right-hand side vector
24 | max_iter (int): maximum number of iterations
25 | '''
26 |
27 | if A.shape[0] != b.shape[0]:
28 | raise ValueError("A and b must have the same number of rows!")
29 |
30 | threshold = 1e-10
31 | r = deepcopy(b)
32 | p = deepcopy(b)
33 | k = 0
34 | x = np.zeros_like(b)
35 | while k < max_iter:
36 |
37 | rdot = r.T.dot(r)
38 | Ap = A.dot(p)
39 |
40 | alpha = rdot / (p.T.dot(Ap))
41 | x = x + alpha * p
42 | r = r - alpha * Ap
43 |
44 | newrdot = r.T.dot(r)
45 | if np.sqrt(newrdot) < threshold:
46 | break
47 |
48 | beta = newrdot / rdot
49 |
50 | p = r + beta * p
51 |
52 | k += 1
53 | return x
54 |
55 | if __name__ == '__main__':
56 |
57 | A = np.array([[4, 1], [1, 3]])
58 | b = np.array([[1], [2]])
59 |
60 | print("A: ", A)
61 | print("b: ", b)
62 |
63 | x = np.linalg.inv(A).dot(b)
64 | x2 = solve(A, b, 10)
65 |
66 | print("x: ", x)
67 | print("x2: ", x2)
--------------------------------------------------------------------------------
/forecast_auto_adjustment/README.md:
--------------------------------------------------------------------------------
1 | # Forecast Auto-Adjustment
2 |
3 | Research and implementation of auto-adjustment for demand forecasting in rolling prediction.
4 |
5 | In rolling prediction, we adjust the model's forecast based on earlier rolling windows, or on new ground-truth values that arrive during scheduling, to achieve higher accuracy. This can be used in scenarios such as peak forecasting. The core idea is to correct the model's output according to its recent error behavior and use the corrected values as the new output.
6 |
7 | The method suits gradually rising peaks, and is likely applicable to tree models such as LightGBM, which sample from similar historical points and therefore cannot exceed the historical maximum.
8 |
9 | ## Error as Feature
10 |
11 | Original model: $\hat{y}_t = f(X_t)$
12 |
13 | New model: $\hat{y}_t = f(X_t, e_{t-h})$, where $e_{t-h} = y_{t-h} - \hat{y}_{t-h}$
14 |
15 | The simplest method is to add the error from the previous rolling window as a feature in the current rolling window. The initial error can be set to 1.
16 |
17 | That is, the previous window's forecast error $e_{t-h}$ simply becomes an extra feature for the next window.
18 |
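A minimal sketch of this idea (illustrative only, not code from this repo): `X` is assumed to be a `(T, d)` numpy feature matrix, `y` the target series, `h` the rolling horizon, and scikit-learn's `LinearRegression` stands in for the forecaster.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

def rolling_forecast_with_error_feature(X, y, h):
    """Rolling forecast where each sample carries the error made h steps earlier."""
    T = len(y)
    errors = np.ones(T)              # e_s, initialized to 1 as described above
    preds = np.full(T, np.nan)
    model = LinearRegression()
    for t in range(2 * h, T):
        # training rows s = h .. t-1, each augmented with its lagged error e_{s-h}
        X_aug = np.hstack([X[h:t], errors[: t - h, None]])
        model.fit(X_aug, y[h:t])
        x_t = np.hstack([X[t], errors[t - h]])[None, :]
        preds[t] = model.predict(x_t)[0]
        errors[t] = y[t] - preds[t]  # becomes a feature h steps from now
    return preds
```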
19 | ## Error Postprocess
20 |
21 | Assume the fitted model will perform as it did previously. For example, if the model underestimates when predicting $t$ at time $t-l$, it will do the same when predicting $t+l$ at time $t$.
22 |
23 | In other words, error postprocessing assumes that the model's behavior in the current rolling window continues from the previous one: if it underestimated in the last window, it will underestimate again; if it overestimated, it will overestimate again.
24 |
25 | Let $U$ be the event of model underestimation and $O$ be the event of model overestimation.
26 |
27 |
28 |
29 | The method assumes $P(U_t \mid U_{t-l}) = 1$ and $P(O_t \mid O_{t-l}) = 1$.
30 |
31 | Proof:
32 |
33 | Let us show that this assumption has some justification. Suppose we start with a linear model $\hat{y} = X\theta$.
34 |
35 |
36 | $$
37 | \hat{y}_{t-l} = X_{t-l} \theta_{t-l} \\
38 | \theta_{t-l} + \Delta \theta = \theta_t \\
39 | \Delta \theta = -\alpha \frac{d loss_{t-l}}{d X_{t-l}} \\
40 | \hat{y}_t = X_t \theta_t = X_t (\theta_{t-l} + \Delta \theta)
41 | $$
42 |
43 | Suppose there is a perfect model $y = X\theta_p$ and $loss = |y - \hat{y}|$. If $\hat{y}_{t-l} \leq y_{t-l}$, then $\theta_{t-l} \leq \theta_p$ (provided $X_{t-l} > 0$, i.e., $X_{t-l}$ is positive definite). Given this earlier underestimate, we want to show that the next forecast is also very likely an underestimate.
44 |
45 | $$
46 | \begin{align}
47 | \Delta \theta &= - \alpha \frac{d(y_{t-l} - X_{t-l}\theta_{t-l})}{dX_{t-l}} = \alpha \theta_{t-l} \\
48 | \hat{y}_t &= X_t (1 + \alpha )\theta_{t-l} \leq X_t(1+\alpha) \theta_{p} = (1+\alpha) y_t
49 | \ \ \text{if } X_t > 0
50 | \end{align}
51 | $$
52 |
53 |
54 | 如果$\hat{y_{t-l}} > y_{t-l}$,即$\theta_{t-l} > \theta_p$。
55 |
56 |
57 | $$
58 | \begin{align}
59 | \Delta \theta &= - \alpha \theta_{t-l} \\
60 | \hat{y}_t &= X_t(1 - \alpha)\theta_{t-l} > X_t(1 - \alpha)\theta_p = (1 - \alpha)y_t
61 | \end{align}
62 | $$
63 |
64 | Since $\alpha$ is small, the inequalities hold approximately: $P(\hat{y}_t \leq y_t | \hat{y}_{t-l} \leq y_{t-l}) \approx 1$ and $P(\hat{y}_t > y_t | \hat{y}_{t-l} > y_{t-l}) \approx 1$. The smaller $\alpha$, i.e., the slower the gradient updates, the more likely the assumption holds. The argument above works for linear models; and when $l$ is relatively small, we can regard the relation between $y_{t-l}$ and $y_t$ as close to linear. The assumption cannot yet be extended to more general settings, however.
65 |
66 | **When the feature matrix $X_t$ is positive definite and $l$ is small, the assumption holds with high probability.**
67 |
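To make this concrete, here is a toy simulation (not part of the repo; parameter values are arbitrary) of a one-dimensional linear model trained by gradient descent on $loss = |y - \hat{y}|$, counting how often the under/over-estimation sign persists from one round to the next:

```python
import numpy as np

rng = np.random.default_rng(0)
theta_p = 2.0                 # "perfect" coefficient: y = x * theta_p
theta = 1.0                   # start below theta_p, i.e., underestimating
alpha = 0.01                  # small learning rate, as assumed above
signs = []
for _ in range(200):
    x = rng.uniform(0.5, 1.5)           # positive feature, as assumed above
    y, y_hat = x * theta_p, x * theta
    signs.append(y_hat <= y)            # True if this round underestimates
    # gradient step on |y - y_hat|: d/dtheta = -sign(y - y_hat) * x
    theta += alpha * np.sign(y - y_hat) * x
same = np.mean(np.array(signs[:-1]) == np.array(signs[1:]))
print("fraction of rounds whose sign persists to the next round:", round(same, 2))
```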
68 | Applying the method: $e_{t-l} = y_{t-l} - \hat{y}_{t-l}$ and $\tilde{y}_t = \hat{y}_t + e_{t-l}$, where $\tilde{y}_t$ is the adjusted forecast. In practice two situations arise that make the forecast look shifted and delayed, as shown below.
69 |
70 | ![](images/error_adjust_s1.png)
71 |
72 | ![](images/error_adjust_s2.png)
73 |
74 |
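A minimal sketch of this adjustment (illustrative, not repo code; assumes aligned numpy arrays of rolling forecasts and actuals):

```python
import numpy as np

def error_postprocess(y_hat, y_true, l):
    """Shift-correct each forecast by the error observed l steps earlier."""
    y_adj = np.asarray(y_hat, dtype=float).copy()
    for t in range(l, len(y_adj)):
        e = y_true[t - l] - y_hat[t - l]  # e_{t-l} = y_{t-l} - y_hat_{t-l}
        y_adj[t] = y_hat[t] + e           # y_tilde_t = y_hat_t + e_{t-l}
    return y_adj
```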
75 | The overall accuracy ends up higher than if the later windows neither under- nor overestimated, because the errors cancel out.
76 |
77 | To reduce this effect (though it does not necessarily improve the forecast metrics), we introduce a trend term and adjust by rule, as sketched after this list:
78 |
79 | * If the trend at the end of the training set is increasing
80 |   * keep the model's original output where it previously overestimated, and apply error correction where it underestimated
81 | * If the trend at the end of the training set is decreasing
82 |   * keep the model's original output where it previously underestimated, and apply error correction where it overestimated
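A minimal sketch of this trend rule under the same assumptions (the trend is estimated here from the slope of a least-squares fit on the tail of the training series; names are illustrative):

```python
import numpy as np

def trend_adjusted(y_hat, y_true, l, train_tail):
    """Correct only on the side of the error that the tail trend suggests."""
    # slope of a degree-1 least-squares fit over the last training points
    rising = np.polyfit(np.arange(len(train_tail)), train_tail, 1)[0] > 0
    y_adj = np.asarray(y_hat, dtype=float).copy()
    for t in range(l, len(y_adj)):
        e = y_true[t - l] - y_hat[t - l]   # error observed l steps earlier
        underestimated = e > 0
        # rising trend: fix past underestimates; falling trend: fix overestimates
        if (rising and underestimated) or (not rising and not underestimated):
            y_adj[t] = y_hat[t] + e
    return y_adj
```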
83 |
84 | ## Practice
85 |
86 | We use two years of closing stock prices of four companies (Amazon, Google, Alibaba, and JD) as test data to compare the original forecast, the error-adjusted forecast, and the error-trend-adjusted forecast. The comparison metrics are $MAPE$ and $CV$; the model is linear, with a set of time series features. The results are shown below.
87 |
88 | | Company | Original avg. MAPE | Original CV | Error-adjusted avg. MAPE | Error-adjusted CV | Error-trend avg. MAPE | Error-trend CV |
89 | | ------- | ------------------ | ----------- | ------------------------ | ----------------- | --------------------- | -------------- |
90 | | Amazon | 0.103 | **0.608** | **0.065** | 0.638 | 0.075 | 0.776 |
91 | | Google | 0.076 | **0.650** | **0.057** | 0.766 | 0.066 | 0.768 |
92 | | Alibaba | 0.082 | 0.450 | **0.048** | **0.407** | 0.069 | 0.436 |
93 | | JD | 0.062 | 0.597 | **0.045** | **0.437** | 0.053 | 0.474 |
94 |
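For reference, a minimal sketch of the two metrics as assumed here; the exact $CV$ definition is not spelled out in this README, so the error-based variant below is an assumption:

```python
import numpy as np

def mape(y_true, y_hat):
    """Mean absolute percentage error; assumes strictly positive actuals."""
    y_true, y_hat = np.asarray(y_true, float), np.asarray(y_hat, float)
    return np.mean(np.abs(y_true - y_hat) / y_true)

def error_cv(y_true, y_hat):
    """Assumed definition: std of forecast errors over the mean of actuals."""
    y_true, y_hat = np.asarray(y_true, float), np.asarray(y_hat, float)
    return np.std(y_true - y_hat) / np.mean(y_true)
```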
95 | The error-trend approach does not perform as well as plain error adjustment, though it is somewhat better than the original forecast. The main reason, as noted above, is that error adjustment produces more error cancellation when computing average MAPE and CV.
96 |
97 | Illustrations:
98 |
99 | ![](images/amazon_stock_normal_forecast.png)
100 |
101 | ![](images/amazon_stock_adjust_forecast.png)
102 |
103 | ![](images/amazon_stock_adjust_trendy_forecast.png)
104 |
105 | ![](images/google_stock_normal_forecast.png)
106 |
107 | ![](images/google_stock_adjust_forecast.png)
108 |
109 | ![](images/google_stock_adjust_trendy_forecast.png)
110 |
111 | ![](images/alibaba_stock_normal_forecast.png)
112 |
113 | ![](images/alibaba_stock_adjust_forecast.png)
114 |
115 | ![](images/alibaba_stock_adjust_trendy_forecast.png)
116 |
117 | ![](images/jd_stock_normal_forecast.png)
118 |
119 | ![](images/jd_stock_adjust_forecast.png)
120 |
121 | ![](images/jd_stock_adjust_trendy_forecast.png)
122 |
--------------------------------------------------------------------------------
/forecast_auto_adjustment/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/README.pdf
--------------------------------------------------------------------------------
/forecast_auto_adjustment/data/amazon_stock.csv:
--------------------------------------------------------------------------------
1 | Date,Open,High,Low,Close,Adj Close,Volume
2 | 2018-11-02,1678.589966,1697.439941,1651.829956,1665.530029,1665.530029,6955500
3 | 2018-11-05,1657.569946,1658.089966,1596.359985,1627.800049,1627.800049,5624700
4 | 2018-11-06,1618.349976,1665.000000,1614.550049,1642.810059,1642.810059,4257400
5 | 2018-11-07,1673.000000,1759.229980,1664.079956,1755.489990,1755.489990,8192200
6 | 2018-11-08,1755.000000,1784.000000,1725.109985,1754.910034,1754.910034,6534900
7 | 2018-11-09,1732.500000,1743.920044,1701.869995,1712.430054,1712.430054,5902200
8 | 2018-11-12,1698.239990,1708.550049,1630.010010,1636.849976,1636.849976,6806200
9 | 2018-11-13,1649.290039,1677.060059,1613.750000,1631.170044,1631.170044,5933300
10 | 2018-11-14,1656.319946,1673.000000,1597.069946,1599.010010,1599.010010,6486900
11 | 2018-11-15,1581.010010,1624.819946,1546.510010,1619.439941,1619.439941,8427300
12 | 2018-11-16,1587.500000,1614.479980,1573.119995,1593.410034,1593.410034,6066100
13 | 2018-11-19,1577.010010,1581.189941,1503.359985,1512.290039,1512.290039,7790000
14 | 2018-11-20,1437.500000,1534.750000,1420.000000,1495.459961,1495.459961,10878800
15 | 2018-11-21,1542.989990,1550.000000,1515.000000,1516.729980,1516.729980,5716800
16 | 2018-11-23,1517.000000,1536.199951,1501.810059,1502.060059,1502.060059,2707600
17 | 2018-11-26,1539.000000,1584.810059,1524.219971,1581.329956,1581.329956,6257700
18 | 2018-11-27,1575.989990,1597.650024,1558.010010,1581.420044,1581.420044,5783200
19 | 2018-11-28,1613.920044,1681.449951,1601.219971,1677.750000,1677.750000,8458700
20 | 2018-11-29,1674.989990,1689.989990,1652.329956,1673.569946,1673.569946,6613200
21 | 2018-11-30,1679.500000,1696.000000,1666.500000,1690.170044,1690.170044,5761800
22 | 2018-12-03,1769.459961,1778.339966,1730.000000,1772.359985,1772.359985,6862300
23 | 2018-12-04,1756.000000,1770.339966,1665.000000,1668.400024,1668.400024,8694500
24 | 2018-12-06,1614.869995,1701.050049,1609.849976,1699.189941,1699.189941,8789400
25 | 2018-12-07,1705.069946,1718.930054,1625.459961,1629.130005,1629.130005,7576100
26 | 2018-12-10,1623.839966,1657.989990,1590.869995,1641.030029,1641.030029,7494800
27 | 2018-12-11,1678.000000,1679.469971,1619.599976,1643.239990,1643.239990,6244700
28 | 2018-12-12,1669.000000,1704.989990,1660.270020,1663.540039,1663.540039,6598000
29 | 2018-12-13,1680.000000,1692.119995,1641.500000,1658.380005,1658.380005,5271300
30 | 2018-12-14,1638.000000,1642.569946,1585.000000,1591.910034,1591.910034,6367200
31 | 2018-12-17,1566.000000,1576.130005,1505.010010,1520.910034,1520.910034,8829800
32 | 2018-12-18,1540.000000,1567.550049,1523.010010,1551.479980,1551.479980,6523000
33 | 2018-12-19,1543.050049,1584.530029,1483.180054,1495.079956,1495.079956,8792200
34 | 2018-12-20,1484.000000,1509.500000,1432.689941,1460.829956,1460.829956,9991800
35 | 2018-12-21,1464.989990,1480.000000,1363.959961,1377.449951,1377.449951,13640300
36 | 2018-12-24,1346.000000,1396.030029,1307.000000,1343.959961,1343.959961,7220000
37 | 2018-12-26,1368.890015,1473.160034,1363.010010,1470.900024,1470.900024,10411800
38 | 2018-12-27,1454.199951,1469.000000,1390.310059,1461.640015,1461.640015,9722000
39 | 2018-12-28,1473.349976,1513.469971,1449.000000,1478.020020,1478.020020,8829000
40 | 2018-12-31,1510.800049,1520.760010,1487.000000,1501.969971,1501.969971,6954500
41 | 2019-01-02,1465.199951,1553.359985,1460.930054,1539.130005,1539.130005,7983100
42 | 2019-01-03,1520.010010,1538.000000,1497.109985,1500.280029,1500.280029,6975600
43 | 2019-01-04,1530.000000,1594.000000,1518.310059,1575.390015,1575.390015,9182600
44 | 2019-01-07,1602.310059,1634.560059,1589.189941,1629.510010,1629.510010,7993200
45 | 2019-01-08,1664.689941,1676.609985,1616.609985,1656.579956,1656.579956,8881400
46 | 2019-01-09,1652.979980,1667.800049,1641.400024,1659.420044,1659.420044,6348800
47 | 2019-01-10,1641.010010,1663.250000,1621.619995,1656.219971,1656.219971,6507700
48 | 2019-01-11,1640.550049,1660.290039,1636.219971,1640.560059,1640.560059,4686200
49 | 2019-01-14,1615.000000,1648.199951,1595.150024,1617.209961,1617.209961,6005900
50 | 2019-01-15,1632.000000,1675.160034,1626.010010,1674.560059,1674.560059,5998500
51 | 2019-01-16,1684.219971,1705.000000,1675.880005,1683.780029,1683.780029,6366900
52 | 2019-01-17,1680.000000,1700.170044,1677.500000,1693.219971,1693.219971,4208900
53 | 2019-01-18,1712.000000,1716.199951,1691.540039,1696.199951,1696.199951,6020500
54 | 2019-01-22,1681.000000,1681.869995,1610.199951,1632.170044,1632.170044,6416800
55 | 2019-01-23,1656.000000,1657.430054,1612.000000,1640.020020,1640.020020,5225200
56 | 2019-01-24,1641.069946,1657.260010,1631.780029,1654.930054,1654.930054,4089900
57 | 2019-01-25,1670.500000,1683.479980,1661.609985,1670.569946,1670.569946,4945900
58 | 2019-01-28,1643.589966,1645.000000,1614.089966,1637.890015,1637.890015,4837700
59 | 2019-01-29,1631.270020,1632.380005,1590.719971,1593.880005,1593.880005,4632800
60 | 2019-01-30,1623.000000,1676.949951,1619.680054,1670.430054,1670.430054,5783800
61 | 2019-01-31,1692.849976,1736.410034,1679.079956,1718.729980,1718.729980,10910300
62 | 2019-02-01,1638.880005,1673.060059,1622.010010,1626.229980,1626.229980,11506200
63 | 2019-02-04,1623.000000,1649.630005,1613.500000,1633.310059,1633.310059,4929100
64 | 2019-02-05,1643.339966,1665.260010,1642.500000,1658.810059,1658.810059,4453100
65 | 2019-02-06,1670.750000,1672.260010,1633.339966,1640.260010,1640.260010,3939900
66 | 2019-02-07,1625.000000,1625.540039,1592.910034,1614.369995,1614.369995,4626600
67 | 2019-02-08,1586.000000,1588.589966,1566.760010,1588.219971,1588.219971,5657500
68 | 2019-02-11,1600.979980,1609.290039,1586.000000,1591.000000,1591.000000,3317300
69 | 2019-02-12,1604.000000,1639.400024,1598.880005,1638.010010,1638.010010,4858600
70 | 2019-02-13,1647.000000,1656.380005,1637.109985,1640.000000,1640.000000,3560300
71 | 2019-02-14,1624.500000,1637.900024,1606.060059,1622.650024,1622.650024,4120500
72 | 2019-02-15,1627.859985,1628.910034,1604.500000,1607.949951,1607.949951,4343900
73 | 2019-02-19,1601.000000,1634.000000,1600.560059,1627.579956,1627.579956,3681700
74 | 2019-02-20,1630.000000,1634.930054,1610.119995,1622.099976,1622.099976,3337600
75 | 2019-02-21,1619.849976,1623.560059,1600.910034,1619.439941,1619.439941,3483400
76 | 2019-02-22,1623.500000,1634.939941,1621.170044,1631.560059,1631.560059,3096200
77 | 2019-02-25,1641.449951,1654.599976,1630.390015,1633.000000,1633.000000,3184500
78 | 2019-02-26,1625.979980,1639.989990,1616.130005,1636.400024,1636.400024,2665800
79 | 2019-02-27,1628.180054,1641.810059,1615.099976,1641.089966,1641.089966,3148800
80 | 2019-02-28,1635.250000,1651.770020,1633.829956,1639.829956,1639.829956,3025900
81 | 2019-03-01,1655.130005,1674.260010,1651.000000,1671.729980,1671.729980,4974900
82 | 2019-03-04,1685.000000,1709.430054,1674.359985,1696.170044,1696.170044,6167400
83 | 2019-03-05,1702.949951,1707.800049,1689.010010,1692.430054,1692.430054,3681500
84 | 2019-03-06,1695.969971,1697.750000,1668.280029,1668.949951,1668.949951,3996000
85 | 2019-03-07,1667.369995,1669.750000,1620.510010,1625.949951,1625.949951,4957000
86 | 2019-03-08,1604.010010,1622.719971,1586.569946,1620.800049,1620.800049,4667000
87 | 2019-03-11,1626.119995,1672.290039,1626.010010,1670.619995,1670.619995,3876400
88 | 2019-03-12,1669.000000,1684.270020,1660.979980,1673.099976,1673.099976,3614500
89 | 2019-03-13,1683.000000,1700.000000,1679.349976,1690.810059,1690.810059,3552000
90 | 2019-03-14,1691.199951,1702.000000,1684.339966,1686.219971,1686.219971,2946600
91 | 2019-03-15,1703.000000,1718.800049,1693.130005,1712.359985,1712.359985,7550900
92 | 2019-03-18,1712.699951,1750.000000,1712.630005,1742.150024,1742.150024,5429100
93 | 2019-03-19,1753.510010,1784.160034,1753.510010,1761.849976,1761.849976,6364200
94 | 2019-03-20,1769.939941,1799.500000,1767.030029,1797.270020,1797.270020,6265600
95 | 2019-03-21,1796.260010,1823.750000,1787.280029,1819.260010,1819.260010,5767800
96 | 2019-03-22,1810.170044,1818.979980,1763.109985,1764.770020,1764.770020,6363000
97 | 2019-03-25,1757.790039,1782.680054,1747.500000,1774.260010,1774.260010,5103800
98 | 2019-03-26,1793.000000,1805.770020,1773.359985,1783.760010,1783.760010,4865900
99 | 2019-03-27,1784.130005,1787.500000,1745.680054,1765.699951,1765.699951,4324800
100 | 2019-03-28,1770.000000,1777.930054,1753.469971,1773.420044,1773.420044,3043000
101 | 2019-03-29,1786.579956,1792.859985,1776.630005,1780.750000,1780.750000,3320800
102 | 2019-04-01,1800.109985,1815.670044,1798.729980,1814.189941,1814.189941,4238800
103 | 2019-04-02,1811.020020,1820.000000,1805.119995,1813.979980,1813.979980,3448100
104 | 2019-04-03,1826.719971,1830.000000,1809.619995,1820.699951,1820.699951,3980600
105 | 2019-04-04,1820.650024,1828.750000,1804.199951,1818.859985,1818.859985,3623900
106 | 2019-04-05,1829.000000,1838.579956,1825.189941,1837.280029,1837.280029,3640500
107 | 2019-04-08,1833.229980,1850.199951,1825.109985,1849.859985,1849.859985,3752800
108 | 2019-04-09,1845.489990,1853.089966,1831.780029,1835.839966,1835.839966,3714400
109 | 2019-04-10,1841.000000,1848.000000,1828.810059,1847.329956,1847.329956,2964000
110 | 2019-04-11,1848.699951,1849.949951,1840.310059,1844.069946,1844.069946,2654800
111 | 2019-04-12,1848.400024,1851.500000,1841.300049,1843.060059,1843.060059,3114400
112 | 2019-04-15,1842.000000,1846.849976,1818.900024,1844.869995,1844.869995,3724400
113 | 2019-04-16,1851.349976,1869.770020,1848.000000,1863.040039,1863.040039,3044600
114 | 2019-04-17,1872.989990,1876.469971,1860.439941,1864.819946,1864.819946,2893500
115 | 2019-04-18,1868.790039,1870.819946,1859.479980,1861.689941,1861.689941,2749900
116 | 2019-04-22,1855.400024,1888.420044,1845.640015,1887.310059,1887.310059,3373800
117 | 2019-04-23,1891.199951,1929.260010,1889.579956,1923.770020,1923.770020,4640400
118 | 2019-04-24,1925.000000,1929.689941,1898.160034,1901.750000,1901.750000,3675800
119 | 2019-04-25,1917.000000,1922.449951,1900.310059,1902.250000,1902.250000,6099100
120 | 2019-04-26,1929.000000,1951.000000,1898.000000,1950.630005,1950.630005,8432600
121 | 2019-04-29,1949.000000,1956.339966,1934.089966,1938.430054,1938.430054,4021300
122 | 2019-04-30,1930.099976,1935.709961,1906.949951,1926.520020,1926.520020,3506000
123 | 2019-05-01,1933.089966,1943.640015,1910.550049,1911.520020,1911.520020,3117000
124 | 2019-05-02,1913.329956,1921.550049,1881.869995,1900.819946,1900.819946,3962900
125 | 2019-05-03,1949.000000,1964.400024,1936.000000,1962.459961,1962.459961,6381600
126 | 2019-05-06,1917.979980,1959.000000,1910.500000,1950.550049,1950.550049,5417800
127 | 2019-05-07,1939.989990,1949.099976,1903.380005,1921.000000,1921.000000,5902100
128 | 2019-05-08,1918.869995,1935.369995,1910.000000,1917.770020,1917.770020,4078600
129 | 2019-05-09,1900.000000,1909.400024,1876.000000,1899.869995,1899.869995,5308300
130 | 2019-05-10,1898.000000,1903.790039,1856.000000,1889.979980,1889.979980,5718000
131 | 2019-05-13,1836.560059,1846.540039,1818.000000,1822.680054,1822.680054,5783400
132 | 2019-05-14,1839.500000,1852.439941,1815.750000,1840.119995,1840.119995,4629100
133 | 2019-05-15,1827.949951,1874.430054,1823.000000,1871.150024,1871.150024,4692600
134 | 2019-05-16,1885.939941,1917.510010,1882.290039,1907.569946,1907.569946,4707800
135 | 2019-05-17,1893.050049,1910.530029,1867.329956,1869.000000,1869.000000,4736600
136 | 2019-05-20,1852.689941,1867.780029,1835.540039,1858.969971,1858.969971,3798200
137 | 2019-05-21,1874.790039,1879.000000,1846.000000,1857.520020,1857.520020,4005100
138 | 2019-05-22,1851.780029,1871.489990,1851.000000,1859.680054,1859.680054,2936600
139 | 2019-05-23,1836.589966,1844.000000,1804.199951,1815.479980,1815.479980,4424300
140 | 2019-05-24,1835.890015,1841.760010,1817.849976,1823.280029,1823.280029,3369700
141 | 2019-05-28,1832.750000,1849.270020,1827.349976,1836.430054,1836.430054,3200000
142 | 2019-05-29,1823.119995,1830.000000,1807.530029,1819.189941,1819.189941,4279000
143 | 2019-05-30,1825.489990,1829.469971,1807.829956,1816.319946,1816.319946,3146900
144 | 2019-05-31,1790.010010,1795.589966,1772.699951,1775.069946,1775.069946,4618800
145 | 2019-06-03,1760.010010,1766.290039,1672.000000,1692.689941,1692.689941,9098700
146 | 2019-06-04,1699.239990,1730.819946,1680.890015,1729.560059,1729.560059,5679100
147 | 2019-06-05,1749.599976,1752.000000,1715.250000,1738.500000,1738.500000,4239800
148 | 2019-06-06,1737.709961,1760.000000,1726.130005,1754.359985,1754.359985,3689300
149 | 2019-06-07,1763.699951,1806.250000,1759.489990,1804.030029,1804.030029,4808200
150 | 2019-06-10,1822.000000,1884.869995,1818.000000,1860.630005,1860.630005,5371000
151 | 2019-06-11,1883.250000,1893.699951,1858.000000,1863.699951,1863.699951,4042700
152 | 2019-06-12,1853.979980,1865.000000,1844.380005,1855.319946,1855.319946,2674500
153 | 2019-06-13,1866.719971,1883.089966,1862.219971,1870.300049,1870.300049,2795800
154 | 2019-06-14,1864.000000,1876.000000,1859.000000,1869.670044,1869.670044,2851200
155 | 2019-06-17,1876.500000,1895.689941,1875.449951,1886.030029,1886.030029,2634300
156 | 2019-06-18,1901.349976,1921.670044,1899.790039,1901.369995,1901.369995,3895700
157 | 2019-06-19,1907.839966,1919.579956,1892.469971,1908.790039,1908.790039,2895300
158 | 2019-06-20,1933.329956,1935.199951,1905.800049,1918.189941,1918.189941,3217200
159 | 2019-06-21,1916.099976,1925.949951,1907.579956,1911.300049,1911.300049,3933600
160 | 2019-06-24,1912.660034,1916.859985,1901.300049,1913.900024,1913.900024,2283000
161 | 2019-06-25,1911.839966,1916.390015,1872.420044,1878.270020,1878.270020,3012300
162 | 2019-06-26,1892.479980,1903.800049,1887.319946,1897.829956,1897.829956,2441900
163 | 2019-06-27,1902.000000,1911.239990,1898.040039,1904.280029,1904.280029,2141700
164 | 2019-06-28,1909.099976,1912.939941,1884.000000,1893.630005,1893.630005,3037400
165 | 2019-07-01,1922.979980,1929.819946,1914.660034,1922.189941,1922.189941,3203300
166 | 2019-07-02,1919.380005,1934.790039,1906.630005,1934.310059,1934.310059,2645900
167 | 2019-07-03,1935.890015,1941.589966,1930.500000,1939.000000,1939.000000,1690300
168 | 2019-07-05,1928.599976,1945.900024,1925.300049,1942.910034,1942.910034,2628400
169 | 2019-07-08,1934.119995,1956.000000,1928.250000,1952.319946,1952.319946,2883400
170 | 2019-07-09,1947.800049,1990.010010,1943.479980,1988.300049,1988.300049,4345700
171 | 2019-07-10,1996.510010,2024.939941,1995.400024,2017.410034,2017.410034,4931900
172 | 2019-07-11,2025.619995,2035.800049,1995.300049,2001.069946,2001.069946,4317800
173 | 2019-07-12,2008.270020,2017.000000,2003.869995,2011.000000,2011.000000,2509300
174 | 2019-07-15,2021.400024,2022.900024,2001.550049,2020.989990,2020.989990,2981300
175 | 2019-07-16,2010.579956,2026.319946,2001.219971,2009.900024,2009.900024,2618200
176 | 2019-07-17,2007.050049,2012.000000,1992.030029,1992.030029,1992.030029,2558800
177 | 2019-07-18,1980.010010,1987.500000,1951.550049,1977.900024,1977.900024,3504300
178 | 2019-07-19,1991.209961,1996.000000,1962.229980,1964.520020,1964.520020,3185600
179 | 2019-07-22,1971.140015,1989.000000,1958.260010,1985.630005,1985.630005,2900000
180 | 2019-07-23,1995.989990,1997.790039,1973.130005,1994.489990,1994.489990,2703500
181 | 2019-07-24,1969.300049,2001.300049,1965.869995,2000.810059,2000.810059,2631300
182 | 2019-07-25,2001.000000,2001.199951,1972.719971,1973.819946,1973.819946,4136500
183 | 2019-07-26,1942.000000,1950.900024,1924.510010,1943.050049,1943.050049,4927100
184 | 2019-07-29,1930.000000,1932.229980,1890.540039,1912.449951,1912.449951,4493200
185 | 2019-07-30,1891.119995,1909.890015,1883.479980,1898.530029,1898.530029,2910900
186 | 2019-07-31,1898.109985,1899.550049,1849.439941,1866.780029,1866.780029,4470700
187 | 2019-08-01,1871.719971,1897.920044,1844.010010,1855.319946,1855.319946,4713300
188 | 2019-08-02,1845.069946,1846.359985,1808.020020,1823.239990,1823.239990,4956200
189 | 2019-08-05,1770.219971,1788.670044,1748.780029,1765.130005,1765.130005,6058200
190 | 2019-08-06,1792.229980,1793.770020,1753.400024,1787.829956,1787.829956,5070300
191 | 2019-08-07,1773.989990,1798.930054,1757.000000,1793.400024,1793.400024,4526900
192 | 2019-08-08,1806.000000,1834.260010,1798.109985,1832.890015,1832.890015,3701200
193 | 2019-08-09,1828.949951,1831.089966,1802.219971,1807.579956,1807.579956,2879800
194 | 2019-08-12,1795.989990,1800.979980,1777.000000,1784.920044,1784.920044,2905500
195 | 2019-08-13,1783.000000,1831.739990,1780.000000,1824.339966,1824.339966,3994000
196 | 2019-08-14,1793.010010,1795.650024,1757.219971,1762.959961,1762.959961,4893600
197 | 2019-08-15,1781.989990,1788.000000,1761.959961,1776.119995,1776.119995,3759100
198 | 2019-08-16,1792.890015,1802.910034,1784.550049,1792.569946,1792.569946,3018000
199 | 2019-08-19,1818.079956,1826.000000,1812.609985,1816.119995,1816.119995,2816300
200 | 2019-08-20,1814.500000,1816.819946,1799.880005,1801.380005,1801.380005,1929500
201 | 2019-08-21,1819.390015,1829.579956,1815.000000,1823.540039,1823.540039,2031800
202 | 2019-08-22,1828.000000,1829.410034,1800.099976,1804.660034,1804.660034,2653500
203 | 2019-08-23,1793.030029,1804.900024,1745.229980,1749.619995,1749.619995,5270800
204 | 2019-08-26,1766.910034,1770.000000,1743.510010,1768.869995,1768.869995,3080000
205 | 2019-08-27,1775.729980,1779.400024,1746.680054,1761.829956,1761.829956,3019700
206 | 2019-08-28,1755.000000,1767.859985,1744.050049,1764.250000,1764.250000,2419700
207 | 2019-08-29,1783.000000,1798.550049,1777.250000,1786.400024,1786.400024,3015100
208 | 2019-08-30,1797.489990,1799.739990,1764.569946,1776.290039,1776.290039,3058700
209 | 2019-09-03,1770.000000,1800.800049,1768.000000,1789.839966,1789.839966,3543000
210 | 2019-09-04,1805.000000,1807.630005,1796.229980,1800.619995,1800.619995,2324100
211 | 2019-09-05,1821.949951,1842.000000,1815.579956,1840.719971,1840.719971,3310800
212 | 2019-09-06,1838.219971,1840.650024,1826.400024,1833.510010,1833.510010,2496900
213 | 2019-09-09,1841.000000,1850.000000,1824.609985,1831.349976,1831.349976,2999500
214 | 2019-09-10,1822.750000,1825.810059,1805.339966,1820.550049,1820.550049,2613900
215 | 2019-09-11,1812.140015,1833.420044,1809.079956,1822.989990,1822.989990,2432800
216 | 2019-09-12,1837.630005,1853.660034,1834.280029,1843.550049,1843.550049,2823500
217 | 2019-09-13,1842.010010,1846.119995,1835.170044,1839.339966,1839.339966,1971300
218 | 2019-09-16,1824.020020,1825.689941,1800.199951,1807.839966,1807.839966,3675500
219 | 2019-09-17,1807.079956,1823.989990,1804.099976,1822.550049,1822.550049,1982400
220 | 2019-09-18,1817.040039,1822.060059,1795.500000,1817.459961,1817.459961,2505600
221 | 2019-09-19,1821.020020,1832.569946,1817.900024,1821.500000,1821.500000,2031500
222 | 2019-09-20,1821.709961,1830.630005,1780.920044,1794.160034,1794.160034,5341900
223 | 2019-09-23,1777.000000,1792.699951,1767.319946,1785.300049,1785.300049,2922300
224 | 2019-09-24,1790.609985,1795.709961,1735.550049,1741.609985,1741.609985,4616000
225 | 2019-09-25,1747.359985,1773.000000,1723.000000,1768.329956,1768.329956,3493200
226 | 2019-09-26,1762.790039,1763.369995,1731.500000,1739.839966,1739.839966,3536800
227 | 2019-09-27,1748.000000,1749.119995,1713.819946,1725.449951,1725.449951,3907200
228 | 2019-09-30,1726.989990,1737.459961,1709.219971,1735.910034,1735.910034,2644700
229 | 2019-10-01,1746.000000,1755.599976,1728.410034,1735.650024,1735.650024,3084500
230 | 2019-10-02,1727.739990,1728.890015,1705.000000,1713.229980,1713.229980,3301100
231 | 2019-10-03,1713.000000,1725.000000,1685.060059,1724.420044,1724.420044,3468200
232 | 2019-10-04,1726.020020,1740.579956,1719.229980,1739.650024,1739.650024,2471900
233 | 2019-10-07,1731.630005,1747.829956,1723.699951,1732.660034,1732.660034,2154700
234 | 2019-10-08,1722.489990,1727.000000,1705.000000,1705.510010,1705.510010,2542000
235 | 2019-10-09,1719.609985,1729.949951,1714.359985,1721.989990,1721.989990,2043500
236 | 2019-10-10,1725.239990,1738.290039,1713.750000,1720.260010,1720.260010,2575200
237 | 2019-10-11,1742.920044,1745.449951,1729.859985,1731.920044,1731.920044,3255000
238 | 2019-10-14,1728.910034,1741.890015,1722.000000,1736.430054,1736.430054,1910200
239 | 2019-10-15,1742.140015,1776.449951,1740.619995,1767.380005,1767.380005,3111700
240 | 2019-10-16,1773.329956,1786.239990,1770.520020,1777.430054,1777.430054,2763400
241 | 2019-10-17,1796.489990,1798.849976,1782.020020,1787.479980,1787.479980,2647400
242 | 2019-10-18,1787.800049,1793.979980,1749.199951,1757.510010,1757.510010,3362500
243 | 2019-10-21,1769.660034,1785.880005,1765.000000,1785.660034,1785.660034,2130400
244 | 2019-10-22,1788.150024,1789.780029,1762.000000,1765.729980,1765.729980,2111700
245 | 2019-10-23,1761.300049,1770.050049,1742.000000,1762.170044,1762.170044,2138200
246 | 2019-10-24,1771.089966,1788.339966,1760.270020,1780.780029,1780.780029,4446100
247 | 2019-10-25,1697.550049,1764.209961,1695.000000,1761.329956,1761.329956,9626400
248 | 2019-10-28,1748.060059,1778.699951,1742.500000,1777.079956,1777.079956,3708900
249 | 2019-10-29,1774.810059,1777.000000,1755.810059,1762.709961,1762.709961,2276900
250 | 2019-10-30,1760.239990,1782.380005,1759.119995,1779.989990,1779.989990,2449400
251 | 2019-10-31,1775.989990,1792.000000,1771.479980,1776.660034,1776.660034,2781200
252 | 2019-11-01,1788.010010,1797.449951,1785.209961,1791.439941,1791.439941,2790400
253 | 2019-11-04,1801.010010,1815.060059,1801.010010,1804.660034,1804.660034,2771900
254 | 2019-11-05,1809.160034,1810.250000,1794.000000,1801.709961,1801.709961,1885500
255 | 2019-11-06,1801.000000,1802.500000,1788.579956,1795.770020,1795.770020,2029800
256 | 2019-11-07,1803.760010,1805.900024,1783.479980,1788.199951,1788.199951,2651100
257 | 2019-11-08,1787.890015,1789.880005,1774.040039,1785.880005,1785.880005,2123300
258 | 2019-11-11,1778.000000,1780.000000,1767.130005,1771.650024,1771.650024,1946000
259 | 2019-11-12,1774.660034,1786.219971,1771.910034,1778.000000,1778.000000,2037600
260 | 2019-11-13,1773.390015,1775.000000,1747.319946,1753.109985,1753.109985,2989500
261 | 2019-11-14,1751.430054,1766.589966,1749.560059,1754.599976,1754.599976,2264800
262 | 2019-11-15,1760.050049,1761.680054,1732.859985,1739.489990,1739.489990,3927600
263 | 2019-11-18,1738.300049,1753.699951,1722.709961,1752.530029,1752.530029,2839500
264 | 2019-11-19,1756.989990,1760.680054,1743.030029,1752.790039,1752.790039,2270800
265 | 2019-11-20,1749.140015,1762.520020,1734.119995,1745.530029,1745.530029,2790000
266 | 2019-11-21,1743.000000,1746.869995,1730.359985,1734.709961,1734.709961,2662900
267 | 2019-11-22,1739.020020,1746.430054,1731.000000,1745.719971,1745.719971,2479100
268 | 2019-11-25,1753.250000,1777.420044,1753.239990,1773.839966,1773.839966,3486200
269 | 2019-11-26,1779.920044,1797.030029,1778.349976,1796.939941,1796.939941,3181200
270 | 2019-11-27,1801.000000,1824.500000,1797.310059,1818.510010,1818.510010,3025600
271 | 2019-11-29,1817.780029,1824.689941,1800.790039,1800.800049,1800.800049,1923400
272 | 2019-12-02,1804.400024,1805.550049,1762.680054,1781.599976,1781.599976,3925600
273 | 2019-12-03,1760.000000,1772.869995,1747.229980,1769.959961,1769.959961,3380900
274 | 2019-12-04,1774.010010,1789.089966,1760.219971,1760.689941,1760.689941,2670100
275 | 2019-12-05,1763.500000,1763.500000,1740.000000,1740.479980,1740.479980,2823800
276 | 2019-12-06,1751.199951,1754.400024,1740.130005,1751.599976,1751.599976,3117400
277 | 2019-12-09,1750.660034,1766.890015,1745.609985,1749.510010,1749.510010,2442800
278 | 2019-12-10,1747.400024,1750.670044,1735.000000,1739.209961,1739.209961,2514300
279 | 2019-12-11,1741.670044,1750.000000,1735.709961,1748.719971,1748.719971,2097600
280 | 2019-12-12,1750.000000,1764.000000,1745.439941,1760.329956,1760.329956,3095900
281 | 2019-12-13,1765.000000,1768.989990,1755.000000,1760.939941,1760.939941,2745700
282 | 2019-12-16,1767.000000,1769.500000,1757.050049,1769.209961,1769.209961,3145200
283 | 2019-12-17,1778.010010,1792.000000,1777.390015,1790.660034,1790.660034,3644400
284 | 2019-12-18,1795.020020,1798.199951,1782.359985,1784.030029,1784.030029,3351400
285 | 2019-12-19,1780.500000,1792.989990,1774.060059,1792.280029,1792.280029,2652800
286 | 2019-12-20,1799.619995,1802.969971,1782.449951,1786.500000,1786.500000,5150800
287 | 2019-12-23,1788.260010,1793.000000,1784.510010,1793.000000,1793.000000,2136400
288 | 2019-12-24,1793.810059,1795.569946,1787.579956,1789.209961,1789.209961,881300
289 | 2019-12-26,1801.010010,1870.459961,1799.500000,1868.770020,1868.770020,6005400
290 | 2019-12-27,1882.920044,1901.400024,1866.010010,1869.800049,1869.800049,6186600
291 | 2019-12-30,1874.000000,1884.000000,1840.619995,1846.890015,1846.890015,3674700
292 | 2019-12-31,1842.000000,1853.260010,1832.229980,1847.839966,1847.839966,2506500
293 | 2020-01-02,1875.000000,1898.010010,1864.150024,1898.010010,1898.010010,4029000
294 | 2020-01-03,1864.500000,1886.199951,1864.500000,1874.969971,1874.969971,3764400
295 | 2020-01-06,1860.000000,1903.689941,1860.000000,1902.880005,1902.880005,4061800
296 | 2020-01-07,1904.500000,1913.890015,1892.040039,1906.859985,1906.859985,4044900
297 | 2020-01-08,1898.040039,1911.000000,1886.439941,1891.969971,1891.969971,3508000
298 | 2020-01-09,1909.890015,1917.819946,1895.800049,1901.050049,1901.050049,3167300
299 | 2020-01-10,1905.369995,1906.939941,1880.000000,1883.160034,1883.160034,2853700
300 | 2020-01-13,1891.310059,1898.000000,1880.800049,1891.300049,1891.300049,2780800
301 | 2020-01-14,1885.880005,1887.109985,1858.550049,1869.439941,1869.439941,3440900
302 | 2020-01-15,1872.250000,1878.859985,1855.089966,1862.020020,1862.020020,2896600
303 | 2020-01-16,1882.989990,1885.589966,1866.020020,1877.939941,1877.939941,2659500
304 | 2020-01-17,1885.890015,1886.640015,1857.250000,1864.719971,1864.719971,3997300
305 | 2020-01-21,1865.000000,1894.270020,1860.000000,1892.000000,1892.000000,3707800
306 | 2020-01-22,1896.089966,1902.500000,1883.339966,1887.459961,1887.459961,3216300
307 | 2020-01-23,1885.109985,1889.979980,1872.760010,1884.579956,1884.579956,2484600
308 | 2020-01-24,1891.369995,1894.989990,1847.439941,1861.640015,1861.640015,3766200
309 | 2020-01-27,1820.000000,1841.000000,1815.339966,1828.339966,1828.339966,3528500
310 | 2020-01-28,1840.500000,1858.109985,1830.020020,1853.250000,1853.250000,2808000
311 | 2020-01-29,1864.000000,1874.750000,1855.020020,1858.000000,1858.000000,2088000
312 | 2020-01-30,1858.000000,1872.869995,1850.609985,1870.680054,1870.680054,6327400
313 | 2020-01-31,2051.469971,2055.719971,2002.270020,2008.719971,2008.719971,15567300
314 | 2020-02-03,2010.599976,2048.500000,2000.250000,2004.199951,2004.199951,5899100
315 | 2020-02-04,2029.880005,2059.800049,2015.369995,2049.669922,2049.669922,5289300
316 | 2020-02-05,2071.020020,2071.020020,2032.000000,2039.869995,2039.869995,4376200
317 | 2020-02-06,2041.020020,2056.300049,2024.800049,2050.229980,2050.229980,3183000
318 | 2020-02-07,2041.989990,2098.530029,2038.099976,2079.280029,2079.280029,5095300
319 | 2020-02-10,2085.010010,2135.600098,2084.959961,2133.909912,2133.909912,5056200
320 | 2020-02-11,2150.899902,2185.949951,2136.000000,2150.800049,2150.800049,5746000
321 | 2020-02-12,2163.199951,2180.250000,2155.290039,2160.000000,2160.000000,3334300
322 | 2020-02-13,2144.989990,2170.280029,2142.000000,2149.870117,2149.870117,3031800
323 | 2020-02-14,2155.679932,2159.040039,2125.889893,2134.870117,2134.870117,2606200
324 | 2020-02-18,2125.020020,2166.070068,2124.110107,2155.669922,2155.669922,2945600
325 | 2020-02-19,2167.800049,2185.100098,2161.120117,2170.219971,2170.219971,2561200
326 | 2020-02-20,2173.070068,2176.790039,2127.449951,2153.100098,2153.100098,3131300
327 | 2020-02-21,2142.149902,2144.550049,2088.000000,2095.969971,2095.969971,4646300
328 | 2020-02-24,2003.180054,2039.300049,1987.969971,2009.290039,2009.290039,6547000
329 | 2020-02-25,2026.420044,2034.599976,1958.420044,1972.739990,1972.739990,6219100
330 | 2020-02-26,1970.280029,2014.670044,1960.449951,1979.589966,1979.589966,5224600
331 | 2020-02-27,1934.380005,1975.000000,1882.760010,1884.300049,1884.300049,8144000
332 | 2020-02-28,1814.630005,1889.760010,1811.130005,1883.750000,1883.750000,9493800
333 | 2020-03-02,1906.489990,1954.510010,1870.000000,1953.949951,1953.949951,6761700
334 | 2020-03-03,1975.369995,1996.329956,1888.089966,1908.989990,1908.989990,7534500
335 | 2020-03-04,1946.569946,1978.000000,1922.000000,1975.829956,1975.829956,4772900
336 | 2020-03-05,1933.000000,1960.719971,1910.000000,1924.030029,1924.030029,4748200
337 | 2020-03-06,1875.000000,1910.869995,1869.500000,1901.089966,1901.089966,5273600
338 | 2020-03-09,1773.859985,1862.770020,1761.290039,1800.609985,1800.609985,7813200
339 | 2020-03-10,1870.880005,1894.270020,1818.170044,1891.819946,1891.819946,7133300
340 | 2020-03-11,1857.849976,1871.319946,1801.500000,1820.859985,1820.859985,5624800
341 | 2020-03-12,1721.979980,1765.000000,1675.000000,1676.609985,1676.609985,11346200
342 | 2020-03-13,1755.000000,1786.310059,1680.619995,1785.000000,1785.000000,8809700
343 | 2020-03-16,1641.510010,1759.449951,1626.030029,1689.150024,1689.150024,8917300
344 | 2020-03-17,1775.469971,1857.780029,1689.239990,1807.839966,1807.839966,10917100
345 | 2020-03-18,1750.000000,1841.660034,1745.000000,1830.000000,1830.000000,9645200
346 | 2020-03-19,1860.000000,1945.000000,1832.650024,1880.930054,1880.930054,10399900
347 | 2020-03-20,1926.310059,1957.000000,1820.729980,1846.089966,1846.089966,9817900
348 | 2020-03-23,1827.750000,1919.400024,1812.000000,1902.829956,1902.829956,7808500
349 | 2020-03-24,1951.500000,1955.000000,1900.339966,1940.099976,1940.099976,7147100
350 | 2020-03-25,1920.689941,1950.260010,1885.780029,1885.839966,1885.839966,6479100
351 | 2020-03-26,1902.000000,1956.489990,1889.290039,1955.489990,1955.489990,6221300
352 | 2020-03-27,1930.859985,1939.790039,1899.920044,1900.099976,1900.099976,5387900
353 | 2020-03-30,1922.829956,1973.630005,1912.339966,1963.949951,1963.949951,6126100
354 | 2020-03-31,1964.349976,1993.020020,1944.010010,1949.719971,1949.719971,5123600
355 | 2020-04-01,1932.969971,1944.959961,1893.000000,1907.699951,1907.699951,4121900
356 | 2020-04-02,1901.640015,1927.530029,1890.000000,1918.829956,1918.829956,4336000
357 | 2020-04-03,1911.150024,1926.329956,1889.150024,1906.589966,1906.589966,3609900
358 | 2020-04-06,1936.000000,1998.520020,1930.020020,1997.589966,1997.589966,5773200
359 | 2020-04-07,2017.109985,2035.719971,1997.619995,2011.599976,2011.599976,5114000
360 | 2020-04-08,2021.000000,2044.000000,2011.150024,2043.000000,2043.000000,3977300
361 | 2020-04-09,2044.300049,2053.000000,2017.660034,2042.760010,2042.760010,4655600
362 | 2020-04-13,2040.000000,2180.000000,2038.000000,2168.870117,2168.870117,6716700
363 | 2020-04-14,2200.469971,2292.000000,2186.209961,2283.320068,2283.320068,8087200
364 | 2020-04-15,2257.679932,2333.370117,2245.000000,2307.679932,2307.679932,6866600
365 | 2020-04-16,2346.000000,2461.000000,2335.000000,2408.189941,2408.189941,12038200
366 | 2020-04-17,2372.330078,2400.000000,2316.020020,2375.000000,2375.000000,7930000
367 | 2020-04-20,2389.949951,2444.979980,2386.050049,2393.610107,2393.610107,5770700
368 | 2020-04-21,2416.610107,2428.310059,2279.659912,2328.120117,2328.120117,7476700
369 | 2020-04-22,2369.000000,2394.000000,2351.000000,2363.489990,2363.489990,4218300
370 | 2020-04-23,2399.979980,2424.219971,2382.080078,2399.449951,2399.449951,5066600
371 | 2020-04-24,2417.000000,2420.429932,2382.000000,2410.219971,2410.219971,3831800
372 | 2020-04-27,2443.199951,2444.879883,2363.000000,2376.000000,2376.000000,5645600
373 | 2020-04-28,2372.100098,2373.500000,2306.000000,2314.080078,2314.080078,5269400
374 | 2020-04-29,2330.010010,2391.889893,2310.000000,2372.709961,2372.709961,4591600
375 | 2020-04-30,2419.840088,2475.000000,2396.010010,2474.000000,2474.000000,9534600
376 | 2020-05-01,2336.800049,2362.439941,2258.189941,2286.040039,2286.040039,9754900
377 | 2020-05-04,2256.379883,2326.979980,2256.379883,2315.989990,2315.989990,4865900
378 | 2020-05-05,2340.000000,2351.000000,2307.129883,2317.800049,2317.800049,3242500
379 | 2020-05-06,2329.439941,2357.449951,2320.000000,2351.260010,2351.260010,3117800
380 | 2020-05-07,2374.780029,2376.000000,2343.110107,2367.610107,2367.610107,3396400
381 | 2020-05-08,2372.139893,2387.239990,2357.000000,2379.610107,2379.610107,3211200
382 | 2020-05-11,2374.699951,2419.669922,2372.110107,2409.000000,2409.000000,3259200
383 | 2020-05-12,2411.850098,2419.000000,2355.000000,2356.949951,2356.949951,3074900
384 | 2020-05-13,2366.800049,2407.699951,2337.800049,2367.919922,2367.919922,4782900
385 | 2020-05-14,2361.010010,2391.370117,2353.209961,2388.850098,2388.850098,3648100
386 | 2020-05-15,2368.520020,2411.000000,2356.370117,2409.780029,2409.780029,4235000
387 | 2020-05-18,2404.350098,2433.000000,2384.010010,2426.260010,2426.260010,4366600
388 | 2020-05-19,2429.830078,2485.000000,2428.969971,2449.330078,2449.330078,4320500
389 | 2020-05-20,2477.870117,2500.010010,2467.270020,2497.939941,2497.939941,3998100
390 | 2020-05-21,2500.000000,2525.449951,2442.540039,2446.739990,2446.739990,5114400
391 | 2020-05-22,2455.010010,2469.850098,2430.129883,2436.879883,2436.879883,2867100
392 | 2020-05-26,2458.000000,2462.000000,2414.060059,2421.860107,2421.860107,3568200
393 | 2020-05-27,2404.989990,2413.580078,2330.000000,2410.389893,2410.389893,5056900
394 | 2020-05-28,2384.330078,2436.969971,2378.229980,2401.100098,2401.100098,3190200
395 | 2020-05-29,2415.939941,2442.370117,2398.199951,2442.370117,2442.370117,3529300
396 | 2020-06-01,2448.000000,2476.929932,2444.169922,2471.040039,2471.040039,2928900
397 | 2020-06-02,2467.000000,2473.530029,2445.310059,2472.409912,2472.409912,2529900
398 | 2020-06-03,2468.010010,2488.000000,2461.169922,2478.399902,2478.399902,2671000
399 | 2020-06-04,2477.429932,2507.540039,2450.010010,2460.600098,2460.600098,2948700
400 | 2020-06-05,2444.510010,2488.649902,2437.129883,2483.000000,2483.000000,3306400
401 | 2020-06-08,2500.199951,2530.000000,2487.340088,2524.060059,2524.060059,3970700
402 | 2020-06-09,2529.439941,2626.429932,2525.000000,2600.860107,2600.860107,5176000
403 | 2020-06-10,2645.000000,2722.350098,2626.260010,2647.449951,2647.449951,4946000
404 | 2020-06-11,2603.500000,2671.379883,2536.229980,2557.959961,2557.959961,5800100
405 | 2020-06-12,2601.209961,2621.479980,2503.350098,2545.020020,2545.020020,5429600
406 | 2020-06-15,2526.600098,2584.000000,2508.000000,2572.679932,2572.679932,3865100
407 | 2020-06-16,2620.000000,2620.000000,2576.000000,2615.270020,2615.270020,3585600
408 | 2020-06-17,2647.500000,2655.000000,2631.820068,2640.979980,2640.979980,2951100
409 | 2020-06-18,2647.010010,2659.639893,2636.110107,2653.979980,2653.979980,2487800
410 | 2020-06-19,2678.080078,2697.429932,2659.000000,2675.010010,2675.010010,5777000
411 | 2020-06-22,2684.500000,2715.000000,2669.000000,2713.820068,2713.820068,3208800
412 | 2020-06-23,2726.020020,2783.110107,2718.040039,2764.409912,2764.409912,4231700
413 | 2020-06-24,2780.000000,2796.000000,2721.000000,2734.399902,2734.399902,4526600
414 | 2020-06-25,2739.550049,2756.229980,2712.139893,2754.580078,2754.580078,2968700
415 | 2020-06-26,2775.060059,2782.570068,2688.000000,2692.870117,2692.870117,6500800
416 | 2020-06-29,2690.010010,2696.800049,2630.080078,2680.379883,2680.379883,4223400
417 | 2020-06-30,2685.070068,2769.629883,2675.030029,2758.820068,2758.820068,3769700
418 | 2020-07-01,2757.989990,2895.000000,2754.000000,2878.699951,2878.699951,6363400
419 | 2020-07-02,2912.010010,2955.560059,2871.100098,2890.300049,2890.300049,6593400
420 | 2020-07-06,2934.969971,3059.879883,2930.000000,3057.040039,3057.040039,6880600
421 | 2020-07-07,3058.550049,3069.550049,2990.000000,3000.120117,3000.120117,5257500
422 | 2020-07-08,3022.610107,3083.969971,3012.429932,3081.110107,3081.110107,5037600
423 | 2020-07-09,3115.989990,3193.879883,3074.000000,3182.629883,3182.629883,6388700
424 | 2020-07-10,3191.760010,3215.000000,3135.699951,3200.000000,3200.000000,5486000
425 | 2020-07-13,3251.060059,3344.290039,3068.389893,3104.000000,3104.000000,7720400
426 | 2020-07-14,3089.000000,3127.379883,2950.000000,3084.000000,3084.000000,7231900
427 | 2020-07-15,3080.229980,3098.350098,2973.179932,3008.870117,3008.870117,5788900
428 | 2020-07-16,2971.060059,3032.000000,2918.229980,2999.899902,2999.899902,6394200
429 | 2020-07-17,3009.000000,3024.000000,2948.449951,2961.969971,2961.969971,4761300
430 | 2020-07-20,3000.199951,3201.360107,2994.020020,3196.840088,3196.840088,7598200
431 | 2020-07-21,3232.489990,3240.580078,3105.719971,3138.290039,3138.290039,6135000
432 | 2020-07-22,3125.000000,3150.000000,3065.260010,3099.909912,3099.909912,4104200
433 | 2020-07-23,3098.270020,3098.270020,2970.000000,2986.550049,2986.550049,5656900
434 | 2020-07-24,2930.000000,3031.580078,2888.000000,3008.909912,3008.909912,5632400
435 | 2020-07-27,3062.000000,3098.000000,3015.770020,3055.209961,3055.209961,4170500
436 | 2020-07-28,3054.270020,3077.090088,2995.760010,3000.330078,3000.330078,3126700
437 | 2020-07-29,3030.989990,3039.159912,2996.770020,3033.530029,3033.530029,2974100
438 | 2020-07-30,3014.000000,3092.000000,3005.000000,3051.879883,3051.879883,6128300
439 | 2020-07-31,3244.000000,3246.820068,3151.000000,3164.679932,3164.679932,8085500
440 | 2020-08-03,3180.510010,3184.000000,3104.000000,3111.889893,3111.889893,5074700
441 | 2020-08-04,3101.209961,3167.239990,3101.209961,3138.830078,3138.830078,4694300
442 | 2020-08-05,3143.770020,3213.590088,3127.300049,3205.030029,3205.030029,3930000
443 | 2020-08-06,3194.360107,3247.469971,3165.429932,3225.000000,3225.000000,3940600
444 | 2020-08-07,3224.010010,3240.810059,3140.669922,3167.459961,3167.459961,3929600
445 | 2020-08-10,3170.310059,3172.510010,3101.520020,3148.159912,3148.159912,3167300
446 | 2020-08-11,3113.199951,3159.219971,3073.000000,3080.669922,3080.669922,3718100
447 | 2020-08-12,3108.000000,3174.389893,3101.419922,3162.239990,3162.239990,3527200
448 | 2020-08-13,3182.989990,3217.520020,3155.000000,3161.020020,3161.020020,3149000
449 | 2020-08-14,3178.179932,3178.239990,3120.000000,3148.020020,3148.020020,2751700
450 | 2020-08-17,3173.120117,3194.969971,3154.179932,3182.409912,3182.409912,2691200
451 | 2020-08-18,3212.000000,3320.000000,3205.820068,3312.489990,3312.489990,5346000
452 | 2020-08-19,3303.010010,3315.899902,3256.000000,3260.479980,3260.479980,4185100
453 | 2020-08-20,3252.000000,3312.620117,3238.000000,3297.370117,3297.370117,3332500
454 | 2020-08-21,3295.000000,3314.399902,3275.389893,3284.719971,3284.719971,3575900
455 | 2020-08-24,3310.149902,3380.320068,3257.560059,3307.459961,3307.459961,4666300
456 | 2020-08-25,3294.989990,3357.399902,3267.000000,3346.489990,3346.489990,3992800
457 | 2020-08-26,3351.110107,3451.739990,3344.570068,3441.850098,3441.850098,6508700
458 | 2020-08-27,3450.050049,3453.000000,3378.000000,3400.000000,3400.000000,4264800
459 | 2020-08-28,3423.000000,3433.370117,3386.500000,3401.800049,3401.800049,2897000
460 | 2020-08-31,3408.989990,3495.000000,3405.000000,3450.959961,3450.959961,4185900
461 | 2020-09-01,3489.580078,3513.870117,3467.000000,3499.120117,3499.120117,3476400
462 | 2020-09-02,3547.000000,3552.250000,3486.689941,3531.449951,3531.449951,3931500
463 | 2020-09-03,3485.000000,3488.409912,3303.000000,3368.000000,3368.000000,8161100
464 | 2020-09-04,3318.000000,3381.500000,3111.129883,3294.620117,3294.620117,8781800
465 | 2020-09-08,3144.000000,3250.850098,3130.000000,3149.840088,3149.840088,6094200
466 | 2020-09-09,3202.989990,3303.179932,3185.000000,3268.610107,3268.610107,5188700
467 | 2020-09-10,3307.219971,3349.889893,3170.550049,3175.110107,3175.110107,5330700
468 | 2020-09-11,3208.689941,3217.340088,3083.979980,3116.219971,3116.219971,5094000
469 | 2020-09-14,3172.939941,3187.389893,3096.000000,3102.969971,3102.969971,4529600
470 | 2020-09-15,3136.159912,3175.020020,3108.919922,3156.129883,3156.129883,4021500
471 | 2020-09-16,3179.989990,3187.239990,3074.149902,3078.100098,3078.100098,4512200
472 | 2020-09-17,3009.250000,3029.429932,2972.550049,3008.729980,3008.729980,6449100
473 | 2020-09-18,3031.739990,3037.800049,2905.540039,2954.909912,2954.909912,8892600
474 | 2020-09-21,2906.500000,2962.000000,2871.000000,2960.469971,2960.469971,6117900
475 | 2020-09-22,3033.840088,3133.989990,3000.199951,3128.989990,3128.989990,6948800
476 | 2020-09-23,3120.429932,3127.000000,2992.379883,2999.860107,2999.860107,5652700
477 | 2020-09-24,2977.790039,3069.300049,2965.000000,3019.790039,3019.790039,5529400
478 | 2020-09-25,3054.860107,3101.540039,2999.000000,3095.129883,3095.129883,4615200
479 | 2020-09-28,3148.850098,3175.040039,3117.169922,3174.050049,3174.050049,4224200
480 | 2020-09-29,3175.389893,3188.260010,3132.540039,3144.879883,3144.879883,3495800
481 | 2020-09-30,3141.139893,3212.879883,3133.989990,3148.729980,3148.729980,4883400
482 | 2020-10-01,3208.000000,3224.000000,3172.000000,3221.260010,3221.260010,4971900
483 | 2020-10-02,3153.629883,3195.800049,3123.000000,3125.000000,3125.000000,5613100
484 | 2020-10-05,3145.840088,3202.530029,3140.850098,3199.199951,3199.199951,3775300
485 | 2020-10-06,3165.000000,3182.000000,3090.000000,3099.959961,3099.959961,5086900
486 | 2020-10-07,3135.000000,3200.000000,3132.389893,3195.689941,3195.689941,4309400
487 | 2020-10-08,3224.989990,3233.290039,3174.989990,3190.550049,3190.550049,3174100
488 | 2020-10-09,3210.000000,3288.989990,3197.830078,3286.649902,3286.649902,4907900
489 | 2020-10-12,3349.939941,3496.239990,3339.550049,3442.929932,3442.929932,8364200
490 | 2020-10-13,3467.989990,3492.379883,3424.219971,3443.629883,3443.629883,5744700
491 | 2020-10-14,3447.000000,3464.879883,3340.000000,3363.709961,3363.709961,5828900
492 | 2020-10-15,3292.010010,3355.879883,3280.000000,3338.649902,3338.649902,5223400
493 | 2020-10-16,3363.229980,3399.659912,3160.000000,3272.709961,3272.709961,6474400
494 | 2020-10-19,3299.610107,3329.000000,3192.739990,3207.209961,3207.209961,5223600
495 | 2020-10-20,3222.280029,3266.000000,3192.010010,3217.010010,3217.010010,4509700
496 | 2020-10-21,3212.500000,3233.879883,3160.000000,3184.939941,3184.939941,4592700
497 | 2020-10-22,3189.870117,3198.750000,3121.939941,3176.399902,3176.399902,4212000
498 | 2020-10-23,3191.000000,3205.330078,3140.000000,3204.399902,3204.399902,3466700
499 | 2020-10-26,3198.739990,3282.979980,3153.300049,3207.040039,3207.040039,5901200
500 | 2020-10-27,3224.939941,3291.659912,3211.300049,3286.330078,3286.330078,4291000
501 | 2020-10-28,3249.300049,3264.020020,3162.469971,3162.780029,3162.780029,5588300
502 | 2020-10-29,3201.270020,3257.250000,3164.000000,3211.010010,3211.010010,6596500
503 | 2020-10-30,3157.750000,3167.000000,3019.000000,3036.149902,3036.149902,8386400
504 |
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/alibaba_stock_adjust_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/alibaba_stock_adjust_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/alibaba_stock_adjust_trendy_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/alibaba_stock_adjust_trendy_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/alibaba_stock_normal_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/alibaba_stock_normal_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/amazon_stock_adjust_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/amazon_stock_adjust_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/amazon_stock_adjust_trendy_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/amazon_stock_adjust_trendy_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/amazon_stock_normal_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/amazon_stock_normal_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/error_adjust_s1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/error_adjust_s1.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/error_adjust_s2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/error_adjust_s2.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/google_stock_adjust_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/google_stock_adjust_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/google_stock_adjust_trendy_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/google_stock_adjust_trendy_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/google_stock_normal_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/google_stock_normal_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/jd_stock_adjust_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/jd_stock_adjust_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/jd_stock_adjust_trendy_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/jd_stock_adjust_trendy_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/images/jd_stock_normal_forecast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecast_auto_adjustment/images/jd_stock_normal_forecast.png
--------------------------------------------------------------------------------
/forecast_auto_adjustment/ts_features.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | #@author: Jing Wang
3 | #@date: 09/17/2020
4 |
5 | '''
6 | Feature engineering module: generate time-series features, including basic calendar features, lag features, and rolling features
7 | '''
8 |
9 | import json
10 | import argparse
11 | import datetime
12 | import pandas as pd
13 | from joblib import Parallel, delayed
14 |
15 | import os
16 | import sys
17 | import util
18 |
19 | import chinese_calendar
20 |
21 | CHINESE_HOLIDAYS = chinese_calendar.constants.holidays
22 | CHINESE_WORKDAYS = chinese_calendar.constants.workdays
23 | CHINESE_LIEUDAYS = chinese_calendar.constants.in_lieu_days
24 |
25 | def get_holiday_stats(min_year, max_year):
26 | '''
27 | Compute each day's index within a holiday, and mark the first and last day of each holiday
28 |
29 | Args:
30 | min_year (int): first year to scan
31 | max_year (int): last year to scan
32 | (inclusive range [min_year, max_year])
33 | '''
34 | holiday_days = list(CHINESE_HOLIDAYS.keys())
35 | holiday_days.sort()
36 | holiday_index = {}
37 | holiday_first = set([])
38 | holiday_final = set([])
39 | count = 1
40 | is_prev_final = False
41 | for idx, day in enumerate(holiday_days):
42 | if day.year < min_year or day.year > max_year:
43 | continue
44 | next_day = day + datetime.timedelta(days=1)
45 | prev_day = day - datetime.timedelta(days=1)
46 | if next_day in holiday_days:
47 | if is_prev_final:
48 | holiday_first.add(day)
49 | holiday_index[day] = count
50 | is_prev_final = False
51 | count += 1
52 | else:
53 | if not prev_day in holiday_days:
54 | holiday_first.add(day)
55 | holiday_final.add(day)
56 | is_prev_final = True
57 | holiday_index[day] = count
58 | count = 1
59 | return holiday_index, holiday_first, holiday_final
60 |
61 | def get_before_after_holiday(data, before_count=5, after_count=5):
62 | '''
63 | Count the days before and after each holiday
64 | Args:
65 | data (pd.DataFrame)
66 | before_count (int)
67 | after_count (int)
68 | '''
69 | before = {}
70 | after = {}
71 | first_day = set(data.loc[data["is_holiday_first_fea"] == 1, "ds"].tolist())
72 | final_day = set(data.loc[data["is_holiday_final_fea"] == 1, "ds"].tolist())
73 | for first in first_day:
74 | for c in range(1, before_count+1):
75 | day = first - datetime.timedelta(days=c)
76 | before[day] = c
77 | for final in final_day:
78 | for c in range(1, after_count+1):
79 | day = final + datetime.timedelta(days=c)
80 | after[day] = c
81 | data["before_holiday_day_fea"] = data["ds"].apply(lambda x: before[x] if x in before else 0)
82 | data["after_holiday_day_fea"] = data["ds"].apply(lambda x: after[x] if x in after else 0)
83 | return data
84 |
85 | def basic_ts(data):
86 | '''
87 | Generate basic time-series features from the ds column
88 | 
89 | Args:
90 | data (DataFrame): input table
91 | Return:
92 | data (DataFrame): table with basic time-series features added
93 | '''
94 | data["ds"] = data["ds"].apply(util.date_converter)
95 |
96 | # generate calendar features of the form x_of_y
97 | data["day_of_week_fea"] = data["ds"].apply(lambda x: x.isoweekday() # Monday is 1
98 | if isinstance(x, datetime.datetime) else None)
99 | data["day_of_month_fea"] = data["ds"].apply(lambda x: x.day
100 | if isinstance(x, datetime.datetime) else None)
101 | data["day_of_year_fea"] = data["ds"].apply(lambda x: x.timetuple().tm_yday
102 | if isinstance(x, datetime.datetime) else None)
103 | data["week_of_year_fea"] = data["ds"].apply(lambda x: x.isocalendar()[1]
104 | if isinstance(x, datetime.datetime) else None)
105 | data["month_of_year_fea"] = data["ds"].apply(lambda x: x.month
106 | if isinstance(x, datetime.datetime) else None)
107 | data["is_weekend_fea"] = data["day_of_week_fea"].apply(lambda x: 1 if x >= 6 else 0)
108 |
109 | # holiday, workday, and in-lieu (make-up rest) day flags
110 | data["is_holiday_fea"] = data["ds"].apply(lambda x: 1 if x.date() in CHINESE_HOLIDAYS else 0)
111 | data["is_workday_fea"] = data["ds"].apply(lambda x: 1 if x.date() in CHINESE_WORKDAYS else 0)
112 | data["is_lieuday_fea"] = data["ds"].apply(lambda x: 1 if x.date() in CHINESE_LIEUDAYS else 0)
113 |
114 | # day index within the holiday
115 | min_year = data["ds"].min().year - 1
116 | max_year = data["ds"].max().year + 1
117 | holiday_index, holiday_first, holiday_final = get_holiday_stats(min_year, max_year)
118 | data["is_holiday_first_fea"] = data["ds"].apply(lambda x: 1 if x.date() in holiday_first else 0)
119 | data["is_holiday_final_fea"] = data["ds"].apply(lambda x: 1 if x.date() in holiday_final else 0)
120 | data["holiday_day_fea"] = data["ds"].apply(lambda x: holiday_index[x.date()] if x.date() in holiday_index else 0)
121 |
122 | # days before and after a holiday
123 | data = get_before_after_holiday(data, before_count=5, after_count=5)
124 | data["ds"] = data["ds"].apply(util.date_parser)
125 | return data
126 |
127 | def lag_ts(data, lag_windows=[1, 7]):
128 | '''
129 | Generate lag features from lag_windows (window unit: days)
130 | 
131 | Args:
132 | data (DataFrame): input table
133 | lag_windows (list): lag window sizes, in days
134 | '''
135 | for lag in lag_windows:
136 | data[f'{lag}_day_before_fea'] = data["y"].shift(lag)
137 | return data
138 |
139 | def roll_ts(data, roll_windows=[1, 7]):
140 | '''
141 | Rolling-window features
142 | 
143 | Args:
144 | data (DataFrame): input table
145 | roll_windows (list): rolling window sizes, in days
146 | '''
147 |
148 | for window in roll_windows:
149 | roll = data["y"].shift(1).rolling(window=window)
150 | tmp = pd.concat([roll.max(), roll.min(), roll.mean(), roll.sum(), roll.median()], axis=1)
151 | tmp.columns = [f'max_over_{window}_days_fea', f'min_over_{window}_days_fea',
152 | f'mean_over_{window}_days_fea', f'sum_over_{window}_days_fea', f'median_over_{window}_days_fea']
153 | data = pd.concat([data, tmp], axis=1)
154 | return data
155 |
156 | def ewm_ts(data, advance):
157 | '''
158 | Exponentially weighted moving average
159 | 
160 | Args:
161 | data (DataFrame): input table
162 | '''
163 | shifted = data["y"].shift(advance)
164 | data["ewm_fea"] = shifted.ewm(alpha=0.5, adjust=True, ignore_na=False).mean()
165 | return data
166 |
167 | def ts_single(data, lag, roll, ewm, lag_windows, roll_windows, ewm_advance):
168 | '''
169 | Generate time-series features for the series belonging to a single ID
170 | '''
171 | data.sort_values("ds", inplace=True)
172 |
173 | # make the date index contiguous
174 | df = util.fill_ts(data)
175 |
176 | if lag:
177 | df = lag_ts(df, lag_windows)
178 | if roll:
179 | df = roll_ts(df, roll_windows)
180 | if ewm:
181 | df = ewm_ts(df, ewm_advance)
182 |
183 | df.drop(columns=["y"], axis=1, inplace=True)
184 | data = pd.merge(data, df, on="ds", how="left")
185 | return data
186 |
187 | def generate_ts(data, params, n_jobs=-1):
188 | if "ds" in data:
189 | data["ds"] = data["ds"].apply(lambda x: util.date_parser(util.date_converter(x)))
190 | lag = params["lag"]["flag"]
191 | roll = params["rolling"]["flag"]
192 | ewm = params["ewm"]["flag"]
193 |
194 | lag_windows = params["lag"].get("window", None)
195 | roll_windows = params["rolling"].get("window", None)
196 | ewm_advance = params["ewm"].get("advance", None)
197 | skus = data["id"].unique().tolist()
198 | results = Parallel(n_jobs=n_jobs, verbose=0)(delayed(ts_single)(data.loc[data["id"] == sku], \
199 | lag, roll, ewm, lag_windows, roll_windows, ewm_advance) for sku in skus)
200 | output = pd.concat(results, axis=0)
201 | output = basic_ts(output)
202 |
203 | # fill missing values with 0
204 | output.fillna(0, inplace=True)
205 | return output
206 |
--------------------------------------------------------------------------------
/forecast_auto_adjustment/util.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: Xingyu Liu 01368856
3 | # Date: Feb 06, 2020
4 |
5 | #@modified: Jing Wang
6 | #@date: 09/18/2020
7 |
8 | import os
9 | import json
10 | import random
11 | import calendar
12 | import numpy as np
13 | import pandas as pd
14 |
15 | import matplotlib.pyplot as plt
16 | import seaborn as sns
17 | from datetime import timedelta, datetime
18 | from monthdelta import monthdelta
19 | import lightgbm as lgb
20 | from sklearn.model_selection import KFold
21 | from bayes_opt import BayesianOptimization
22 | from sklearn import model_selection
23 | from itertools import product
24 | from copy import deepcopy
25 | import xgboost as xgb
26 |
27 | def get_data_path():
28 | folder = os.path.split(os.path.realpath(__file__))[0] # os.path.dirname(os.path.dirname(__file__))
29 | return os.path.join(folder, "")
30 |
31 | def is_json(myjson):
32 | try:
33 | json.loads(myjson)
34 | except:
35 | return False
36 | return True
37 |
38 | def output_json(data, filename):
39 | '''
40 | output data to json
41 | :param data:
42 | :param filename:
43 | :return:
44 | '''
45 | with open(filename, 'w', encoding='utf-8') as f:
46 | json.dump(data, f, ensure_ascii=False)
47 |
48 | def draw_feature_importance(report_path, feature_importance):
49 | # draw feature importance
50 | photoLength = len(feature_importance) / 2 if len(feature_importance) > 10 else 5
51 | plt.figure(figsize=(20, photoLength))
52 | sns.barplot(x='Value', y='Feature', data=feature_importance.sort_values(by='Value', ascending=False))
53 | plt.title("LightGBM Feature Importance")
54 | plt.tight_layout()
55 | plt.savefig(report_path + "feature_importance.png")
56 |
57 | def get_dates(year, month):
58 | year = int(year)
59 | month = int(month)
60 | _, ndays = calendar.monthrange(year, month)
61 | if month < 10:
62 | mon = str(0) + str(month)
63 | else:
64 | mon = str(month)
65 | base = str(year) + mon
66 | dates = []
67 | for d in range(1, ndays + 1):  # include the last day of the month
68 | if d < 10:
69 | d = str(0) + str(d)
70 | else:
71 | d = str(d)
72 | dates.append(int(base + d))
73 | return dates
74 |
75 | def get_period_value_and_unit(period):
76 | '''
77 | Split a period string into a numeric value and a unit
78 | :param period: input period string, e.g. "7d"
79 | :return: the numeric value and unit, e.g. 7 and "d"
80 | '''
81 | # default value
82 | period_value = 7
83 | period_unit = 'd'
84 |
85 | if period.endswith('m'):
86 | period_unit = 'm'
87 | period_value = int(period.replace('m', ''))
88 | elif period.endswith('d'):
89 | period_unit = 'd'
90 | period_value = int(period.replace('d', ''))
91 |
92 | return period_value, period_unit
93 |
94 | def add_some_time(cur_time_str, value, unit):
95 | '''
96 | Add a span of time to a given time
97 | :param cur_time_str: current time, as a string
98 | :param value: length of time to add
99 | :param unit: unit of the time span
100 | :return: resulting time string
101 | '''
102 |
103 | val_start_date = datetime.strptime(cur_time_str, '%Y-%m-%d')
104 | if unit == 'm':
105 | val_week_date = val_start_date + monthdelta(months=value)
106 | elif unit == 'd':
107 | val_week_date = val_start_date + timedelta(days=value)
108 | else:
109 | raise ValueError('Unsupported period unit in {}.'.format(str(value) + str(unit)))
110 |
111 | return val_week_date.strftime("%Y-%m-%d")
112 |
113 |
114 | def train_test_split(X, y, train_ratio=0.7):
115 | num_periods, num_features = X.shape
116 | train_periods = int(num_periods * train_ratio)
117 | random.seed(2)
118 | Xtr = X[:train_periods]
119 | ytr = y[:train_periods]
120 | Xte = X[train_periods:]
121 | yte = y[train_periods:]
122 | return Xtr, ytr, Xte, yte
123 |
124 |
125 | ###############################################################
126 | # metric
127 | ###############################################################
128 |
129 | # define MAPE function
130 | def mean_absolute_percentage_error(y_true, y_pred):
131 | '''
132 | :param y_true: actual y values
133 | :param y_pred: predicted y values
134 | :return: MAPE
135 | '''
136 | y_true, y_pred = np.array(y_true), np.array(y_pred)
137 | mape = np.mean(np.abs((y_true - y_pred) / (y_true))) * 100
138 | return mape
139 |
140 | def MAPE_handle_zero(y_true, y_pred):
141 | '''
142 | * To guard against actual values of 0, 1e-2 is added to the denominator; note this can inflate the MAPE.
143 | :param y_true: actual y values
144 | :param y_pred: predicted y values
145 | :return: MAPE
146 | '''
147 | y_true, y_pred = np.array(y_true), np.array(y_pred)
148 | mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-2))) * 100
149 | return mape
150 |
151 | # define WMAPE function
152 | def weighted_mean_absolute_percentage_error(y_true, y_pred):
153 | '''
154 | :param y_true: actual y values
155 | :param y_pred: predicted y values
156 | :return: WMAPE
157 | '''
158 | y_true, y_pred = np.array(y_true), np.array(y_pred)
159 | wmape = 100 * np.sum(np.abs(y_true - y_pred)) / np.sum(y_true)
160 | return wmape
161 |
162 | def WMAPE_handle_zero(y_true, y_pred):
163 | '''
164 | :param y_true: actual y values
165 | :param y_pred: predicted y values
166 | :return: WMAPE
167 | '''
168 | y_true, y_pred = np.array(y_true), np.array(y_pred)
169 | wmape = 100 * np.sum(np.abs(y_true - y_pred)) / (np.sum(y_true) + 1e-2)
170 | return wmape
171 |
172 |
173 | # define SMAPE function
174 | def symmetric_mean_absolute_percentage_error(y_true, y_pred):
175 | '''
176 | :param y_true: actual y values
177 | :param y_pred: predicted y values
178 | :return: SMAPE
179 | '''
180 | y_true, y_pred = np.array(y_true), np.array(y_pred)
181 | smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100
182 | return smape
183 |
184 | def SMAPE_handle_zero(y_true, y_pred):
185 | '''
186 | * To guard against actual values of 0, 1e-2 is added to the denominator; note this can inflate the SMAPE.
187 | :param y_true: actual y values
188 | :param y_pred: predicted y values
189 | :return: SMAPE
190 | '''
191 | y_true, y_pred = np.array(y_true), np.array(y_pred)
192 | smape = 2.0 * np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true) + 1e-2)) * 100
193 | return smape
194 |
195 | def add_lag_and_window_feature_name(train_features, lag_list, window_list):
196 | '''
197 | Append the lag and rolling-window feature names to train_features
198 | :param train_features:
199 | :param lag_list:
200 | :param window_list:
201 | :return:
202 | '''
203 | for lag in lag_list:
204 | train_features.append(f'{lag}_day_before')
205 | for w in window_list:
206 | train_features.extend([f'max_over_{w}_days', f'min_over_{w}_days', f'mean_over_{w}_days', f'sum_over_{w}_days'])
207 |
208 |
209 | def construct_features(data, lag_list, window_list):
210 | basic = pd.DataFrame(data.y)
211 | for lag in lag_list:
212 | tmp = basic.shift(lag)
213 | tmp.rename(columns={'y': f'{lag}_day_before'}, inplace=True)
214 | data = pd.concat([data, tmp], axis=1)
215 |
216 | for w in window_list:
217 | shifted = basic.shift(1)
218 | window = shifted.rolling(window=w)
219 | tmp = pd.concat([window.max(), window.min(), window.mean(), window.sum()], axis=1)
220 | tmp.columns = [f'max_over_{w}_days', f'min_over_{w}_days', f'mean_over_{w}_days', f'sum_over_{w}_days']
221 | data = pd.concat([data, tmp], axis=1)
222 |
223 | return data
224 |
225 | def date_converter(x):
226 | '''
227 | Convert a value to datetime, trying several date formats
228 | '''
229 | if x is None:
230 | return x
231 | try:
232 | x = str(x)
233 | except Exception:
234 | return x
235 |
236 | try:
237 | return datetime.strptime(x, "%Y-%m-%d")
238 | except Exception:
239 | try:
240 | return datetime.strptime(x, "%Y/%m/%d")
241 | except Exception:
242 | try:
243 | return datetime.strptime(x, "%Y%m%d")
244 | except Exception:
245 | return x
246 |
247 | def date_parser(x):
248 | '''
249 | Convert a datetime to a date string
250 | '''
251 | if not isinstance(x, datetime):
252 | return None
253 |
254 | try:
255 | return x.strftime("%Y-%m-%d")
256 | except Exception:
257 | try:
258 | return x.strftime("%Y/%m/%d")
259 | except Exception:
260 | try:
261 | return x.strftime("%Y%m%d")
262 | except Exception:
263 | return None
264 |
265 | def fill_ts(data):
266 | '''
267 | Fill the time series to a contiguous daily index; keep only the two columns [ds, y]
268 | '''
269 |
270 | min_dt = date_converter(data["ds"].min())
271 | max_dt = date_converter(data["ds"].max())
272 | date_list = [date_parser(x) for x in pd.date_range(start=min_dt, end=max_dt)]
273 | date_df = pd.DataFrame(date_list, columns=["ds"])
274 | df = pd.merge(date_df, data[["ds", "y"]], on="ds", how="left")
275 | df["y"].fillna(0, inplace=True)
276 | return df
277 |
278 | def dt64_to_datetime(dt64):
279 | '''
280 | :param dt64:
281 | :return:
282 | '''
283 | if np.isnat(dt64):
284 | return None
285 | else:
286 | unix_epoch = np.datetime64(0, 's')
287 | one_second = np.timedelta64(1, 's')
288 | seconds_since_epoch = (dt64 - unix_epoch) / one_second
289 | return datetime.utcfromtimestamp(seconds_since_epoch)
290 |
291 | def get_date_diff(start_date_str, end_date_str):
292 | '''
293 | Get the difference in days between two date strings
294 | :param start_date_str:str
295 | :param end_date_str:str
296 | :return:
297 | '''
298 | start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
299 | end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
300 | ret_val = (end_date-start_date).days
301 | return ret_val
302 |
303 | def get_dates_list(start_date, end_date):
304 | '''
305 | Get the list of dates between start_date and end_date, inclusive
306 | :param start_date:str
307 | :param end_date:str
308 | :return:
309 | '''
310 | date_list = []
311 | start_date = datetime.strptime(start_date, "%Y-%m-%d")
312 | end_date = datetime.strptime(end_date, "%Y-%m-%d")
313 | while start_date <= end_date:
314 | date_str = start_date.strftime("%Y-%m-%d")
315 | date_list.append(date_str)
316 | start_date += timedelta(days=1)
317 | return date_list
318 |
319 | def get_model_info(model_name, data, results, mode):
320 | 'Get model information output'
321 | train_size = len(data[data["set_flag"] == mode["train"]])
322 | val_size = len(data[data["set_flag"] == mode["validation"]])
323 | test_size = len(data[data["set_flag"] == mode["test"]])
324 | val_data = results[results["set_flag"] == mode["validation"]]
325 | y = val_data["y"]
326 | ypred = val_data["y_pred"]
327 | info = {}
328 | info["model"] = model_name
329 | info["train_set_size"] = train_size
330 | info["validation_set_size"] = val_size
331 | info["test_set_size"] = test_size
332 | info["WMAPE"] = WMAPE_handle_zero(y, ypred)
333 | return info
334 |
335 | class GridSearchCV(object):
336 |
337 | def __init__(self, params_grid, model="lightgbm", cv=5, random_state=0):
338 | self.cv = cv
339 | self.random_state = random_state
340 |
341 | basic_params = {}
342 | search_params = {}
343 | for param, values in params_grid.items():
344 | if len(values) == 1:
345 | basic_params[param] = values
346 | else:
347 | search_params[param] = values
348 | self.basic_params = basic_params
349 | self.param_grid = search_params
350 |
351 | self.model = model
352 | self.num_boost_round = 1000
353 | self.early_stopping_rounds = 250
354 |
355 | def generate_params(self):
356 | # Always sort the keys of a dictionary, for reproducibility
357 | items = sorted(self.param_grid.items())
358 | if not items:
359 | yield {}
360 | else:
361 | keys, values = zip(*items)
362 | for v in product(*values):
363 | params = dict(zip(keys, v))
364 | yield params
365 |
366 | def fit(self, X, y, features, cat_features=None, init_points=5, n_iter=5,
367 | bayes_automated_tune=False,
368 | grid_tune=True):
369 | '''
370 | Grid Search Fit
371 | Args:
372 | X (data frame)
373 | y (np array)
374 | features (list): a list of feature columns to use
375 | init_points (int): how many steps of random exploration
376 | n_iter (int): how many iterations of bayesian optimization
377 | bayes_automated_tune (bool): automated tuning via Bayesian optimization
378 | grid_tune (bool): grid search
379 | 
380 | Note:
381 | You can set either init_points or n_iter to 0
382 | '''
383 | self.Xtrain = X
384 | self.ytrain = y
385 | self.features = features
386 | self.cat_features = cat_features
387 |
388 | if bayes_automated_tune and len(self.param_grid) > 0:
389 | optimizer = BayesianOptimization(
390 | f=self.fold_train,
391 | pbounds=self.param_grid
392 | )
393 | optimizer.maximize(
394 | init_points=init_points,
395 | n_iter=n_iter,
396 | )
397 |
398 | # get best parameters
399 | best_param = optimizer.max["params"]
400 | for p, val in best_param.items():
401 | if p in ["min_child_samples", "num_leaves",
402 | "max_depth", "n_estimators", "random_state"]:
403 | val = int(val)
404 | self.basic_params[p] = val
405 |
406 | if grid_tune and len(self.param_grid) > 0:
407 | best_score = float("-inf")
408 | best_param = None
409 | for param in self.generate_params():
410 | score = self.fold_train(**param)
411 | if score > best_score:
412 | best_score = score
413 | best_param = deepcopy(self.basic_params)
414 | self.basic_params = best_param
415 |
416 | if "weight" not in X.columns:
417 | X["weight"] = 1
418 |
419 | Xtr, Xval, ytr, yval = model_selection.train_test_split(X, y,
420 | test_size=0.1, random_state=self.random_state)
421 |
422 | if self.cat_features is None:
423 | cat_feat = "auto"
424 | else:
425 | cat_feat = self.cat_features
426 |
427 | if self.model == "lightgbm":
428 | trn_data = lgb.Dataset(
429 | Xtr[features],
430 | label=ytr,
431 | weight=Xtr.weight,
432 | categorical_feature=cat_feat
433 | )
434 |
435 | val_data = lgb.Dataset(
436 | Xval[features],
437 | label=yval,
438 | weight=Xval.weight,
439 | categorical_feature=cat_feat
440 | )
441 |
442 | self.best_estimator_ = lgb.train(
443 | self.basic_params,
444 | trn_data,
445 | num_boost_round=self.num_boost_round,
446 | valid_sets=[trn_data, val_data],
447 | early_stopping_rounds=self.early_stopping_rounds,
448 | verbose_eval=False,
449 | )
450 | elif self.model == "xgboost":
451 | trn_data = xgb.DMatrix(Xtr[features], label=ytr)
452 | val_data = xgb.DMatrix(Xval[features], label=yval)
453 | params = {k: v[0] for k, v in self.basic_params.items()}
454 | self.best_estimator_ = xgb.train(params, trn_data,
455 | evals=[(val_data, "validation")],
456 | verbose_eval=False,
457 | num_boost_round=self.num_boost_round,
458 | early_stopping_rounds=self.early_stopping_rounds)
459 |
460 | self.best_params_ = self.basic_params
461 |
462 | def fold_train(self, **kwargs):
463 | for p, val in kwargs.items():
464 | if p in ["min_child_samples", "num_leaves", "max_depth",
465 | "n_estimators", "random_state"]:
466 | val = int(val)
467 | self.basic_params[p] = [val]
468 |
469 | scores = []
470 | Xtrain = self.Xtrain
471 | ytrain = self.ytrain
472 | features = self.features
473 |
474 | if self.cat_features is None:
475 | cat_feat = "auto"
476 | else:
477 | cat_feat = self.cat_features
478 |
479 | if "weight" not in Xtrain.columns:
480 | Xtrain["weight"] = 1
481 |
482 | folds = KFold(n_splits=self.cv, shuffle=True, random_state=self.random_state)
483 | for fold_idx, (trn_idx, val_idx) in enumerate(folds.split(Xtrain.values, ytrain)):
484 | t_x = Xtrain.iloc[trn_idx]
485 | v_x = Xtrain.iloc[val_idx]
486 | label_train = ytrain[trn_idx].ravel()
487 | label_val = ytrain[val_idx].ravel()
488 |
489 | if self.model == "lightgbm":
490 | trn_data = lgb.Dataset(
491 | t_x[features],
492 | label=label_train,
493 | weight=t_x.weight,
494 | categorical_feature=cat_feat
495 | )
496 | val_data = lgb.Dataset(
497 | v_x[features],
498 | label=label_val,
499 | weight=v_x.weight,
500 | categorical_feature=cat_feat
501 | )
502 | # start = datetime.now()
503 | regressor = lgb.train(
504 | self.basic_params,
505 | trn_data,
506 | num_boost_round=self.num_boost_round,
507 | valid_sets=[trn_data, val_data],
508 | early_stopping_rounds=self.early_stopping_rounds,
509 | verbose_eval=False,
510 | )
511 |
512 | val_feat = v_x[features]
513 | elif self.model == "xgboost":
514 | trn_data = xgb.DMatrix(t_x[features], label=label_train)
515 | val_data = xgb.DMatrix(v_x[features], label=label_val)
516 | params = {k: v[0] for k, v in self.basic_params.items()}
517 | regressor = xgb.train(params, trn_data,
518 | evals=[(val_data, "validation")],
519 | verbose_eval=False,
520 | num_boost_round=self.num_boost_round,
521 | early_stopping_rounds=self.early_stopping_rounds)
522 | val_feat = xgb.DMatrix(v_x[features])
523 |
524 | ypred = regressor.predict(val_feat).ravel()
525 | mae = np.mean(np.abs(ypred - label_val))
526 | scores.append(mae)
527 | # end = datetime.now()
528 | # print("Time spent: {}s".format((end-start).total_seconds()))
529 | # raise
530 | return -np.mean(scores)
531 |
--------------------------------------------------------------------------------
/forecast_auto_adjustment/validation.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8
2 | #@Author: Jing Wang
3 | #@Date: 2020-11-06 14:47:16
4 | #@Last Modified by: Jing Wang
5 | #@Last Modified time: 2020-11-06 14:47:16
6 | #@reference:
7 |
8 | import pandas as pd
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import matplotlib.ticker as ticker
12 | import ts_features
13 | import util
14 | from sklearn.linear_model import LinearRegression
15 |
16 | def mape(y: np.array, ypred: np.array):
17 | return np.mean(np.abs(y - ypred) / y)
18 |
19 | def generate_ts(data: pd.DataFrame):
20 | data = data[["Date", "Close"]]
21 | data.columns = ["ds", "y"]
22 | data["id"] = "amazon"
23 | params = {
24 | "lag": {
25 | "flag": True,
26 | "window": [7, 14],
27 | },
28 | "rolling": {
29 | "flag": True,
30 | "window": [7, 14],
31 | },
32 | "ewm": {
33 | "flag": True,
34 | "advance": 7
35 | }
36 | }
37 | data = ts_features.generate_ts(data, params)
38 | return data
39 |
40 | def train(data: pd.DataFrame,
41 | train_end_date: str = "2019-12-31",
42 | test_end_date: str = "2020-10-29",
43 | test_start_date: str = "2020-01-01",
44 | roll_days: int = 14,
45 | adjust: bool = False,
46 | trendy: bool = False):
47 |
48 | results = []
49 | prev_error = None
50 | losses = []
51 | lr = LinearRegression()
52 | prev_idx = data[data["ds"] <= train_end_date].index[-1]
53 | count = 0
54 | while True:
55 | train = data.iloc[:prev_idx+1]
56 | test = data.iloc[prev_idx+1: prev_idx+1+roll_days]  # next roll_days rows
57 | roll_end_date = train["ds"].tolist()[-1]
58 | if len(test) == 0:
59 | break
60 | prev_idx = test.index.tolist()[-1]
61 | feat_cols = [c for c in data.columns.tolist() if c not in ["ds", "y", "id"]]
62 | Xtrain, Xtest = train[feat_cols], test[feat_cols]
63 | ytrain, ytest = train["y"], test["y"]
64 |
65 | regressor = lr.fit(Xtrain, ytrain)
66 | ypred = regressor.predict(Xtest)
67 |
68 | # use other error
69 | # moving_average = np.mean(ytrain[:-roll_days])
70 | # error = (ytest.ravel() - ypred.ravel()) * (ypred.ravel() > moving_average).astype(int) + \
71 | # (ytest.ravel() - moving_average) * (ypred.ravel() <= moving_average).astype(int)
72 |
73 | error = ytest.ravel() - ypred.ravel()
74 | recent = np.array(ytrain[:-roll_days])
75 | if len(recent) >= 2:
76 | trend = np.sign(recent[-1] - recent[-2])
77 | else:
78 | trend = 1
79 | if count > 0 and adjust:
80 | postprocess = prev_error[:len(ypred)]
81 | if trendy:
82 | if trend == -1:
83 | ypred[postprocess < 0] += postprocess[postprocess < 0]
84 | if trend == 1:
85 | ypred[postprocess > 0] += postprocess[postprocess > 0]
86 | else:
87 | ypred += postprocess
88 | loss = mape(ypred.ravel(), np.array(ytest).ravel())
89 | prev_error = error
90 | count += 1
91 | test["ypred"] = ypred
92 | results.append(test)
93 | losses.append(loss)
94 | if roll_end_date >= test_end_date:
95 | break
96 |
97 | return results, losses
98 |
99 | def result_plot(results, title="result_plot"):
100 | plt.figure()
101 | results = pd.concat(results, axis=0)
102 | plt.plot(range(len(results)), results["y"])
103 | plt.plot(range(len(results)), results["ypred"])
104 | plt.legend(["y", "ypred"])
105 | plt.title(title)
106 | plt.savefig(title + ".png")
107 |
108 | def evaluation(losses):
109 | mu = np.mean(losses)
110 | std = np.std(losses)
111 | cv = round(std / mu, 3)
112 | mu = round(mu, 3)
113 | return mu, cv
114 |
115 | def single_main(company, filename, test_start, test_end):
116 | data = pd.read_csv(filename)
117 | data = generate_ts(data)
118 | normal_results, normal_losses = train(data)
119 | adjust_results, adjust_losses = train(data, adjust=True)
120 | adjust_trend_results, adjust_trend_losses = train(data, test_start_date=test_start,
121 | test_end_date=test_end, adjust=True, trendy=True)
122 |
123 | result_plot(normal_results, company + "_stock_normal_forecast")
124 | result_plot(adjust_results, company + "_stock_adjust_forecast")
125 | result_plot(adjust_trend_results, company + "_stock_adjust_trendy_forecast")
126 |
127 | normal_mu, normal_cv = evaluation(normal_losses)
128 | adjust_mu, adjust_cv = evaluation(adjust_losses)
129 | adjust_trend_mu, adjust_trend_cv = evaluation(adjust_trend_losses)
130 |
131 | row = [normal_mu, normal_cv, adjust_mu, adjust_cv, adjust_trend_mu, adjust_trend_cv]
132 | return row
133 |
134 | def main():
135 | filenames = ["amazon_stock.csv", "google_stock.csv", "alibaba_stock.csv", "jd_stock.csv"]
136 | companies = ["amazon", "google", "alibaba", "jd"]
137 | test_start = "2020-01-01"
138 | test_end = "2020-10-29"
139 | results = []
140 | for f, company in zip(filenames, companies):
141 | row = single_main(company, f, test_start, test_end)
142 | results.append([company] + row)
143 | cols = ["Company", "Original Avg MAPE", "Original CV", "Adjust Avg MAPE", "Adjust CV",
144 | "Adjust Trendy MAPE", "Adjust Trendy CV"]
145 | results = pd.DataFrame(results, columns=cols)
146 | return results
147 |
148 | if __name__ == "__main__":
149 | results = main()
150 | print(results)
151 |
--------------------------------------------------------------------------------
/forecast_reconcilation/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Forecast Reconciliation Method
2 |
3 | ## Usage
4 | * Step 1: Input DataFrame and construct hierarchy tree
5 | ```python
6 | # see reconcilation_test.csv for the expected data format
7 | data = pd.read_csv("reconcilation_test.csv")
8 |
9 | # get all series; "all" denotes the sum over all series
10 | series = data.loc[~data["series"].isna() & data["sku"].isna(),
11 | ["series"]].drop_duplicates()
12 | series = series["series"].tolist()
13 | series = [s for s in series if s != "all"]
14 | skus = data.loc[~data["sku"].isna(), ["series", "sku"]].drop_duplicates()
15 | skus = (skus["series"] + "_" + skus["sku"]).tolist()
16 |
17 | # levels are root, series, skus; there is only one store, so it acts as root
18 | total = {"root": series} # the root level is the top level
19 | skus_h = {k: [v for v in skus if v.startswith(k)] for k in series}
20 | hierarchy = {**total, **skus_h}
21 |
22 | tree = HierarchyTree.from_nodes(hierarchy)
23 | ```
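For the bundled `reconcilation_test.csv`, the resulting `hierarchy` dict comes out roughly like this (an illustrative sketch, not program output):

```python
hierarchy = {
    "root": ["红胖子", "黑管"],  # first level: series
    "红胖子": ["红胖子_粉底1", "红胖子_粉底2"],  # second level: series_sku
    "黑管": ["黑管_唇膏1", "黑管_唇膏2"],
}
```

`HierarchyTree.from_nodes` builds the tree breadth-first from this dict; any name that never appears as a key becomes a bottom-level leaf.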
24 | * Step 2: Split train and validation data
25 | ```python
26 | def clear_ids(ids):
27 | cols = []
28 | for c in ids:
29 | if isinstance(c, tuple) or isinstance(c, list):
30 | cols.append(c[1])
31 | else:
32 | cols.append(c)
33 | new_cols = []
34 | for c in cols:
35 | if c.endswith("_"):
36 | if c == "all_":
37 | new_cols.append("root")
38 | else:
39 | new_cols.append(c[:-1])
40 | continue
41 | new_cols.append(c)
42 | return new_cols
43 |
44 | def mape(y, ypred):
45 | y = np.array(y).ravel()
46 | ypred = np.array(ypred).ravel()
47 | return np.abs(y-ypred) / y
48 |
49 | def preprocess(df):
50 | df.fillna("", inplace=True)
51 | df.loc[:, "id"] = df.loc[:, "series"] + "_" + df.loc[:, "sku"]
52 | df["residual"] = mape(df["y"], df["ypred"])
53 | return df
54 |
55 | train_data = data[data["flag"] == "val"] # to be changed
56 | val_data = data[data["flag"] == "val"]
57 | val_data = preprocess(val_data)
58 | train_data = preprocess(train_data)
59 |
60 | # forecast data
61 | forecasts = pd.pivot_table(val_data, values=["ypred"], index=["date"], columns=["id"])
62 | # MAPE results, used as residuals
63 | residuals = pd.pivot_table(val_data, values=["residual"], index=["date"], columns=["id"])
64 | # historical data to calculate ratio if using top down method
65 | history = pd.pivot_table(train_data, values=["y"], index=["date"], columns=["id"])
66 | forecasts.columns = clear_ids(forecasts.columns)
67 | residuals.columns = clear_ids(residuals.columns)
68 | history.columns = clear_ids(history.columns)
69 | val_data["id"] = clear_ids(val_data["id"])
70 | ```
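After `clear_ids`, each pivot table has one row per date and one column per node name, which is the layout the reconciliation functions expect (illustrative values taken from the sample CSV):

| date | root | 红胖子 | 黑管 | 红胖子_粉底1 | ... |
|---|---|---|---|---|---|
| 20200920 | 8000 | 3500 | 3500 | 1900 | ... |
| 20200921 | 11000 | 6000 | 6000 | 2500 | ... |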
71 | * Step 3: Run the reconciliation method
72 | ```python
73 | res = optimal_reconcilation(forecasts, tree, method="mint", residuals=residuals)
74 | # postprocess
75 | res = pd.merge(res, val_data[["id", "y", "ypred", "date"]], how="left", on=["id", "date"])
76 | res.loc[res["id"] == "root", "id"] = "all"
77 | res["mape"] = mape(res["y"], res["ypred"])
78 | res["mape_new"] = mape(res["y"], res["ypred_new"])
79 | res[["series", "sku"]] = res["id"].str.split("_", expand=True)
80 | res.drop(columns=["id"], inplace=True)
81 | ```
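`optimal_reconcilation` accepts `method="ols"`, `"wls"`, `"nseries"`, or `"mint"`; `"wls"` and `"mint"` also require the `residuals` table. The module additionally provides simpler strategies; a sketch based on the signatures in `reconcilation.py`:

```python
from reconcilation import top_down, bottom_up

# disaggregate the root forecast by historical proportions
res_td = top_down(forecasts, history, tree, method="prop_hist_avg")
# or sum bottom-level forecasts up the hierarchy
res_bu = bottom_up(forecasts, tree)
```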
82 |
83 | ## Examples
84 | To run examples,
85 | ```shell
86 | python reconcilation.py
87 | ```
88 | ## Reference:
89 | * [Hyndman, R. J., & Athanasopoulos, G. (2018). Forecasting: principles and practice. OTexts](https://otexts.com/fpp2/).
90 | * [Optimal Forecast Reconciliation for Hierarchical and Grouped Time Series Through Trace Minimization](https://robjhyndman.com/papers/MinT.pdf)
92 | * [scikit-hts](https://github.com/jingw2/scikit-hts/blob/master/hts/functions.py)
93 |
--------------------------------------------------------------------------------
/forecast_reconcilation/data/reconcilation_test.csv:
--------------------------------------------------------------------------------
1 | date,series,sku,y,ypred,flag
2 | 20200920,红胖子,粉底1,1500,1900,val
3 | 20200921,红胖子,粉底1,3000,2500,val
4 | 20200922,红胖子,粉底1,,1200,test
5 | 20200923,红胖子,粉底1,,1000,test
6 | 20200920,红胖子,粉底2,2500,2000,val
7 | 20200921,红胖子,粉底2,2000,3000,val
8 | 20200922,红胖子,粉底2,,1500,test
9 | 20200923,红胖子,粉底2,,2000,test
10 | 20200920,红胖子,,4000,3500,val
11 | 20200921,红胖子,,5000,6000,val
12 | 20200922,红胖子,,,3000,test
13 | 20200923,红胖子,,,4000,test
14 | 20200920,黑管,唇膏1,2000,1900,val
15 | 20200921,黑管,唇膏1,2000,2500,val
16 | 20200922,黑管,唇膏1,,3000,test
17 | 20200923,黑管,唇膏1,,1000,test
18 | 20200920,黑管,唇膏2,2500,2000,val
19 | 20200921,黑管,唇膏2,2000,3000,val
20 | 20200922,黑管,唇膏2,,1500,test
21 | 20200923,黑管,唇膏2,,4000,test
22 | 20200920,黑管,,4500,3500,val
23 | 20200921,黑管,,4000,6000,val
24 | 20200922,黑管,,,6000,test
25 | 20200923,黑管,,,4000,test
26 | 20200920,all,,8500,8000,val
27 | 20200921,all,,9000,11000,val
28 | 20200922,all,,,9000,test
29 | 20200923,all,,,7000,test
30 |
--------------------------------------------------------------------------------
/forecast_reconcilation/data_structure.py:
--------------------------------------------------------------------------------
1 |
2 | from collections import OrderedDict
3 |
4 | class HierarchyTree(object):
5 |
6 | def __init__(self):
7 | self._nodes = OrderedDict()
8 | self._bottom = []
9 |
10 | def add(self, node):
11 | self._nodes[node.name] = node
12 |
13 | def remove(self, node):
14 | del self._nodes[node.name]
15 | for parent in node.parent:
16 | parent.children.remove(node)
17 | parent.children.extend(node.children)
18 | for child in node.children:
19 | child.parent.remove(node)
20 | child.parent.extend(node.parent)
21 | self._nodes[child.name] = child
22 |
23 | def isin(self, node):
24 | return node.name in self._nodes
25 |
26 | @property
27 | def nodes(self):
28 | return self._nodes
29 |
30 | @property
31 | def num_nodes(self):
32 | return len(self._nodes)
33 |
34 | @property
35 | def root(self):
36 | return self._nodes["root"]
37 |
38 | def get_node(self, nodename):
39 | return self._nodes[nodename]
40 |
41 | @property
42 | def num_bottom_level(self):
43 | return len(self._bottom)
44 |
45 | def add_bottom(self, node):
46 | self._bottom.append(node)
47 |
48 | @property
49 | def bottom(self):
50 | return self._bottom
51 |
52 | @staticmethod
53 | def from_nodes(hierarchy: dict):
54 | tree = HierarchyTree()
55 | queue = ["root"]
56 | node = TreeNode("root")
57 | tree.add(node)
58 | while queue:
59 | nodename = queue.pop(0)
60 | node = TreeNode(nodename)
61 | if tree.isin(node):
62 | node = tree.get_node(nodename)
63 | if nodename not in hierarchy:
64 | tree.add(node)
65 | tree.add_bottom(node)
66 | continue
67 | for child in hierarchy[nodename]:
68 | child = TreeNode(child)
69 | if not tree.isin(child):
70 | child.parent.append(node)
71 | node.children.append(child)
72 | tree.add(child)
73 | else:
74 | child = tree.get_node(child.name)
75 | child.parent.append(node)
76 | node.children.append(child)
77 | queue.append(child.name)
78 | return tree
79 |
80 | def __repr__(self):
81 | return str([nodename for nodename in self._nodes])
82 |
83 | class TreeNode(object):
84 |
85 | def __init__(self, name=None):
86 | self.name = name
87 | self.children = []
88 | self.parent = []
89 |
90 | def append(self, child):
91 | self.children.append(child)
92 |
93 | def __repr__(self):
94 | return self.name
95 |
--------------------------------------------------------------------------------
/forecast_reconcilation/reconcilation.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | #@author: Jing Wang
3 | #@date: 09/24/2020
4 |
5 | '''
6 | Hierarchical Forecast Reconciliation
7 | Reconciles forecasts produced at different levels of a hierarchy
8 | * Implements the optimal reconciliation methods in Forecasting: Principles and Practice, Section 10.7
9 | * Reference code: https://github.com/carlomazzaferro/scikit-hts/blob/master/hts/functions.py
10 | '''
11 | from data_structure import HierarchyTree
12 | import pandas as pd
13 | import numpy as np
14 |
15 | def get_summing_matrix(tree: HierarchyTree):
16 | '''
17 | Build the summing matrix by depth-first recursion
18 | '''
19 | nodename = list(tree.nodes.keys())
20 | bottoms = tree.bottom
21 | num_bottoms = tree.num_bottom_level
22 | num_nodes = tree.num_nodes
23 | mat = np.zeros((num_nodes, num_bottoms))
24 |
25 | def dfs(mat, node):
26 | idx = nodename.index(node.name)
27 | if node.name != "root" and not node.children:
28 | mat[idx, bottoms.index(node)] = 1
29 | for child in node.children:
30 | dfs(mat, child)
31 | child_idx = nodename.index(child.name)
32 | mat[idx] += mat[child_idx]
33 |
34 | dfs(mat, tree.root)
35 | return mat
36 |
37 | def get_forecast_prop(forecasts_dict: dict, tree: HierarchyTree):
38 | queue = [tree.root]
39 | props = {"root": 1}
40 | while queue:
41 | node = queue.pop(0)
42 | if len(node.children) == 0:
43 | continue
44 | s = sum([forecasts_dict[child.name][0] for child in node.children])
45 | for child in node.children:
46 | ratio = forecasts_dict[child.name][0] / s
47 | props[child.name] = props[node.name] * ratio
48 | queue.append(child)
49 | p = [props[node.name] for node in tree.bottom]
50 | p = np.asarray(p).reshape((-1, 1))
51 | return p
52 |
53 | def top_down(forecasts: pd.DataFrame,
54 | history: pd.DataFrame,
55 | tree: HierarchyTree,
56 | horizon: int = 7,
57 | method="avg_hist_prop"):
58 | '''
59 | Top-down method
60 | Disaggregate the root forecast down the hierarchy
61 | 1. Average Historical Proportions, avg_hist_prop:
62 | p_j = 1 / T * \sum_{t=1}^T y_{j, t} / y_t
63 | 2. Proportions of Historical Average, prop_hist_avg
64 | 3. Forecast Proportions, forecast_prop
65 |
66 | forecasts and history format like this:
67 | | date | root | series1 | series_2 | series1_sku1 | series2_sku1 |
68 | | 20200922 | 1000 | 200 | 300 | 100 | 250 |
69 | date is index, pivot table
70 | '''
71 | nodenames = list(tree.nodes.keys())
72 | S = get_summing_matrix(tree)
73 | history.sort_index(inplace=True) # sort dates ascending
74 | history = history[-horizon:]
75 | history = df_to_array(history, nodenames)
76 | dates = forecasts.index.tolist()
77 |
78 | forecasts_dict = forecasts.to_dict(orient="list")
79 | forecasts = df_to_array(forecasts, nodenames)
80 | bottom_ids = [nodenames.index(bot.name) for bot in tree.bottom]
81 | root_id = nodenames.index("root")
82 |
83 | y_root = forecasts[root_id].reshape((1, -1))
84 | if method == "avg_hist_prop":
85 | p = np.mean(history[bottom_ids] / history[root_id], axis=1)
86 | p /= p.sum() # re-standardize
87 | p = p.reshape((-1, 1))
88 | if method == "prop_hist_avg":
89 | p = np.mean(history[bottom_ids], axis=1) / np.mean(history[root_id])
90 | p /= p.sum() # re-standardize
91 | p = p.reshape((-1, 1))
92 | if method == "forecast_prop":
93 | p = get_forecast_prop(forecasts_dict, tree)
94 |
95 | y = S @ p @ y_root
96 | results = pd.DataFrame(y, columns=dates)
97 | results["id"] = nodenames
98 | cols = [c for c in results.columns.tolist() if c != "id"]
99 | results = pd.melt(results, id_vars=["id"], value_vars=cols)
100 | results.columns = ["id", "date", "ypred_new"]
101 | return results
102 |
103 |
104 | def bottom_up(forecasts: pd.DataFrame, tree: HierarchyTree):
105 | '''
106 | Bottom-up aggregation
107 | y_tilde = S y_hat_bottom
108 | '''
109 | nodenames = list(tree.nodes.keys())
110 | S = get_summing_matrix(tree)
111 | ypred = df_to_array(forecasts, nodenames)
112 | num_bottom_level = tree.num_bottom_level
113 | bottom_pred = ypred[-num_bottom_level:, :]
114 | y = S @ bottom_pred
115 |
116 | dates = forecasts.index.tolist()
117 | results = pd.DataFrame(y, columns=dates)
118 | results["id"] = nodenames
119 | cols = [c for c in results.columns.tolist() if c != "id"]
120 | results = pd.melt(results, id_vars=["id"], value_vars=cols)
121 | results.columns = ["id", "date", "ypred_new"]
122 | return results
123 |
124 | def optimal_reconcilation(forecasts: pd.DataFrame, tree: HierarchyTree, method="ols",
125 | residuals: pd.DataFrame = None):
126 | '''
127 | Optimal Reconciliation Algorithm:
128 | y_tilde = S P y_hat
129 | y_tilde = S (S^T W_h^{-1} S)^{-1} S^T W_h^{-1} y_hat
130 | 
131 | S: summing matrix, encodes the aggregation structure of the hierarchy
132 | P: constraint matrix
133 | W_h: W_h = Var[y_{T+h} - y_tilde] = S P W_h P^T S^T, y_{T+h} is the true value
134 | 
135 | Task is to estimate W_h
136 | 1. ols: ordinary least squares, W_h = k_h I
137 | 2. wls: weighted least squares, W_h = k_h diag(W_hat1)
138 | W_hat1 = 1 / T * \sum_{t=1}^T e_t e_t^T,
139 | e_t is the n-dimensional vector of residuals
140 | 3. nseries: W_h = k_h Omega, Omega = diag(S 1), where 1 is the unit vector,
141 | i.e. the diagonal of the row sums of S
142 | 4. mint: W_h = k_h W_1, W_1 is the sample/residual covariance matrix; since the
143 | number of bottom-level series can be much larger than T, the covariance is
144 | shrunk to its diagonal
145 | 
146 |
147 | forecasts format like this:
148 | | date | all | series1 | series_2 | series1_sku1 | series2_sku1 |
149 | | 20200922 | 1000| 200 | 300 | 100 | 250 |
150 | date is index, pivot table
151 | '''
152 | nodenames = list(tree.nodes.keys())
153 | num_nodes = tree.num_nodes
154 | for name in nodenames:
155 | assert name in forecasts.columns
156 | dates = forecasts.index.tolist()
157 |
158 | S = get_summing_matrix(tree)
159 | ypred = df_to_array(forecasts, nodenames)
160 | kh = 1
161 | if method == "ols":
162 | Wh = np.eye(num_nodes) * kh
163 | if method == "wls":
164 | residuals = df_to_array(residuals, nodenames)
165 | What1 = residuals @ residuals.T
166 | diag = np.eye(num_nodes) * np.diag(What1)
167 | Wh = kh * diag
168 | if method == "nseries":
169 | diag = np.eye(num_nodes) * np.diag(np.sum(S, axis=1))
170 | Wh = kh * diag
171 | if method == "mint":
172 | residuals = df_to_array(residuals, nodenames)
173 | cov = np.cov(residuals)
174 | diag = np.eye(num_nodes) * np.diag(cov)
175 | Wh = kh * diag
176 | inv_Wh = np.linalg.inv(Wh)
177 | coef = S @ (np.linalg.inv(S.T @ inv_Wh @ S)) @ S.T @ inv_Wh
178 | y = coef @ ypred
179 |
180 | results = pd.DataFrame(y, columns=dates)
181 | results["id"] = nodenames
182 | cols = [c for c in results.columns.tolist() if c != "id"]
183 | results = pd.melt(results, id_vars=["id"], value_vars=cols)
184 | results.columns = ["id", "date", "ypred_new"]
185 | return results
186 |
187 | def df_to_array(forecasts, nodenames):
188 | '''
189 | DataFrame to array based on node names input
190 |
191 | Usage:
192 |
193 | DataFrame like this:
194 |
195 | | all | series1 | series_2 | series1_sku1 | series2_sku1 |
196 | | 1000| 200 | 300 | 100 | 250 |
197 |
198 | to Array:
199 | array([1000, 200, 300, 100, 250]).T
200 | '''
201 | forecasts = forecasts[nodenames]
202 | arr = np.asarray(forecasts).T
203 | return arr
204 |
205 | def example():
206 | data = pd.read_csv("reconcilation_test.csv")
207 | series = data.loc[~data["series"].isna() & data["sku"].isna(),
208 | ["series"]].drop_duplicates()
209 | series = series["series"].tolist()
210 | series = [s for s in series if s != "all"]
211 | skus = data.loc[~data["sku"].isna(), ["series", "sku"]].drop_duplicates()
212 | skus = (skus["series"] + "_" + skus["sku"]).tolist()
213 | # there is only one store, so it acts as root
214 | total = {"root": series} # the root level is the top level
215 | skus_h = {k: [v for v in skus if v.startswith(k)] for k in series}
216 | hierarchy = {**total, **skus_h}
217 |
218 | tree = HierarchyTree.from_nodes(hierarchy)
219 |
220 | def clear_ids(ids):
221 | cols = []
222 | for c in ids:
223 | if isinstance(c, tuple) or isinstance(c, list):
224 | cols.append(c[1])
225 | else:
226 | cols.append(c)
227 | new_cols = []
228 | for c in cols:
229 | if c.endswith("_"):
230 | if c == "all_":
231 | new_cols.append("root")
232 | else:
233 | new_cols.append(c[:-1])
234 | continue
235 | new_cols.append(c)
236 | return new_cols
237 |
238 | def mape(y, ypred):
239 | y = np.array(y).ravel()
240 | ypred = np.array(ypred).ravel()
241 | return np.abs(y-ypred) / y
242 |
243 | def preprocess(df):
244 | df.fillna("", inplace=True)
245 | df.loc[:, "id"] = df.loc[:, "series"] + "_" + df.loc[:, "sku"]
246 | df["residual"] = mape(df["y"], df["ypred"])
247 | return df
248 |
249 | train_data = data[data["flag"] == "val"] # to be changed
250 | val_data = data[data["flag"] == "val"]
251 | val_data = preprocess(val_data)
252 | train_data = preprocess(train_data)
253 |
254 | forecasts = pd.pivot_table(val_data, values=["ypred"], index=["date"], columns=["id"])
255 | residuals = pd.pivot_table(val_data, values=["residual"], index=["date"], columns=["id"])
256 | history = pd.pivot_table(train_data, values=["y"], index=["date"], columns=["id"])
257 | forecasts.columns = clear_ids(forecasts.columns)
258 | residuals.columns = clear_ids(residuals.columns)
259 | history.columns = clear_ids(history.columns)
260 | val_data["id"] = clear_ids(val_data["id"])
261 |
262 | # res = optimal_reconcilation(forecasts, tree, method="mint", residuals=residuals)
263 | res = top_down(forecasts, history, tree, method="prop_hist_avg")
264 | res = pd.merge(res, val_data[["id", "y", "ypred", "date"]], how="left", on=["id", "date"])
265 | res.loc[res["id"] == "root", "id"] = "all"
266 | res["mape"] = mape(res["y"], res["ypred"])
267 | res["mape_new"] = mape(res["y"], res["ypred_new"])
268 | res[["series", "sku"]] = res["id"].str.split("_", expand=True)
269 | res.drop(columns=["id"], inplace=True)
270 | return res
271 |
272 | if __name__ == "__main__":
273 | res = example()
274 | print("result: ", res)
275 |
--------------------------------------------------------------------------------
/forecastability/README.md:
--------------------------------------------------------------------------------
1 | # Forecastability Analysis
2 |
3 | This is a tool to implement forecastability analysis, including calculating:
4 | * Frequency
5 | * Stability
6 | * Periodicity
7 | * Share of products for which a single customer accounts for over 50% of demand
8 |
9 | ## Input Data
10 | | column name | type | note |
11 | |---|---|---|
12 | | date | string | yyyy-mm-dd or yyyy/mm/dd |
13 | | sku_code | string | code of SKU |
14 | | customer_code | string | code of customer |
15 | | qty | float | demand quantity |
16 |
17 |
18 | ## Usage:
19 | ```python
20 | import forecastability
21 | fa = forecastability.Forecastability(data, tm="date")
22 | # calculate frequency
23 | fa.frequency()
24 | # calculate stability
25 | fa.stability()
26 | # calculate periodicity
27 | fa.periodicity()
28 | # calculate single customer percent
29 | fa.single_customer_percent()
30 | # render forecastability report
31 | fa.render("forecastability_report")
32 | ```
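A minimal end-to-end sketch with toy data (column names follow the table above; calling `preprocess()` first mirrors the `__main__` example in `forecastability.py`, and `periodicity()` may find nothing on such a short series):

```python
import pandas as pd
import forecastability

data = pd.DataFrame({
    "date": ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-04"],
    "sku_code": ["A", "A", "B", "B"],
    "customer_code": ["c1", "c2", "c1", "c1"],
    "qty": [10.0, 12.0, 5.0, 7.0],
})

fa = forecastability.Forecastability(data, tm="date")
fa.preprocess()  # parse dates and add the derived year column
fa.frequency()
fa.stability()
fa.periodicity()
fa.single_customer_percent()
fa.render("forecastability_report")  # render() appends ".html"
```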
33 |
34 | ## Reference:
35 | [1] [时间周期序列周期性挖掘](https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html)
36 |
37 | [2] [供应链三道防线:需求预测,库存计划,供应链执行](https://book.douban.com/subject/30223850/)
38 |
39 | [3] [Hyndman, R. J., & Athanasopoulos, G. (2018). Forecasting: principles and practice. OTexts.](https://otexts.com/fpp2/)
40 |
--------------------------------------------------------------------------------
/forecastability/build/lib/forecastability/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecastability/build/lib/forecastability/__init__.py
--------------------------------------------------------------------------------
/forecastability/build/lib/forecastability/forecastability.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | #@Author: Jing Wang
3 | #@Date: 2020-10-29 16:51:06
4 | #@Last Modified by: Jing Wang
5 | #@Last Modified time: 2020-10-29 16:51:06
6 | #@reference:
7 |
8 | '''
9 | Calculate forecastability and output report
10 | '''
11 | import util
12 | import period_detect
13 | from joblib import Parallel, delayed, parallel_backend
14 | import pandas as pd
15 |
16 | class Forecastability:
17 |
18 | def __init__(self, data, tm="date"):
19 | '''
20 | Args:
21 | data (data frame): with columns \
22 | ["date", "sku_code", "customer_code", "qty"]
23 | tm (str): time dimension, ["date", "week", "month", "year"]
24 | '''
25 | self.data = data
26 | if tm not in ["date", "week", "month", "year"]:
27 | raise Exception("Time dimension is invalid!")
28 | self.tm = tm
29 |
30 | def preprocess(self):
31 | '''
32 | Create necessary time dimension
33 | '''
34 | self.data["date"] = self.data["date"].apply(util.date_converter)
35 | self.data["year"] = self.data["date"].apply(lambda x: str(x.year))
36 | if self.tm == "week":
37 | self.data["week"] = self.data["date"].apply(lambda x: str(x.isocalendar()[1])
38 | if x.isocalendar()[1] > 9 else "0" + str(x.isocalendar()[1]))
39 | self.data["week"] = int(self.data["year"] + self.data["week"])
40 | if self.tm == "month":
41 | self.data["month"] = self.data["month"].apply(lambda x: str(x.month)
42 | if x.month > 9 else "0" + str(x.month))
43 | self.data["month"] = int(self.data["year"] + self.data["month"])
44 |
45 |
46 | def frequency(self, high=0.75, low=0.3):
47 | '''
48 | Calculate frequency of products
49 | Args:
50 | high (float): high bar for high frequency
51 | low (float): low bar for extremely low frequency
52 | '''
53 |
54 | # calculate frequency
55 | sku_date_count = self.data.groupby(["sku_code"])[self.tm].apply(lambda x: len(set(x))).reset_index()
56 | sku_date_count.columns = ["sku_code", "tm_stats"]
57 | tot_tm = len(self.data[self.tm].unique())
58 | sku_date_count["freq_stats"] = sku_date_count["tm_stats"] / tot_tm
59 |
60 | # split to high, low and extreme low
61 | def freq_split(x):
62 | if x >= high:
63 | return "高频"
64 | elif x >= low:
65 | return "低频"
66 | return "极端低频"
67 |
68 | sku_date_count["frequency"] = sku_date_count["freq_stats"].apply(freq_split)
69 | self.freq = sku_date_count[["sku_code", "frequency"]]
70 | return self.freq
71 |
72 | def stability(self, high=5, low=0.7):
73 | '''
74 | Calculate stability of products
75 | Args:
76 | high (float): high bar for extremely unstable
77 | low (float): low bar for stable
78 | '''
79 | # calculate stability
80 | groupby_demand = self.data.groupby(["sku_code", self.tm])["qty"].sum().reset_index()
81 | groupby_demand = groupby_demand.groupby(["sku_code"]).agg(["mean", "std"]).reset_index()
82 | groupby_demand.columns = ["sku_code", "mean", "std"]
83 | groupby_demand["cv"] = groupby_demand["std"] / groupby_demand["mean"]
84 |
85 | # split stability
86 | def stable_split(x):
87 | if x < low:
88 | return "稳定"
89 | elif x < high:
90 | return "不稳定"
91 | return "极端不稳定"
92 |
93 | groupby_demand["stability"] = groupby_demand["cv"].apply(stable_split)
94 | self.stable = groupby_demand[["sku_code", "stability"]]
95 | return self.stable
96 |
97 | def periodicity(self, threshold=0.8):
98 | '''
99 | Calculate periodicity based on threshold of confidence
100 | '''
101 | groupby_demand = self.data.groupby(["sku_code", self.tm])["qty"].sum().reset_index()
102 | groupby_demand = util.fill_ts(groupby_demand, self.tm)
103 | groupby_demand.sort_values(self.tm, inplace=True)
104 |
105 | skus = groupby_demand["sku_code"].unique().tolist()
106 | print("number of skus: ", len(skus))
107 | with parallel_backend("multiprocessing", n_jobs=-1):
108 | results = Parallel()(delayed(self.single_period_detection)(groupby_demand,
109 | sku, threshold, i) for i, sku in enumerate(skus))
110 | result = pd.concat(results, axis=0)
111 | result = result[result["periodicity"].apply(lambda x: len(x) > 0)]
112 | self.period = result
113 | return self.period
114 |
115 | def single_period_detection(self, groupby_demand, sku, threshold, counter):
116 | sku_demand = groupby_demand[groupby_demand["sku_code"] == sku]["qty"].tolist()
117 | period_res = period_detect.solve(sku_demand, threshold, method="dp")
118 | if period_res is None or len(period_res) == 0:
119 | return pd.DataFrame()
120 | period_res = {key: score for key, score in period_res.items() if len(set(key)) > 1}
121 | res = pd.DataFrame([[sku, period_res]], columns=["sku_code", "periodicity"])
122 | return res
123 |
124 | def single_customer_percent(self, percent=0.5):
125 | '''
126 | Calculate percent of single customer for different products
127 | Args:
128 | percent (float): percent threshold of single customer
129 | '''
130 | # weekly frequency
131 | if self.tm != "week":
132 | self.data["week"] = self.data["date"].apply(lambda x: str(x.isocalendar()[1])
133 | if x.isocalendar()[1] > 9 else "0" + str(x.isocalendar()[1]))
134 | self.data["week"] = self.data["year"] + self.data["week"]
135 | groupby_sku = self.data.groupby(["sku_code"])["week"].apply(lambda x: len(set(x))).reset_index()
136 | groupby_sku.columns = ["sku_code", "n_weeks"]
137 |
138 | # customer percent
139 | groupby_cust = self.data.groupby(["sku_code", "customer_code"])["qty"].sum().reset_index()
140 | groupby_cust.columns = ["sku_code", "customer_code", "customer_qty"]
141 | groupby_sku_sum = self.data.groupby(["sku_code"])["qty"].sum().reset_index()
142 | groupby_sku_sum.columns = ["sku_code", "qty_sum"]
143 | groupby_cust = pd.merge(groupby_cust, groupby_sku_sum, on="sku_code", how="left")
144 | groupby_cust["customer_percent"] = groupby_cust["customer_qty"] / groupby_cust["qty_sum"]
145 |
146 | # share of products where a single customer exceeds the given percent of demand
147 | merge_df = pd.merge(groupby_sku, groupby_cust[["sku_code",
148 | "customer_percent"]], on="sku_code", how="inner")
149 | filter_merge_df = merge_df[merge_df["customer_percent"] > percent]
150 |
151 | merge_df = merge_df.groupby(["n_weeks"])["sku_code"].apply(lambda x: len(set(x))).reset_index()
152 | merge_df.columns = ["n_weeks", "n_skus"]
153 | filter_merge_df = filter_merge_df.groupby(["n_weeks"])["sku_code"].apply(lambda x: len(set(x))).reset_index()
154 | filter_merge_df.columns = ["n_weeks", "sat_n_skus"]
155 |
156 | result = pd.merge(merge_df, filter_merge_df, on="n_weeks", how="inner")
157 | result["sku_percent"] = result["sat_n_skus"] / result["n_skus"]
158 | self.single_customer = result[["n_weeks", "sku_percent"]]
159 | return self.single_customer
160 |
161 | def render(self, filename="forecastability_report"):
162 |
163 | file = open("{}.html".format(filename), "w")
164 |
165 | # frequency and stability table
166 | if self.freq is not None and self.stable is not None:
167 | merge_df = pd.merge(self.freq, self.stable, on="sku_code", how="inner")
168 | high_freq = merge_df[merge_df["frequency"] == "高频"]
169 | high_stable = len(high_freq[high_freq["stability"] == "稳定"])
170 | high_unstable = len(high_freq[high_freq["stability"] == "不稳定"])
171 | high_xunstable = len(high_freq[high_freq["stability"] == "极端不稳定"])
172 |
173 | low_freq = merge_df[merge_df["frequency"] == "低频"]
174 | low_stable = len(low_freq[low_freq["stability"] == "稳定"])
175 | low_unstable = len(low_freq[low_freq["stability"] == "不稳定"])
176 | low_xunstable = len(low_freq[low_freq["stability"] == "极端不稳定"])
177 |
178 | xlow_freq = merge_df[merge_df["frequency"] == "极端低频"]
179 | xlow_stable = len(xlow_freq[xlow_freq["stability"] == "稳定"])
180 | xlow_unstable = len(xlow_freq[xlow_freq["stability"] == "不稳定"])
181 | xlow_xunstable = len(xlow_freq[xlow_freq["stability"] == "极端不稳定"])
182 |
183 | n_stable = len(merge_df[merge_df["stability"] == "稳定"])
184 | n_unstable = len(merge_df[merge_df["stability"] == "不稳定"])
185 | n_xunstable = len(merge_df[merge_df["stability"] == "极端不稳定"])
186 |
187 | start = '''
188 | <html>
189 | <head>
190 | <meta charset="utf-8">
191 | <title>Forecastability Report</title>
192 | </head>
193 | <body>
194 | 
195 | '''
196 | headers = ["频率/稳定性", "稳定", "不稳定", "极端不稳定", "总计"]
197 | rows = [
198 | ["高频", high_stable, high_unstable, high_xunstable, len(high_freq)],
199 | ["低频", low_stable, low_unstable, low_xunstable, len(low_freq)],
200 | ["极端低频", xlow_stable, xlow_unstable, xlow_xunstable, len(xlow_freq)],
201 | ["总计", n_stable, n_unstable, n_xunstable, len(merge_df)]
202 | ]
203 | freq_stable_table = util.get_table(headers, rows, "频率和稳定性统计表")
204 | else:
205 | freq_stable_table = ""
206 |
207 | # periodicity table
208 | if self.period is not None:
209 | headers = ["SKU编码", "周期性结果"]
210 | rows = self.period.values.tolist()
211 | period_table = util.get_table(headers, rows, "周期性识别结果表")
212 | else:
213 | period_table = ""
214 |
215 |         end = '</body></html>'  # closing tags (the originals were stripped during extraction)
216 |
217 |         # single-customer share chart
218 | if self.single_customer is not None:
219 | x = self.single_customer["n_weeks"].tolist()
220 | y = [round(s * 100, 2) for s in self.single_customer["sku_percent"].tolist()]
221 | line_charts = util.get_line_charts(x, y, title="单一客户占比超过50%SKU比例和SKU频率图",
222 | xname="有需求的周数", yname="单一客户占比超过50%的SKU比例")
223 | else:
224 | line_charts = ""
225 |
226 | file.write(start + freq_stable_table + period_table + line_charts + end)
227 | file.close()
228 |
229 |
230 | if __name__ == "__main__":
231 | filename = "forecastability_test.csv"
232 | data = pd.read_csv(filename)
233 | data = data[:40000]
234 | fa = Forecastability(data)
235 | fa.preprocess()
236 | fa.frequency()
237 | fa.stability()
238 | fa.periodicity()
239 | fa.single_customer_percent()
240 | # result = fa.single_customer_percent()
241 | fa.render()
242 | # import matplotlib.pyplot as plt
243 | # plt.plot(result["n_weeks"], result["sku_percent"])
244 | # plt.show()
245 |
--------------------------------------------------------------------------------
/forecastability/build/lib/forecastability/period_detect.py:
--------------------------------------------------------------------------------
1 | # /usr/bin/env python 3.6
2 | # -*-coding:utf-8-*-
3 |
4 | '''
5 | Period Detection Solver
6 |
7 | Reference link:
8 | https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html
9 |
10 | Author: Jing Wang (jingw2@foxmail.com)
11 | '''
12 | import numpy as np
13 |
14 | ## algorithm
15 | def recurse(n, m, cache=None):
16 | '''
17 | recursion method
18 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
19 |
20 | Args:
21 | n (int): starting row index
22 | m (int): starting column index
23 |
24 | Return:
25 | d (int): minimum ERP distance
26 | '''
27 |     # the original re-created `cache = {}` on every call, so the memo never
28 |     # hit; threading it through the recursion makes the memoization effective
29 |     if cache is None:
30 |         cache = {}
31 |     if (n, m) in cache:
32 |         return cache[(n, m)]
33 |     d = 0
34 |     if n == 0 and m == p:
35 |         d += matrix[n][m]
36 |     elif n == 0 and m > p:
37 |         d += recurse(n, m - 1, cache) + matrix[n, m]
38 |     else:
39 |         d += min([recurse(n - 1, m - 1, cache), recurse(n - 1, m, cache)]) + matrix[n][m]
40 |     cache[(n, m)] = d
41 |     return d
39 |
40 | def dp(n, m):
41 | '''
42 | dynamic programming
43 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
44 |
45 | Args:
46 | n (int): starting row index
47 | m (int): starting column index
48 |
49 | Return:
50 | minimum ERP distance
51 | '''
52 | nr, nc = matrix.shape
53 | d = np.zeros((nr, nc))
54 | for i in range(n, -1, -1):
55 | for j in range(m, p - 1, -1):
56 | if i < nr - 1 and j < nc - 1:
57 | valid = []
58 | if (j - i - 1) >= (m - n):
59 | valid.append(d[i + 1, j])
60 | if (j + 1 - i) >= (m - n):
61 | valid.append(d[i, j + 1])
62 | if (j - i) >= (m - n):
63 | valid.append(d[i + 1, j + 1])
64 | if len(valid) > 0:
65 | d[i, j] = min(valid) + matrix[i][j]
66 | else:
67 | d[i, j] = matrix[i][j]
68 | elif i < nr - 1 and j == nc - 1:
69 | if (j - i - 1) >= (m - n):
70 | d[i, j] = d[i + 1, j]+ matrix[i][j]
71 | else:
72 | d[i, j] = matrix[i, j]
73 | elif i == nr - 1 and j < nc - 1:
74 | if (j + 1 - i) >= (m - n):
75 | d[i, j] = d[i, j + 1] + matrix[i][j]
76 | else:
77 | d[i, j] = matrix[i, j]
78 |
79 |
80 | return d[0, p]
81 |
82 | def solve(s, threshold, method = "dp"):
83 | '''
84 |     Detect candidate periods of s; returns a dict {period element (tuple): confidence} for confidences above the threshold
85 | '''
86 |
87 | # check
88 | if len(s) == 0 or len(s) == 1:
89 | return None
90 |
91 | try:
92 | s[0]
93 | s[0:]
94 |     except Exception:
95 | raise Exception("Please make sure input can be sliced!")
96 |
97 | # generate distance matrix
98 | global matrix, p
99 | n = len(s)
100 | matrix = np.zeros((n, n))
101 | for i in range(n):
102 | for j in range(n):
103 | if i == j:
104 | matrix[i, j] = float("inf") # leave the main diagonal
105 | continue
106 | if s[i] == s[j]:
107 | matrix[i, j] = 0
108 | else:
109 | matrix[i, j] = 1
110 |
111 | result = {}
112 | for p in range(1, n // 2 + 1):
113 | if method == "dp":
114 | d = dp(n - p - 1, n - 1)
115 | else:
116 | d = recurse(len(s) - p - 1, n - 1)
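        # d is the number of mismatches along the best alignment, so the
        # confidence is the fraction of the n - p aligned positions that match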
117 | confidence = (n - p - d) / (n - p)
118 |
119 | if confidence > threshold:
120 | result[tuple(s[:p])] = round(confidence, 3)
121 |
122 | return result
123 |
124 |
125 | if __name__ == '__main__':
126 |     s = "ababac"
127 |     print(solve(s, 0.7))
128 |
--------------------------------------------------------------------------------
/forecastability/build/lib/forecastability/util.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | from datetime import datetime
3 | import pandas as pd
4 |
5 |
6 | def date_converter(x):
7 | '''
8 |     Convert the input to a datetime
9 | '''
10 | if x is None:
11 | return x
12 | try:
13 | x = str(x)
14 | except Exception:
15 | return x
16 |
17 | try:
18 | return datetime.strptime(x, "%Y-%m-%d")
19 | except Exception:
20 | try:
21 | return datetime.strptime(x, "%Y/%m/%d")
22 | except Exception:
23 | try:
24 | return datetime.strptime(x, "%Y%m%d")
25 | except Exception:
26 | return x
27 |
28 |
29 | def date_parser(x):
30 | '''
31 |     Convert a datetime to a date string
32 | '''
33 | if not isinstance(x, datetime):
34 | return None
35 |
36 |     try:
37 |         return x.strftime("%Y-%m-%d")
38 |     except Exception:
39 |         # the original fallbacks called x.strptime(...), which parses strings
40 |         # rather than formatting datetimes and could never succeed here
41 |         return None
46 |
47 |
48 | def fill_ts(data, tm):
49 | '''
50 |     Fill the time series so every period appears; missing periods get qty = 0
51 | '''
52 | data[tm] = data[tm].apply(date_parser)
53 | if tm == "date":
54 | min_dt = date_converter(data[tm].min())
55 | max_dt = date_converter(data[tm].max())
56 | tm_list = [date_parser(x) for x in pd.date_range(start=min_dt, end=max_dt)]
57 | else:
58 | min_dt = data[tm].min()
59 | max_dt = data[tm].max()
60 | tm_list = list(range(min_dt, max_dt+1))
61 | tm_df = pd.DataFrame(tm_list, columns=[tm])
62 | df = pd.merge(tm_df, data[[tm, "sku_code", "qty"]], on=tm, how="left")
63 | df["qty"].fillna(0, inplace=True)
64 | return df
65 |
66 |
67 | def get_table(headers, rows, tablename):
68 |     # the CSS inside the original <style> block was lost during extraction;
69 |     # an empty string keeps the report rendering, just unstyled
70 |     table_style = ''
129 |     # the HTML tags in these literals were stripped during extraction;
130 |     # minimal plausible table markup is restored below
131 |     title = '<h3 align="center">{}</h3>'.format(tablename)
132 |     table = '<table align="center"><tr>'
133 |     for h in headers:
134 |         table += '<th>{}</th>'.format(h)
135 |     table += '</tr>'
136 |     for r in rows:
137 |         table += '<tr>'
138 |         for ele in r:
139 |             table += '<td>{}</td>'.format(ele)
140 |         table += '</tr>'
141 |     end = '</table>'
142 |     return table_style + title + table + end
143 |
144 | def get_line_charts(x, y, title, xname, yname):
145 | params = {
146 | 'title': {
147 | 'text': title,
148 | },
149 | 'toolbox': {
150 | 'show': 'true',
151 | 'orient': 'vertical',
152 | 'left': 'right',
153 | 'top': 'center',
154 | 'feature': {
155 | 'mark': {'show': 'true'},
156 | 'dataView': {'show': 'true', 'readOnly': 'false'},
157 | 'magicType': {'show': 'true', 'type': ['line', 'bar', 'stack', 'tiled']},
158 | 'restore': {'show': 'true'},
159 | 'saveAsImage': {'show': 'true'}
160 | }
161 | },
162 | 'tooltip': {
163 | "show": 'true',
164 | 'trigger': 'axis'
165 | },
166 | 'legend': {
167 | 'data': []
168 | },
169 | 'xAxis': {
170 | 'data': x,
171 | 'name': xname,
172 | "nameLocation": "middle",
173 | "nameGap": 25,
174 | 'nameTextStyle': {
175 | 'fontSize': 14
176 | }
177 | },
178 | 'yAxis': {
179 | 'name': yname,
180 | 'type': 'value',
181 | "nameLocation": "middle",
182 | "nameGap": 40,
183 | 'nameTextStyle': {
184 | 'fontSize': 14
185 | },
186 | "axisLabel": {
187 | "show": 'true',
188 | "position": "right",
189 | "margin": 8,
190 | "formatter": "{value}%"
191 | }
192 | },
193 | 'series': [{
194 | 'name': "",
195 | 'type': 'line',
196 | 'data': y
197 | }]
198 | }
199 |     # the <div>/<script> markup was stripped during extraction; this is a
200 |     # minimal ECharts embed reconstructed from the option dict above
201 |     chart = ('''
202 |     <div id="line-chart" style="width: 900px; height: 500px;"></div>
203 |     <script src="https://cdn.jsdelivr.net/npm/echarts@4/dist/echarts.min.js"></script>
204 |     <script>
205 |         var chart = echarts.init(document.getElementById("line-chart"));
206 |         chart.setOption(%s);
207 |     </script>
208 |     ''' % str(params))
212 | return chart
213 |
--------------------------------------------------------------------------------
/forecastability/dist/forecastability-0.0.2-py3-none-any.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecastability/dist/forecastability-0.0.2-py3-none-any.whl
--------------------------------------------------------------------------------
/forecastability/dist/forecastability-0.0.2-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecastability/dist/forecastability-0.0.2-py3.6.egg
--------------------------------------------------------------------------------
/forecastability/dist/forecastability-0.0.2.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecastability/dist/forecastability-0.0.2.tar.gz
--------------------------------------------------------------------------------
/forecastability/forecastability.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: forecastability
3 | Version: 0.0.2
4 | Summary: forecastability analysis
5 | Home-page: https://github.com/jingw2/solver/tree/master/forecastability
6 | Author: Jing Wang
7 | Author-email: jingw2@foxmail.com
8 | License: MIT
9 | Description: # Forecastability Analysis
10 |
11 |         This is a tool for forecastability analysis. It calculates:
12 | * Frequency
13 | * Stability
14 | * Periodicity
15 |         * Percentage of products for which a single customer accounts for over 50% of demand
16 |
17 | ## Input Data
18 |         | column name | type | note |
19 | |---|---|---|
20 | | date | string | yyyy-mm-dd or yyyy/mm/dd |
21 | | sku_code | string | code of SKU |
22 | | customer_code | string | code of customer |
23 | | qty | float | demand quantity |
24 |
25 |
26 | ## Usage:
27 | ```python
28 | import forecastability
29 | fa = forecastability.Forecastability(data, tm="date")
30 | # calculate frequency
31 | fa.frequency()
32 | # calculate stability
33 | fa.stability()
34 | # calculate periodicity
35 | fa.periodicity()
36 | # calculate single customer percent
37 | fa.single_customer_percent()
38 |         # render the forecastability report; ".html" is appended automatically
39 |         fa.render("forecastability_report")
40 | ```
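`render` writes `<filename>.html` to the working directory; run the metric calls above first, since the report only includes the sections whose results have been computed.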
41 |
42 | ## Reference:
43 | [1] [时间周期序列周期性挖掘](https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html)
44 |
45 | [2] [供应链三道防线:需求预测,库存计划,供应链执行](https://book.douban.com/subject/30223850/)
46 |
47 | [3] [Hyndman, R. J., & Athanasopoulos, G. (2018). Forecasting: principles and practice. OTexts.](https://otexts.com/fpp2/)
48 |
49 | Platform: UNKNOWN
50 | Requires-Python: >=3.6
51 | Description-Content-Type: text/markdown
52 |
--------------------------------------------------------------------------------
/forecastability/forecastability.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | forecastability/__init__.py
4 | forecastability/forecastability.py
5 | forecastability/period_detect.py
6 | forecastability/util.py
7 | forecastability.egg-info/PKG-INFO
8 | forecastability.egg-info/SOURCES.txt
9 | forecastability.egg-info/dependency_links.txt
10 | forecastability.egg-info/requires.txt
11 | forecastability.egg-info/top_level.txt
--------------------------------------------------------------------------------
/forecastability/forecastability.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/forecastability/forecastability.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | joblib>=0.15.1
2 | numpy>=1.14.6
3 | pandas>=0.25.3
4 |
--------------------------------------------------------------------------------
/forecastability/forecastability.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | forecastability
2 |
--------------------------------------------------------------------------------
/forecastability/forecastability/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/forecastability/forecastability/__init__.py
--------------------------------------------------------------------------------
/forecastability/forecastability/forecastability.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | #@Author: Jing Wang
3 | #@Date: 2020-10-29 16:51:06
4 | #@Last Modified by: Jing Wang
5 | #@Last Modified time: 2020-10-29 16:51:06
6 | #@reference:
7 |
8 | '''
9 | Calculate forecastability and output report
10 | '''
11 | import util
12 | import period_detect
13 | from joblib import Parallel, delayed, parallel_backend
14 | import pandas as pd
15 |
16 | class Forecastability:
17 |
18 | def __init__(self, data, tm="date"):
19 | '''
20 | Args:
21 | data (data frame): with columns \
22 | ["date", "sku_code", "customer_code", "qty"]
23 | tm (str): time dimension, ["date", "week", "month", "year"]
24 | '''
25 | self.data = data
26 | if tm not in ["date", "week", "month", "year"]:
27 | raise Exception("Time dimension is invalid!")
28 |         self.tm = tm
29 |         # result holders; render() checks these for None, so they must exist
30 |         # even if the corresponding method was never called
31 |         self.freq = None
32 |         self.stable = None
33 |         self.period = None
34 |         self.single_customer = None
29 |
30 | def preprocess(self):
31 | '''
32 | Create necessary time dimension
33 | '''
34 | self.data["date"] = self.data["date"].apply(util.date_converter)
35 | self.data["year"] = self.data["date"].apply(lambda x: str(x.year))
36 | if self.tm == "week":
37 | self.data["week"] = self.data["date"].apply(lambda x: str(x.isocalendar()[1])
38 | if x.isocalendar()[1] > 9 else "0" + str(x.isocalendar()[1]))
39 |             self.data["week"] = (self.data["year"] + self.data["week"]).astype(int)  # int() on a Series raises
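            # e.g. 2020-03-05 falls in ISO week 10 -> "2020" + "10" -> 202010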
40 | if self.tm == "month":
41 |             # derive month from the "date" column (the original read the not-
42 |             # yet-created "month" column) and avoid int() on a Series
43 |             self.data["month"] = self.data["date"].apply(lambda x: str(x.month)
44 |                 if x.month > 9 else "0" + str(x.month))
45 |             self.data["month"] = (self.data["year"] + self.data["month"]).astype(int)
44 |
45 |
46 | def frequency(self, high=0.75, low=0.3):
47 | '''
48 | Calculate frequency of products
49 | Args:
50 | high (float): high bar for high frequency
51 | low (float): low bar for extremely low frequency
52 | '''
53 |
54 | # calculate frequency
55 | sku_date_count = self.data.groupby(["sku_code"])[self.tm].apply(lambda x: len(set(x))).reset_index()
56 | sku_date_count.columns = ["sku_code", "tm_stats"]
57 | tot_tm = len(self.data[self.tm].unique())
58 | sku_date_count["freq_stats"] = sku_date_count["tm_stats"] / tot_tm
59 |
60 | # split to high, low and extreme low
61 | def freq_split(x):
62 | if x >= high:
63 | return "高频"
64 | elif x >= low:
65 | return "低频"
66 | return "极端低频"
67 |
68 | sku_date_count["frequency"] = sku_date_count["freq_stats"].apply(freq_split)
69 | self.freq = sku_date_count[["sku_code", "frequency"]]
70 | return self.freq
71 |
72 | def stability(self, high=5, low=0.7):
73 | '''
74 | Calculate stability of products
75 | Args:
76 | high (float): high bar for extremely unstable
77 | low (float): low bar for stable
78 | '''
79 | # calculate stability
80 | groupby_demand = self.data.groupby(["sku_code", self.tm])["qty"].sum().reset_index()
81 |         groupby_demand = groupby_demand.groupby(["sku_code"])["qty"].agg(["mean", "std"]).reset_index()  # aggregate qty only
82 | groupby_demand.columns = ["sku_code", "mean", "std"]
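        # coefficient of variation (CV) of per-period demand: std / mean;
        # a larger CV means a less stable series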
83 | groupby_demand["cv"] = groupby_demand["std"] / groupby_demand["mean"]
84 |
85 | # split stability
86 | def stable_split(x):
87 | if x < low:
88 | return "稳定"
89 | elif x < high:
90 | return "不稳定"
91 | return "极端不稳定"
92 |
93 | groupby_demand["stability"] = groupby_demand["cv"].apply(stable_split)
94 | self.stable = groupby_demand[["sku_code", "stability"]]
95 | return self.stable
96 |
97 | def periodicity(self, threshold=0.8):
98 | '''
99 | Calculate periodicity based on threshold of confidence
100 | '''
101 | groupby_demand = self.data.groupby(["sku_code", self.tm])["qty"].sum().reset_index()
102 | groupby_demand = util.fill_ts(groupby_demand, self.tm)
103 | groupby_demand.sort_values(self.tm, inplace=True)
104 |
105 | skus = groupby_demand["sku_code"].unique().tolist()
106 | print("number of skus: ", len(skus))
107 | with parallel_backend("multiprocessing", n_jobs=-1):
108 | results = Parallel()(delayed(self.single_period_detection)(groupby_demand,
109 | sku, threshold, i) for i, sku in enumerate(skus))
110 | result = pd.concat(results, axis=0)
111 | result = result[result["periodicity"].apply(lambda x: len(x) > 0)]
112 | self.period = result
113 | return self.period
114 |
115 | def single_period_detection(self, groupby_demand, sku, threshold, counter):
116 | sku_demand = groupby_demand[groupby_demand["sku_code"] == sku]["qty"].tolist()
117 | period_res = period_detect.solve(sku_demand, threshold, method="dp")
118 | if period_res is None or len(period_res) == 0:
119 | return pd.DataFrame()
120 | period_res = {key: score for key, score in period_res.items() if len(set(key)) > 1}
121 | res = pd.DataFrame([[sku, period_res]], columns=["sku_code", "periodicity"])
122 | return res
123 |
124 | def single_customer_percent(self, percent=0.5):
125 | '''
126 | Calculate percent of single customer for different products
127 | Args:
128 | percent (float): percent threshold of single customer
129 | '''
130 |         # weekly frequency
131 | if self.tm != "week":
132 | self.data["week"] = self.data["date"].apply(lambda x: str(x.isocalendar()[1])
133 | if x.isocalendar()[1] > 9 else "0" + str(x.isocalendar()[1]))
134 | self.data["week"] = self.data["year"] + self.data["week"]
135 | groupby_sku = self.data.groupby(["sku_code"])["week"].apply(lambda x: len(set(x))).reset_index()
136 | groupby_sku.columns = ["sku_code", "n_weeks"]
137 |
138 | # customer percent
139 | groupby_cust = self.data.groupby(["sku_code", "customer_code"])["qty"].sum().reset_index()
140 | groupby_cust.columns = ["sku_code", "customer_code", "customer_qty"]
141 | groupby_sku_sum = self.data.groupby(["sku_code"])["qty"].sum().reset_index()
142 | groupby_sku_sum.columns = ["sku_code", "qty_sum"]
143 | groupby_cust = pd.merge(groupby_cust, groupby_sku_sum, on="sku_code", how="left")
144 | groupby_cust["customer_percent"] = groupby_cust["customer_qty"] / groupby_cust["qty_sum"]
145 |
146 |         # single customer percent: share of SKUs where one customer exceeds `percent` of total demand
147 | merge_df = pd.merge(groupby_sku, groupby_cust[["sku_code",
148 | "customer_percent"]], on="sku_code", how="inner")
149 | filter_merge_df = merge_df[merge_df["customer_percent"] > percent]
150 |
151 | merge_df = merge_df.groupby(["n_weeks"])["sku_code"].apply(lambda x: len(set(x))).reset_index()
152 | merge_df.columns = ["n_weeks", "n_skus"]
153 | filter_merge_df = filter_merge_df.groupby(["n_weeks"])["sku_code"].apply(lambda x: len(set(x))).reset_index()
154 | filter_merge_df.columns = ["n_weeks", "sat_n_skus"]
155 |
156 | result = pd.merge(merge_df, filter_merge_df, on="n_weeks", how="inner")
157 | result["sku_percent"] = result["sat_n_skus"] / result["n_skus"]
158 | self.single_customer = result[["n_weeks", "sku_percent"]]
159 | return self.single_customer
160 |
161 | def render(self, filename="forecastability_report"):
162 |
163 |         file = open("{}.html".format(filename), "w", encoding="utf-8")  # utf-8 so the Chinese labels render
164 |
165 |         # frequency & stability table
166 | if self.freq is not None and self.stable is not None:
167 | merge_df = pd.merge(self.freq, self.stable, on="sku_code", how="inner")
168 | high_freq = merge_df[merge_df["frequency"] == "高频"]
169 | high_stable = len(high_freq[high_freq["stability"] == "稳定"])
170 | high_unstable = len(high_freq[high_freq["stability"] == "不稳定"])
171 | high_xunstable = len(high_freq[high_freq["stability"] == "极端不稳定"])
172 |
173 | low_freq = merge_df[merge_df["frequency"] == "低频"]
174 | low_stable = len(low_freq[low_freq["stability"] == "稳定"])
175 | low_unstable = len(low_freq[low_freq["stability"] == "不稳定"])
176 | low_xunstable = len(low_freq[low_freq["stability"] == "极端不稳定"])
177 |
178 | xlow_freq = merge_df[merge_df["frequency"] == "极端低频"]
179 | xlow_stable = len(xlow_freq[xlow_freq["stability"] == "稳定"])
180 | xlow_unstable = len(xlow_freq[xlow_freq["stability"] == "不稳定"])
181 | xlow_xunstable = len(xlow_freq[xlow_freq["stability"] == "极端不稳定"])
182 |
183 | n_stable = len(merge_df[merge_df["stability"] == "稳定"])
184 | n_unstable = len(merge_df[merge_df["stability"] == "不稳定"])
185 | n_xunstable = len(merge_df[merge_df["stability"] == "极端不稳定"])
186 |
187 |         # note: the HTML tags in this literal were stripped during extraction;
188 |         # a minimal document skeleton is reconstructed here
189 |         start = '''
190 |         <html>
191 |         <head><meta charset="utf-8"><title>Forecastability Report</title></head>
192 |         <body>
193 |         '''
196 | headers = ["频率/稳定性", "稳定", "不稳定", "极端不稳定", "总计"]
197 | rows = [
198 | ["高频", high_stable, high_unstable, high_xunstable, len(high_freq)],
199 | ["低频", low_stable, low_unstable, low_xunstable, len(low_freq)],
200 | ["极端低频", xlow_stable, xlow_unstable, xlow_xunstable, len(xlow_freq)],
201 | ["总计", n_stable, n_unstable, n_xunstable, len(merge_df)]
202 | ]
203 | freq_stable_table = util.get_table(headers, rows, "频率和稳定性统计表")
204 | else:
205 | freq_stable_table = ""
206 |
207 |         # periodicity table
208 | if self.period is not None:
209 | headers = ["SKU编码", "周期性结果"]
210 | rows = self.period.values.tolist()
211 | period_table = util.get_table(headers, rows, "周期性识别结果表")
212 | else:
213 | period_table = ""
214 |
215 |         end = '</body></html>'  # closing tags (the originals were stripped during extraction)
216 |
217 |         # single-customer share chart
218 | if self.single_customer is not None:
219 | x = self.single_customer["n_weeks"].tolist()
220 | y = [round(s * 100, 2) for s in self.single_customer["sku_percent"].tolist()]
221 | line_charts = util.get_line_charts(x, y, title="单一客户占比超过50%SKU比例和SKU频率图",
222 | xname="有需求的周数", yname="单一客户占比超过50%的SKU比例")
223 | else:
224 | line_charts = ""
225 |
226 | file.write(start + freq_stable_table + period_table + line_charts + end)
227 | file.close()
228 |
229 |
230 | if __name__ == "__main__":
231 | filename = "forecastability_test.csv"
232 | data = pd.read_csv(filename)
233 | data = data[:40000]
234 | fa = Forecastability(data)
235 | fa.preprocess()
236 | fa.frequency()
237 | fa.stability()
238 | fa.periodicity()
239 | fa.single_customer_percent()
240 | # result = fa.single_customer_percent()
241 | fa.render()
242 | # import matplotlib.pyplot as plt
243 | # plt.plot(result["n_weeks"], result["sku_percent"])
244 | # plt.show()
245 |
--------------------------------------------------------------------------------
/forecastability/forecastability/period_detect.py:
--------------------------------------------------------------------------------
1 | # /usr/bin/env python 3.6
2 | # -*-coding:utf-8-*-
3 |
4 | '''
5 | Period Detection Solver
6 |
7 | Reference link:
8 | https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html
9 |
10 | Author: Jing Wang (jingw2@foxmail.com)
11 | '''
12 | import numpy as np
13 |
14 | ## algorithm
15 | def recurse(n, m, cache=None):
16 | '''
17 | recursion method
18 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
19 |
20 | Args:
21 | n (int): starting row index
22 | m (int): starting column index
23 |
24 | Return:
25 | d (int): minimum ERP distance
26 | '''
27 |     # the original re-created `cache = {}` on every call, so the memo never
28 |     # hit; threading it through the recursion makes the memoization effective
29 |     if cache is None:
30 |         cache = {}
31 |     if (n, m) in cache:
32 |         return cache[(n, m)]
33 |     d = 0
34 |     if n == 0 and m == p:
35 |         d += matrix[n][m]
36 |     elif n == 0 and m > p:
37 |         d += recurse(n, m - 1, cache) + matrix[n, m]
38 |     else:
39 |         d += min([recurse(n - 1, m - 1, cache), recurse(n - 1, m, cache)]) + matrix[n][m]
40 |     cache[(n, m)] = d
41 |     return d
39 |
40 | def dp(n, m):
41 | '''
42 | dynamic programming
43 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
44 |
45 | Args:
46 | n (int): starting row index
47 | m (int): starting column index
48 |
49 | Return:
50 | minimum ERP distance
51 | '''
52 | nr, nc = matrix.shape
53 | d = np.zeros((nr, nc))
54 | for i in range(n, -1, -1):
55 | for j in range(m, p - 1, -1):
56 | if i < nr - 1 and j < nc - 1:
57 | valid = []
58 | if (j - i - 1) >= (m - n):
59 | valid.append(d[i + 1, j])
60 | if (j + 1 - i) >= (m - n):
61 | valid.append(d[i, j + 1])
62 | if (j - i) >= (m - n):
63 | valid.append(d[i + 1, j + 1])
64 | if len(valid) > 0:
65 | d[i, j] = min(valid) + matrix[i][j]
66 | else:
67 | d[i, j] = matrix[i][j]
68 | elif i < nr - 1 and j == nc - 1:
69 | if (j - i - 1) >= (m - n):
70 | d[i, j] = d[i + 1, j]+ matrix[i][j]
71 | else:
72 | d[i, j] = matrix[i, j]
73 | elif i == nr - 1 and j < nc - 1:
74 | if (j + 1 - i) >= (m - n):
75 | d[i, j] = d[i, j + 1] + matrix[i][j]
76 | else:
77 | d[i, j] = matrix[i, j]
78 |
79 |
80 | return d[0, p]
81 |
82 | def solve(s, threshold, method = "dp"):
83 | '''
84 |     Detect candidate periods of s; returns a dict {period element (tuple): confidence} for confidences above the threshold
85 | '''
86 |
87 | # check
88 | if len(s) == 0 or len(s) == 1:
89 | return None
90 |
91 | try:
92 | s[0]
93 | s[0:]
94 |     except Exception:
95 | raise Exception("Please make sure input can be sliced!")
96 |
97 | # generate distance matrix
98 | global matrix, p
99 | n = len(s)
100 | matrix = np.zeros((n, n))
101 | for i in range(n):
102 | for j in range(n):
103 | if i == j:
104 | matrix[i, j] = float("inf") # leave the main diagonal
105 | continue
106 | if s[i] == s[j]:
107 | matrix[i, j] = 0
108 | else:
109 | matrix[i, j] = 1
110 |
111 | result = {}
112 | for p in range(1, n // 2 + 1):
113 | if method == "dp":
114 | d = dp(n - p - 1, n - 1)
115 | else:
116 | d = recurse(len(s) - p - 1, n - 1)
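        # d is the number of mismatches along the best alignment, so the
        # confidence is the fraction of the n - p aligned positions that match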
117 | confidence = (n - p - d) / (n - p)
118 |
119 | if confidence > threshold:
120 | result[tuple(s[:p])] = round(confidence, 3)
121 |
122 | return result
123 |
124 |
125 | if __name__ == '__main__':
126 |     s = "ababac"
127 |     print(solve(s, 0.7))
128 |
--------------------------------------------------------------------------------
/forecastability/forecastability/util.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | from datetime import datetime
3 | import pandas as pd
4 |
5 |
6 | def date_converter(x):
7 | '''
8 |     Convert the input to a datetime
9 | '''
10 | if x is None:
11 | return x
12 | try:
13 | x = str(x)
14 | except Exception:
15 | return x
16 |
17 | try:
18 | return datetime.strptime(x, "%Y-%m-%d")
19 | except Exception:
20 | try:
21 | return datetime.strptime(x, "%Y/%m/%d")
22 | except Exception:
23 | try:
24 | return datetime.strptime(x, "%Y%m%d")
25 | except Exception:
26 | return x
27 |
28 |
29 | def date_parser(x):
30 | '''
31 |     Convert a datetime to a date string
32 | '''
33 | if not isinstance(x, datetime):
34 | return None
35 |
36 |     try:
37 |         return x.strftime("%Y-%m-%d")
38 |     except Exception:
39 |         # the original fallbacks called x.strptime(...), which parses strings
40 |         # rather than formatting datetimes and could never succeed here
41 |         return None
46 |
47 |
48 | def fill_ts(data, tm):
49 | '''
50 |     Fill the time series so every period appears; missing periods get qty = 0
51 | '''
52 | data[tm] = data[tm].apply(date_parser)
53 | if tm == "date":
54 | min_dt = date_converter(data[tm].min())
55 | max_dt = date_converter(data[tm].max())
56 | tm_list = [date_parser(x) for x in pd.date_range(start=min_dt, end=max_dt)]
57 | else:
58 | min_dt = data[tm].min()
59 | max_dt = data[tm].max()
60 | tm_list = list(range(min_dt, max_dt+1))
61 | tm_df = pd.DataFrame(tm_list, columns=[tm])
62 | df = pd.merge(tm_df, data[[tm, "sku_code", "qty"]], on=tm, how="left")
63 | df["qty"].fillna(0, inplace=True)
64 | return df
65 |
66 |
67 | def get_table(headers, rows, tablename):
68 |     # the CSS inside the original <style> block was lost during extraction;
69 |     # an empty string keeps the report rendering, just unstyled
70 |     table_style = ''
129 |     # the HTML tags in these literals were stripped during extraction;
130 |     # minimal plausible table markup is restored below
131 |     title = '<h3 align="center">{}</h3>'.format(tablename)
132 |     table = '<table align="center"><tr>'
133 |     for h in headers:
134 |         table += '<th>{}</th>'.format(h)
135 |     table += '</tr>'
136 |     for r in rows:
137 |         table += '<tr>'
138 |         for ele in r:
139 |             table += '<td>{}</td>'.format(ele)
140 |         table += '</tr>'
141 |     end = '</table>'
142 |     return table_style + title + table + end
143 |
144 | def get_line_charts(x, y, title, xname, yname):
145 | params = {
146 | 'title': {
147 | 'text': title,
148 | },
149 | 'toolbox': {
150 | 'show': 'true',
151 | 'orient': 'vertical',
152 | 'left': 'right',
153 | 'top': 'center',
154 | 'feature': {
155 | 'mark': {'show': 'true'},
156 | 'dataView': {'show': 'true', 'readOnly': 'false'},
157 | 'magicType': {'show': 'true', 'type': ['line', 'bar', 'stack', 'tiled']},
158 | 'restore': {'show': 'true'},
159 | 'saveAsImage': {'show': 'true'}
160 | }
161 | },
162 | 'tooltip': {
163 | "show": 'true',
164 | 'trigger': 'axis'
165 | },
166 | 'legend': {
167 | 'data': []
168 | },
169 | 'xAxis': {
170 | 'data': x,
171 | 'name': xname,
172 | "nameLocation": "middle",
173 | "nameGap": 25,
174 | 'nameTextStyle': {
175 | 'fontSize': 14
176 | }
177 | },
178 | 'yAxis': {
179 | 'name': yname,
180 | 'type': 'value',
181 | "nameLocation": "middle",
182 | "nameGap": 40,
183 | 'nameTextStyle': {
184 | 'fontSize': 14
185 | },
186 | "axisLabel": {
187 | "show": 'true',
188 | "position": "right",
189 | "margin": 8,
190 | "formatter": "{value}%"
191 | }
192 | },
193 | 'series': [{
194 | 'name': "",
195 | 'type': 'line',
196 | 'data': y
197 | }]
198 | }
199 |     # the <div>/<script> markup was stripped during extraction; this is a
200 |     # minimal ECharts embed reconstructed from the option dict above
201 |     chart = ('''
202 |     <div id="line-chart" style="width: 900px; height: 500px;"></div>
203 |     <script src="https://cdn.jsdelivr.net/npm/echarts@4/dist/echarts.min.js"></script>
204 |     <script>
205 |         var chart = echarts.init(document.getElementById("line-chart"));
206 |         chart.setOption(%s);
207 |     </script>
208 |     ''' % str(params))
212 | return chart
213 |
--------------------------------------------------------------------------------
/forecastability/requirements.txt:
--------------------------------------------------------------------------------
1 | joblib>=0.15.1
2 | numpy>=1.14.6
3 | pandas>=0.25.3
4 |
--------------------------------------------------------------------------------
/forecastability/setup.py:
--------------------------------------------------------------------------------
1 | try:
2 | from setuptools import setup
3 | except ImportError:
4 | from distutils.core import setup
5 | from setuptools import find_packages
6 |
7 | from os import path
8 | this_directory = path.abspath(path.dirname(__file__))
9 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
10 | long_description = f.read()
11 |
12 | # required dependencies
13 | with open(path.join(this_directory, "requirements.txt")) as fp:
14 | install_requires = fp.read().strip().split("\n")
15 |
16 | VERSION = "0.0.2"  # must change for every release; PyPI does not allow overwriting a version
17 | LICENSE = 'MIT'
18 | setup(
19 | version=VERSION,
20 | setup_requires=["numpy"],
21 | install_requires=install_requires,
22 | name='forecastability',
23 | description='forecastability analysis',
24 | long_description=long_description,
25 | long_description_content_type='text/markdown',
26 | url='https://github.com/jingw2/solver/tree/master/forecastability',
27 | author='Jing Wang',
28 | author_email='jingw2@foxmail.com',
29 | license=LICENSE,
30 | packages=find_packages(),
31 | python_requires='>=3.6')
32 |
--------------------------------------------------------------------------------
/km/README.md:
--------------------------------------------------------------------------------
1 | ## KM (Kuhn-Munkres) Solver
2 |
3 | This solver finds the best matching of a bipartite graph.
4 |
5 | The theory behind this algorithm is described in:
6 |
7 | * http://blog.sina.com.cn/s/blog_691ce2b701016reh.html
8 |
9 | * http://www.cnblogs.com/wenruo/p/5264235.html
10 |
11 | The Hungarian algorithm is the core of KM: it finds the maximum number of pairs in a
12 | bipartite graph, while KM finds the pairing that maximizes the total weight of the graph.
13 |
14 | The Hungarian algorithm can be implemented with DFS or BFS. The two methods were compared
15 | on fully-connected bipartite graphs of different sizes. The distribution of time spent is shown below,
16 | 
17 |
18 | It can be seen that DFS is slightly better than BFS when the graph is small, but BFS becomes
19 | clearly faster than DFS as the size grows.
20 |
21 | Usage:
22 | ```python
23 | import numpy as np
24 | import km
25 |
26 | # create a graph
27 | graph = np.random.randn(3, 3)
28 |
29 | # solve using km solver
30 | match, totWeight = km.solve(graph, method = "bfs", verbose = 0, is_constraint_on_weight=True)
31 |
32 | # match is a dictionary mapping each left vertex index to its matched
33 | # right vertex index; unmatched vertices are simply omitted.
34 |
35 | # Arguments:
36 | # * graph (np.array like):
37 | #     every row represents a left vertex of the bipartite graph
38 | #     every column represents a right vertex of the bipartite graph
39 | # * verbose (int): 1 to print progress
40 | # * method (str): which method to use, "dfs" or "bfs"
41 | # * is_constraint_on_weight (boolean):
42 | #     constrain on weight; matching along a weight = 0 edge is impossible
43 | ```
44 |
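A concrete run on a tiny graph (a sketch; the expected output follows from the weights chosen here):

```python
import numpy as np
import km

graph = np.array([[3., 1.],
                  [2., 4.]])
match, tot_weight = km.solve(graph, method="bfs")
# match == {0: 0, 1: 1} and tot_weight == 7.0:
# pairing the 3- and 4-weight edges beats the 1 + 2 alternative
```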
--------------------------------------------------------------------------------
/km/dfs vs bfs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/km/dfs vs bfs.png
--------------------------------------------------------------------------------
/km/km.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python 3.6
2 | #-*-coding:utf-8-*-
3 |
4 | '''
5 | KM (Kuhn-Munkres) algorithm solver
6 |
7 | Reference link:
8 |
9 | * http://www.cnblogs.com/wenruo/p/5264235.html --> algorithm implementation and example
10 | * http://blog.sina.com.cn/s/blog_691ce2b701016reh.html --> algorithm theory intro
11 | * https://blog.csdn.net/dark_scope/article/details/8880547 --> Hungarian algorithm intro
12 |
13 | Date: 06/10/2018
14 | Author: Jing Wang (jingw2@foxmail.com)
15 |
16 | Example Use:
17 | #################################################
18 | import random
19 | import km
20 |
21 | ## numbers of left and right vertices
22 | numLeft = 4
23 | numRight = 5
24 |
25 | ## construct graph
26 | graph = np.zeros((numLeft, numRight))
27 | for i in range(numLeft):
28 | for j in range(numRight):
29 | graph[i, j] = random.choice(list(range(10)))
30 |
31 | ## solve
32 | match, weight = km.solve(graph, verbose = 0, method = 'bfs')
33 |
34 | ## Note that match is a dictionary with the left vertex index as key
35 | ## and the index of the matched right vertex as value
36 | #################################################
37 | '''
38 |
39 | import numpy as np
40 | import os
41 | from collections import deque
42 | import random
43 | import argparse
44 | import matplotlib.pyplot as plt
45 |
46 | def dfs(left, graph, is_constraint_on_weight):
47 | '''
48 | depth first search method
49 |
50 | Args:
51 | * left (int): the left element index
52 | * graph (array like): graph to solve
53 |
54 | Return:
55 | * boolean : if match is found, return True, otherwise False
56 | '''
57 |
58 | ## visited
59 | visitedLeft[left] = True
60 |
61 | for right in range(numRight):
62 | if is_constraint_on_weight:
63 | if graph[left, right] == 0: continue
64 | if visitedRight[right]: continue # every round, every right can only be retrieved once
65 | gap = leftExpect[left] + rightExpect[right] - graph[left, right]
66 |
67 | if gap == 0: # match expectation
68 | visitedRight[right] = True
69 |
70 | # if right has no match or the matched left can find other rights
71 | if match[right] == -1 or dfs(match[right], graph, is_constraint_on_weight):
72 | match[right] = left
73 | return True
74 |
75 | else: # to accelerate
76 | slack[right] = min(slack[right], gap)
77 |
78 | return False
79 |
80 | def bfs(left, graph, is_constraint_on_weight):
81 | '''
82 |     breadth first search method
83 |
84 | Args:
85 | * left (int): the left element index
86 | * graph (array like): graph to solve
87 | * is_constraint_on_weight (boolean)
88 |
89 | Return:
90 | * boolean : if match is found, return True, otherwise False
91 | '''
92 |
93 | visitedLeft[left] = True
94 |
95 | queue.append(left) # push to the end
96 | prev[left] = -1
97 | flag = False # has found expand path
98 |
99 | while (len(queue) != 0 and not flag):
100 | firstEle = queue[0]
101 | for right in range(numRight):
102 | if flag: break
103 | if is_constraint_on_weight:
104 | if graph[firstEle, right] == 0: continue
105 | if visitedRight[right]: continue
106 | gap = leftExpect[firstEle] + rightExpect[right] - graph[firstEle, right]
107 |
108 | if gap == 0:
109 |
110 |                 ## push the new vertex
111 | queue.append(match[right])
112 | visitedRight[right] = True
113 |
114 | if match[right] != -1: # find
115 | visitedLeft[match[right]] = True
116 | prev[match[right]] = firstEle
117 | else:
118 | # recursion
119 | flag = True
120 | d = firstEle
121 | e = right
122 | while d != -1:
123 | t = matchLeft[d]
124 | matchLeft[d] = e
125 | match[e] = d
126 | d = prev[d]
127 | e = t
128 |
129 | queue.popleft() # remove the first element
130 |
131 | if matchLeft[left] != -1:
132 | return True
133 | else:
134 | ## slack = min{(x, y) | Lx(x) + Ly(y) - W(x, y), x in S, y not in T}, S is visited left, T is not visited right
135 | for left in range(numLeft):
136 | if not visitedLeft[left]: continue
137 | for right in range(numRight):
138 | if visitedRight[right]: continue
139 | gap = leftExpect[left] + rightExpect[right] - graph[left, right]
140 | if gap == 0: continue
141 | slack[right] = min(slack[right], gap)
142 | return False
143 |
144 | def solve(graph, verbose = 0, method = 'dfs', is_constraint_on_weight=True):
145 |
146 | '''
147 | KM algorithm solver
148 |
149 | Args:
150 | * graph (np.array like):
151 |         every row represents a left vertex of the bipartite graph
152 |         every column represents a right vertex of the bipartite graph
153 |     * verbose (int): 1 to print progress
154 |     * method (str): which method to use, "dfs" or "bfs"
155 |     * is_constraint_on_weight (boolean):
156 |         constrain on weight; matching along a weight = 0 edge is impossible
157 |
158 | Return:
159 |     * match (dict): maps each left vertex index to its matched right vertex
160 |         index (unmatched vertices are simply omitted)
161 | * weight (float): total weights of matched graph
162 |
163 | Raise:
164 | feasibility error
165 | '''
166 |
167 | ## check graph
168 | global numLeft, numRight
169 | numLeft, numRight = graph.shape
170 | is_transpose = False
171 | if numLeft > numRight:
172 |         print("Left side is larger than right; transposing the graph matrix")
173 | graph = graph.T
174 | numLeft, numRight = graph.shape
175 | is_transpose = True
176 |
177 | ## initialize
178 | global leftExpect, rightExpect, visitedLeft, visitedRight, match, slack, matchLeft, prev, queue
179 | leftExpect = {g : np.max(graph[g]) for g in range(numLeft)}
180 | rightExpect = {b : 0 for b in range(numRight)}
181 | match = {b: -1 for b in range(numRight)} ## for rights
182 | matchLeft = {a: -1 for a in range(numLeft)}
183 | prev = {l : -1 for l in range(numLeft)}
184 | queue = deque() # for bfs
185 |
186 | # find match for every left
187 | for lix in range(numLeft):
188 |
189 |         slack = {b : float('inf') for b in range(numRight)} # how much each right's expectation must drop to allow a match
190 | while True:
191 |             # if the left has no match, lower the expectation value until a match is found
192 |
193 | ## initialize every round
194 | visitedLeft = {g : False for g in range(numLeft)}
195 | visitedRight = {b : False for b in range(numRight)}
196 |
197 | if method == 'dfs':
198 | if dfs(lix, graph, is_constraint_on_weight):
199 | break # find match
200 | else:
201 | if matchLeft[lix] == -1:
202 | while len(queue) != 0: queue.pop()
203 | if bfs(lix, graph, is_constraint_on_weight):
204 | break # find match
205 |
206 | ##### cannot find match
207 |
208 | ## find the minimum value to decrease
209 | diff = float('inf')
210 | for right in range(numRight):
211 | if not visitedRight[right]:
212 | diff = min(slack[right], diff)
213 |
214 |
215 |         ## all retrieved lefts should decrease their expectation value
216 | for left in range(numLeft):
217 | if visitedLeft[left]:
218 | leftExpect[left] -= diff
219 |
220 | ## keep c[x] + c[y] = weight[(x, y)]
221 | for right in range(numRight):
222 | # if over one left can match with this right
223 | if visitedRight[right]:
224 | rightExpect[right] += diff
225 | else:
226 | slack[right] -= diff
227 |
228 | if verbose:
229 | print('Finish to match left {}'.format(lix))
230 |
231 | ## output maximum weights
232 | weight = 0
233 | out = {}
234 | for right, left in match.items():
235 | if verbose:
236 | print('left {}, right {}'.format(left, right))
237 | if left != -1:
238 | weight += graph[left, right]
239 | if is_transpose: # exchange the order
240 | out[right] = left
241 | else:
242 | out[left] = right
243 |
244 | if verbose:
245 | print('Maximum match weights: ', weight)
246 |
247 | return out, weight
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/nmf/README.md:
--------------------------------------------------------------------------------
1 | # Nonnegative Matrix Factorization
2 |
3 | ## Usage
4 |
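A minimal sketch, modeled on the `__main__` block of `nmf.py` (it assumes `nmf.py` is importable from the working directory):

```python
import numpy as np
from nmf import NMF

# small nonnegative matrix with a few missing entries
X = np.random.uniform(0, 10, size=(20, 15))
X[3, 4] = np.nan
X[7, 2] = np.nan

clf = NMF(k=10, alpha=1e-3)  # small matrix, so a larger learning rate is fine
clf.fit(X)

# clf.Xhat_ is the reconstruction; the NaN cells are filled with estimates
print(clf.Xhat_[3, 4], clf.Xhat_[7, 2])
```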
5 | ## Reference
6 | [1] https://www.cnblogs.com/wuliytTaotao/p/10814770.html
7 |
--------------------------------------------------------------------------------
/nmf/nmf.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | #@Author: Jing Wang
3 | #@Date: 2020-10-28 14:46:49
4 | #@Last Modified by: Jing Wang
5 | #@Last Modified time: 2020-10-28 14:46:49
6 | #@reference: https://www.cnblogs.com/wuliytTaotao/p/10814770.html
7 |
8 | '''
9 | Nonnegative Matrix Factorization Method:
10 | handles NA (missing) values in the input
11 | '''
12 | import numpy as np
13 |
14 | class NMF(object):
15 |
16 | def __init__(self, k=3, alpha=1e-4, beta=0.5,
17 | max_iters=20000, epsilon=1e-3,
18 | normalize=False, bias=True):
19 | '''
20 | Args:
21 |             k (int): rank of the factorization; k < min(m, n), where m, n are the dimensions of the input
22 |             alpha (float): learning rate
23 |             beta (float): regularization coefficient
24 |             max_iters (int): maximum number of iterations
25 |             epsilon (float): error tolerance
26 |             normalize (bool): whether to normalize X
27 |             bias (bool): whether to use bias terms
28 |
29 |         Note:
30 |             - For a large matrix, use a smaller learning rate (e.g. 1e-5) to avoid NaN/overflow; for a small matrix a larger rate (e.g. 1e-3) works.
31 |             - For higher accuracy, do not pick k too small; keep it close to min(m, n).
32 | '''
33 | self.k = k
34 | self.alpha = alpha
35 | self.beta = beta
36 | self.max_iters = max_iters
37 | self.epsilon = epsilon
38 | self.normalize = normalize
39 | self.bias = bias
40 |
41 | def fit(self, X):
42 | '''
43 | Args:
44 | X (array like)
45 |
46 |         Without bias, the updates are:
47 |             e_{ij} = x_{ij} - \sum_{l=1}^k u_{il}v_{jl}
48 |             u_{il} = u_{il} + alpha * (2 e_{ij}v_{jl} - beta u_{il})
49 |             v_{jl} = v_{jl} + alpha * (2 e_{ij}u_{il} - beta v_{jl})
50 |         With bias, the updates are:
51 |             e_{ij} = x_{ij} - \sum_{l=1}^k u_{il}v_{jl} - b - bu_i - bv_j
52 |             u_{il} = u_{il} + alpha * (2 e_{ij}v_{jl} - beta u_{il})
53 |             v_{jl} = v_{jl} + alpha * (2 e_{ij}u_{il} - beta v_{jl})
54 |             bu_i = bu_i + alpha * (2 e_{ij} - beta * bu_i)
55 |             bv_j = bv_j + alpha * (2 e_{ij} - beta * bv_j)
56 | '''
57 | X = np.asarray(X)
58 | m, n = X.shape
59 | # normalize X
60 | if self.normalize:
61 | X = self._normalize(X)
62 |
63 | # initialize U and V
64 | self.U_ = np.random.uniform(size=(m, self.k))
65 | self.V_ = np.random.uniform(size=(n, self.k))
66 | if self.bias:
67 | # initialize b, bu, bv
68 | self.b_ = X[~np.isnan(X)].mean()
69 | self.bu_ = np.zeros(m)
70 | self.bv_ = np.zeros(n)
71 |
72 | losses = []
73 | for t in range(self.max_iters):
74 | Xhat = self.U_.dot(self.V_.T)
75 | if self.bias:
76 | Xhat += self.b_ + self.bu_[:, np.newaxis] + self.bv_[np.newaxis, :]
77 | e = X - Xhat
78 | resid = e[~np.isnan(X)]
79 | loss = np.sum(np.square(resid))
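            # zero the residual at missing entries so the NaNs do not
            # propagate into the gradient updates below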
80 | e[np.isnan(X)] = 0
81 | self.U_ += self.alpha * (2 * e.dot(self.V_) - self.beta * self.U_)
82 | self.V_ += self.alpha * (2 * e.T.dot(self.U_) - self.beta * self.V_)
83 | if self.bias:
84 |                 # += (not =): the docstring updates are additive; plain assignment dropped the previous bias
85 |                 self.bu_ += self.alpha * (2 * np.sum(e, axis=1) - self.beta * self.bu_)
86 |                 self.bv_ += self.alpha * (2 * np.sum(e, axis=0) - self.beta * self.bv_)
86 | losses.append(loss)
87 | if loss < self.epsilon:
88 | break
89 | self.Xhat_ = self.U_.dot(self.V_.T)
90 | if self.bias:
91 | self.Xhat_ += self.b_ + self.bu_[:, np.newaxis] + self.bv_[np.newaxis, :]
92 | if self.normalize:
93 | self.Xhat_ = self._denormalize(self.Xhat_)
94 |
95 | def _normalize(self, X):
96 | '''
97 |         Normalize X for nonnegative matrix factorization to speed up convergence.
98 |
99 |         Only non-NaN positions are scaled:
100 |             (X - X.max) / (X.max - X.min)
102 | '''
103 | self.max = X[~np.isnan(X)].max()
104 | self.min = X[~np.isnan(X)].min()
105 | X[~np.isnan(X)] = (X[~np.isnan(X)] - self.max) / ((self.max - self.min))
106 | return X
107 |
108 | def _denormalize(self, Xhat):
109 | '''
110 |         Inverse of _normalize: map the estimated Xhat back to the
111 |         original range of X:
112 |
113 |             Xhat * (X.max - X.min) + X.max
114 | '''
115 | return Xhat * (self.max - self.min) + self.max
116 |
117 |
118 | if __name__ == '__main__':
119 | X = np.random.uniform(0, 100, size=(100, 100))
120 | import random
121 | nan_count = 100
122 | cache = set()
123 | for _ in range(nan_count):
124 | i, j = random.choice(range(100)), random.choice(range(100))
125 | while (i, j) in cache:
126 | i, j = random.choice(range(100)), random.choice(range(100))
127 | cache.add((i, j))
128 | X[i, j] = np.nan
129 |
130 | # X = np.array([
131 | # [5, 3, 0, 1],
132 | # [4, 0, 0, 1],
133 | # [1, 1, 0, 5],
134 | # [1, 0, 0, 4],
135 | # [0, 1, 5, 4],
136 | # ], dtype=np.float)
137 |
138 | # # replace 0 with np.nan
139 | # X[X == 0] = np.nan
140 | print(X)
141 | clf = NMF(k=80)
142 | clf.fit(X)
143 | print("Xhat: ", clf.Xhat_)
144 |
145 | e = X - clf.Xhat_
146 | # print(e[~np.isnan(X)].sum())
147 |
--------------------------------------------------------------------------------
/period_detection/README.md:
--------------------------------------------------------------------------------
1 | ## Period Detection Solver
2 |
3 | It finds possible repeated elements by solving for the minimum Edit distance with Real Penalty (ERP); the method is called ERPP (ERP-based Period Detection Algorithm). Let's illustrate the algorithm with the following example:
4 |
5 | Assume we have the string "ababac" and construct a distance matrix from it. The values on the main diagonal are all zeros, which would distort the minimum-ERP calculation, so they are changed to infinity. The following graphs show the ERP calculation for period 1 and period 2.
6 |
7 | 
8 |
9 | For period 1, the element is "a". The origin and destination of the ERP route are (n-2, n-1) and (0, 1). The minimum-distance route is shown by arrows in the graph; its value is 3.
10 |
11 | 
12 |
13 | For period 2, the element is "ab". The origin and destination of the ERP route are (n-3, n-1) and (0, 2). The minimum-distance route is shown by arrows in the graph; its value is 1.
14 |
15 | In general, for period p, we need to find the minimum-distance route from (n-p-1, n-1) to (0, p). The confidence can then be calculated from the minimum ERP,
16 |
17 | 
18 |
19 |
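As a worked check on the running example: for p = 2, n = 6 and the minimum ERP is d = 1, so confidence = (n - p - d) / (n - p) = (6 - 2 - 1) / (6 - 2) = 0.75, exactly the value returned for "ab" below. For p = 1, d = 3 gives (6 - 1 - 3) / (6 - 1) = 0.4, below the 0.7 threshold, so "a" is rejected.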
20 | Solver Arguments:
21 | * s (list, tuple or string)
22 | * threshold (confidence threshold)
23 | * method (recursion or dp), for big length of s, please use dp. By default, it is dp.
24 |
25 | ```Python
26 | import period_detect
27 |
28 | s = "ababac"
29 | result = period_detect.solve(s, threshold = 0.7, method = "dp")
30 |
31 | ## result = {("a", "b"): 0.75}  (keys are tuples spelling out the period element)
32 | ```
33 |
34 |
35 | Reference link:
36 |
37 | * https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html
38 |
--------------------------------------------------------------------------------
/period_detection/confidence.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/period_detection/confidence.gif
--------------------------------------------------------------------------------
/period_detection/p1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/period_detection/p1.png
--------------------------------------------------------------------------------
/period_detection/p2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/period_detection/p2.png
--------------------------------------------------------------------------------
/period_detection/period_detect.py:
--------------------------------------------------------------------------------
1 | # /usr/bin/env python 3.6
2 | # -*-coding:utf-8-*-
3 |
4 | '''
5 | Period Detection Solver
6 |
7 | Reference link:
8 | https://wenku.baidu.com/view/8ad300afb8f67c1cfad6b87a.html
9 |
10 | Author: Jing Wang (jingw2@foxmail.com)
11 | '''
12 | import numpy as np
13 |
14 | ## algorithm
15 | def recurse(n, m, cache=None):
16 | '''
17 | recursion method
18 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
19 |
20 | Args:
21 | n (int): starting row index
22 | m (int): starting column index
23 |
24 | Return:
25 | d (int): minimum ERP distance
26 | '''
27 |     # the original re-created `cache = {}` on every call, so the memo never
28 |     # hit; threading it through the recursion makes the memoization effective
29 |     if cache is None:
30 |         cache = {}
31 |     if (n, m) in cache:
32 |         return cache[(n, m)]
33 |     d = 0
34 |     if n == 0 and m == p:
35 |         d += matrix[n][m]
36 |     elif n == 0 and m > p:
37 |         d += recurse(n, m - 1, cache) + matrix[n, m]
38 |     else:
39 |         d += min([recurse(n - 1, m - 1, cache), recurse(n - 1, m, cache)]) + matrix[n][m]
40 |     cache[(n, m)] = d
41 |     return d
39 |
40 | def dp(n, m):
41 | '''
42 | dynamic programming
43 | find minimum ERP distance from (n - p - 1, n - 1) to (0, p)
44 |
45 | Args:
46 | n (int): starting row index
47 | m (int): starting column index
48 |
49 | Return:
50 | minimum ERP distance
51 | '''
52 | nr, nc = matrix.shape
53 | d = np.zeros((nr, nc))
54 | for i in range(n, -1, -1):
55 | for j in range(m, p - 1, -1):
56 | if i < nr - 1 and j < nc - 1:
57 | valid = []
58 | if (j - i - 1) >= (m - n):
59 | valid.append(d[i + 1, j])
60 | if (j + 1 - i) >= (m - n):
61 | valid.append(d[i, j + 1])
62 | if (j - i) >= (m - n):
63 | valid.append(d[i + 1, j + 1])
64 | if len(valid) > 0:
65 | d[i, j] = min(valid) + matrix[i][j]
66 | else:
67 | d[i, j] = matrix[i][j]
68 | elif i < nr - 1 and j == nc - 1:
69 | if (j - i - 1) >= (m - n):
70 | d[i, j] = d[i + 1, j]+ matrix[i][j]
71 | else:
72 | d[i, j] = matrix[i, j]
73 | elif i == nr - 1 and j < nc - 1:
74 | if (j + 1 - i) >= (m - n):
75 | d[i, j] = d[i, j + 1] + matrix[i][j]
76 | else:
77 | d[i, j] = matrix[i, j]
78 |
79 |
80 | return d[0, p]
81 |
82 | def solve(s, threshold, method = "dp"):
83 | '''
84 |     Detect candidate periods of s; returns a dict {period element (tuple): confidence} for confidences above the threshold
85 | '''
86 |
87 | # check
88 | if len(s) == 0 or len(s) == 1:
89 | return None
90 |
91 | try:
92 | s[0]
93 | s[0:]
94 |     except Exception:
95 | raise Exception("Please make sure input can be sliced!")
96 |
97 | # generate distance matrix
98 | global matrix, p
99 | n = len(s)
100 | matrix = np.zeros((n, n))
101 | for i in range(n):
102 | for j in range(n):
103 | if i == j:
104 | matrix[i, j] = float("inf") # leave the main diagonal
105 | continue
106 | if s[i] == s[j]:
107 | matrix[i, j] = 0
108 | else:
109 | matrix[i, j] = 1
110 |
111 | result = {}
112 | for p in range(1, n // 2 + 1):
113 | if method == "dp":
114 | d = dp(n - p - 1, n - 1)
115 | else:
116 | d = recurse(len(s) - p - 1, n - 1)
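        # d is the number of mismatches along the best alignment, so the
        # confidence is the fraction of the n - p aligned positions that match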
117 | confidence = (n - p - d) / (n - p)
118 |
119 | if confidence > threshold:
120 | result[tuple(s[:p])] = round(confidence, 3)
121 |
122 | return result
123 |
124 |
125 | if __name__ == '__main__':
126 |     s = "ababac"
127 |     print(solve(s, 0.7))
128 |
--------------------------------------------------------------------------------
/psoco/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018 The Python Packaging Authority
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
--------------------------------------------------------------------------------
/psoco/README.md:
--------------------------------------------------------------------------------
1 | ## Particle Swarm Optimization Constraint Optimization Solver
2 | [](https://badge.fury.io/py/psoco)
3 | 
4 | ### Arguments
5 | |Name |Type|Default Value|
6 | |-----|----|-------------|
7 | |particle_size|int|2000|
8 | |max_iter|int|1000|
9 | |sol_size|int|7|
10 | |fitness|function|None|
11 | |constraints|list of functions|None|
12 |
13 | ### Usage
14 | 
15 |
16 | After transforming the constraints, the problem becomes:
17 |
18 | 
19 |
20 | Note: to speed up the search for optimal solutions, initialize the solutions with problem-specific `low` and `high` bounds.
21 | ```python
22 | import psoco
23 | import math
24 |
25 | def objective(x):
26 | '''create objectives based on inputs x as 2D array'''
27 | return (x[:, 0] - 2) ** 2 + (x[:, 1] - 1) ** 2
28 |
29 |
30 | def constraints1(x):
31 | '''create constraint1 based on inputs x as 2D array'''
32 | return x[:, 0] - 2 * x[:, 1] + 1
33 |
34 |
35 | def constraints2(x):
36 | '''create constraint2 based on inputs x as 2D array'''
37 | return - (x[:, 0] - 2 * x[:, 1] + 1)
38 |
39 |
40 | def constraints3(x):
41 | '''create constraint3 based on inputs x as 2D array'''
42 | return x[:, 0] ** 2 / 4. + x[:, 1] ** 2 - 1
43 |
44 | def new_penalty_func(k):
45 |     '''easy problems can use \sqrt{k} as the penalty schedule'''
46 | return math.sqrt(k)
47 |
48 | constraints = [constraints1, constraints2, constraints3]
49 | num_runs = 10
50 | # random parameters lead to variations, so run several time to get mean
51 | for _ in range(num_runs):
52 | pso = psoco.PSOCO(sol_size=2, fitness=objective, constraints=constraints)
53 | pso.h = new_penalty_func
54 |     pso.init_Population(low=0, high=1)  # lower/upper bounds for initializing x; defaults are 0 and 1
55 | pso.solve()
56 | # best solutions
57 | x = pso.gbest.reshape((1, -1))
58 | ```
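After `solve()` returns, `pso.fit` holds the best penalized objective found and `pso.gbest` the corresponding solution; since the penalty vanishes on feasible points, evaluating the constraint functions at `x` is a cheap feasibility check.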
59 | ### Reference
60 | * [Particle Swarm Optimization Method for
61 | Constrained Optimization Problems](https://www.cs.cinvestav.mx/~constraint/papers/eisci.pdf)
62 |
--------------------------------------------------------------------------------
/psoco/build/lib/psoco/__init__.py:
--------------------------------------------------------------------------------
1 | import psoco
--------------------------------------------------------------------------------
/psoco/build/lib/psoco/psoco.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python 3.7
2 | #-*-coding:utf-8-*-
3 |
4 |
5 | '''
6 | Particle Swarm Optimization Constraint Optimization
7 | Author: Jing Wang (jingw2@foxmail.com)
8 | '''
9 |
10 | import math
11 | import numpy as np
12 | import random
13 |
14 | class PSOCO:
15 |
16 | def __init__(self,
17 | particle_size=2000,
18 | max_iter=1000,
19 | sol_size=7,
20 | fitness=None,
21 | constraints=None):
22 | '''
23 | Particle Swarm Optimization Constraint Optimization
24 | Args:
25 |             particle_size (int): number of particles
26 |             max_iter (int): maximum number of iterations
27 |             sol_size (int): dimension of a solution
28 |             fitness (callable): fitness function taking the solutions x
29 |             constraints (list): constraint functions, each expressed in <= 0 form
30 | '''
31 | self.c1 = 2
32 | self.c2 = 2
33 |         self.w = 1.2  # inertia weight, decayed linearly to 0.1 in solve()
34 |         self.kai = 0.73  # constriction factor
35 |         self.vmax = 4  # velocity clamp, keeps updates from blowing up
36 | self.particle_size = particle_size
37 | self.max_iter = max_iter
38 | self.sol_size = sol_size
39 |
40 | # pso parameters
41 | self.X = np.zeros((self.particle_size, self.sol_size))
42 | self.V = np.zeros((self.particle_size, self.sol_size))
43 |         self.pbest = np.zeros((self.particle_size, self.sol_size))  # best position seen by each particle
44 |         self.gbest = np.zeros((1, self.sol_size))  # global best position
45 |         self.p_fit = np.zeros(self.particle_size)  # best fitness of each particle
46 | self.fit = float("inf")
47 | self.iter = 1
48 |
49 | self.constraints = constraints
50 | if constraints is not None:
51 | for cons in constraints:
52 | if not callable(cons):
53 | raise Exception("Constraint is not callable or None!")
54 | if not callable(fitness):
55 | raise Exception("Fitness is not callable!")
56 | self.sub_fitness = fitness
57 |
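    # Penalized objective used below (cf. the paper cited in the README):
    #   F(x, k) = f(x) + h(k) * H(x)
    #   H(x) = sum_i theta(q_i(x)) * q_i(x) ** gamma(q_i(x)),  q_i = max(0, g_i(x))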
58 | def fitness(self, x, k):
59 |         '''fitness function plus penalty term'''
60 | obj = self.sub_fitness(x)
61 | obj = obj.reshape((-1, 1))
62 | return obj + self.h(k) * self.H(x)
63 |
64 | def init_Population(self, low=0, high=1):
65 |         '''initialize the particles'''
66 | self.X = np.random.uniform(size=(self.particle_size, self.sol_size), low=low, high=high)
67 | self.V = np.random.uniform(size=(self.particle_size, self.sol_size))
68 | self.pbest = self.X
69 | self.p_fit = self.fitness(self.X, 1)
70 | best = np.min(self.p_fit)
71 | best_idx = np.argmin(self.p_fit)
72 | if best < self.fit:
73 | self.fit = best
74 | self.gbest = self.X[best_idx]
75 |
76 | def solve(self):
77 |         '''run the optimization; returns the best-fitness trace'''
78 | fitness = []
79 | w_step = (self.w - 0.1) / self.max_iter
80 | for k in range(1, self.max_iter+1):
81 | tmp_obj = self.fitness(self.X, k)
82 |
83 | # update pbest
84 | stack = np.hstack((tmp_obj.reshape((-1, 1)), self.p_fit.reshape((-1, 1))))
85 | best_arg = np.argmin(stack, axis=1).ravel().tolist()
86 | self.p_fit = np.minimum(tmp_obj, self.p_fit)
87 | X_expand = np.expand_dims(self.X, axis=2)
88 | p_best_expand = np.expand_dims(self.pbest, axis=2)
89 | concat = np.concatenate((X_expand, p_best_expand), axis=2)
90 | self.pbest = concat[range(0, len(best_arg)), :, best_arg]
91 |
92 | # update fit and gbest
93 | best = np.min(self.p_fit)
94 | best_idx = np.argmin(self.p_fit)
95 | if best < self.fit:
96 | self.fit = best
97 | self.gbest = self.pbest[best_idx] # the position that achieved p_fit[best_idx]
98 |
99 | # update velocities
100 | 
101 | # per-particle update (equivalent loop form, kept for reference)
102 | # for i in range(self.particle_size):
103 | # self.V[i] = self.w*self.V[i] + self.c1*random.random()*(self.pbest[i] - self.X[i]) + \
104 | # self.c2*random.random()*(self.gbest - self.X[i])
105 | # self.X[i] = self.X[i] + self.V[i]
106 |
107 | rand1 = np.random.random(size=(self.particle_size, self.sol_size))
108 | rand2 = np.random.random(size=(self.particle_size, self.sol_size))
109 | # vectorized swarm update
110 | self.V = self.kai * (self.w*self.V + self.c1*rand1*(self.pbest - self.X) + \
111 | self.c2*rand2*(self.gbest - self.X))
112 | self.V[self.V > self.vmax] = self.vmax
113 | self.V[self.V < -self.vmax] = -self.vmax
114 |
115 | self.X = self.X + self.V
116 | fitness.append(self.fit)
117 | self.w -= w_step
118 |
119 | return fitness
120 |
121 | # relative violated function
122 | def q(self, g):
123 | return np.maximum(0, g)
124 |
125 | # power of penalty function
126 | def gamma(self, qscore):
127 | result = np.zeros_like(qscore)
128 | result[qscore >= 1] = 2
129 | result[qscore < 1] = 1
130 | return result
131 |
132 | # multi-stage assignment function
133 | def theta(self, qscore):
134 | result = np.zeros_like(qscore)
135 | result[qscore < 0.001] = 10
136 | result[(qscore >= 0.001) & (qscore <= 0.1)] = 10 # the cited paper uses 20 for this band
137 | result[(qscore > 0.1) & (qscore <= 1)] = 100
138 | result[qscore > 1] = 300
139 | return result
140 |
141 | # non-stationary penalty coefficient; can be overridden, e.g. with sqrt(k) for easy problems
142 | def h(self, k):
143 | return k * math.sqrt(k)
144 |
145 | # aggregate penalty: sum of theta(q) * q^gamma(q) over all constraints
146 | def H(self, x):
147 | res = 0
148 | for cons_func in self.constraints:
149 | qscore = self.q(cons_func(x))
150 | if len(qscore.shape) == 1 or qscore.shape[1] == 1:
151 | qscore = qscore.reshape((-1, 1))
152 | res += self.theta(qscore) * np.power(qscore, self.gamma(qscore))
153 | else:
154 | for i in range(qscore.shape[1]):
155 | qscorei = qscore[:, i].reshape((-1, 1))
156 | res += self.theta(qscorei) * \
157 | np.power(qscorei, self.gamma(qscorei))
158 | return res
159 |
--------------------------------------------------------------------------------
/psoco/dist/psoco-0.0.0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/psoco/dist/psoco-0.0.0.tar.gz
--------------------------------------------------------------------------------
/psoco/dist/psoco-0.0.7.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/psoco/dist/psoco-0.0.7.tar.gz
--------------------------------------------------------------------------------
/psoco/dist/psoco-0.0.8.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jingw2/solver/19e8ec16fa28ee6a02ea619e2fd94a3e526aa035/psoco/dist/psoco-0.0.8.tar.gz
--------------------------------------------------------------------------------
/psoco/psoco.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: psoco
3 | Version: 0.0.8
4 | Summary: particle swarm optimization constraint optimization solver
5 | Home-page: https://github.com/jingw2/solver/tree/master/psoco
6 | Author: Jing Wang
7 | Author-email: jingw2@foxmail.com
8 | License: MIT
9 | Description: ## Particle Swarm Optimization Constraint Optimization Solver
10 |
11 | ### Arguments
12 | |Name |Type|Default Value|
13 | |-----|----|-------------|
14 | |particle_size|int|2000|
15 | |max_iter|int|1000|
16 | |sol_size|int|7|
17 | |fitness|function|None|
18 | |constraints|a list of functions|None|
19 |
20 | ### Usage
21 | 
22 |
23 | After transforming the constraints, the problem becomes:
24 |
25 | 
26 |
27 | Note: to find optimal solutions faster, initialize the population with problem-specific low and high bounds.
28 | ```python
29 | import psoco
30 | import math
31 |
32 | def objective(x):
33 | '''create objectives based on inputs x as 2D array'''
34 | return (x[:, 0] - 2) ** 2 + (x[:, 1] - 1) ** 2
35 |
36 |
37 | def constraints1(x):
38 | '''create constraint1 based on inputs x as 2D array'''
39 | return x[:, 0] - 2 * x[:, 1] + 1
40 |
41 |
42 | def constraints2(x):
43 | '''create constraint2 based on inputs x as 2D array'''
44 | return - (x[:, 0] - 2 * x[:, 1] + 1)
45 |
46 |
47 | def constraints3(x):
48 | '''create constraint3 based on inputs x as 2D array'''
49 | return x[:, 0] ** 2 / 4. + x[:, 1] ** 2 - 1
50 |
51 | def new_penalty_func(k):
52 | r'''Easy problems can use \sqrt{k}'''
53 | return math.sqrt(k)
54 |
55 | constraints = [constraints1, constraints2, constraints3]
56 | num_runs = 10
57 | # random initialization causes run-to-run variation, so run several times and average
58 | for _ in range(num_runs):
59 | pso = psoco.PSOCO(sol_size=2, fitness=objective, constraints=constraints)
60 | pso.h = new_penalty_func
61 | pso.init_Population(low=0, high=1) # lower and upper bounds of x; defaults are 0 and 1
62 | pso.solve()
63 | # best solutions
64 | x = pso.gbest.reshape((1, -1))
65 | ```
66 | ### Reference
67 | * [Particle Swarm Optimization Method for
68 | Constrained Optimization Problems](https://www.cs.cinvestav.mx/~constraint/papers/eisci.pdf)
69 |
70 | Platform: UNKNOWN
71 | Requires-Python: >=3.6
72 | Description-Content-Type: text/markdown
73 |
--------------------------------------------------------------------------------
/psoco/psoco.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | psoco/__init__.py
4 | psoco/psoco.py
5 | psoco.egg-info/PKG-INFO
6 | psoco.egg-info/SOURCES.txt
7 | psoco.egg-info/dependency_links.txt
8 | psoco.egg-info/top_level.txt
--------------------------------------------------------------------------------
/psoco/psoco.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/psoco/psoco.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | psoco
2 |
--------------------------------------------------------------------------------
/psoco/psoco/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | __all__ = ['PSOCO']
3 |
4 | from .psoco import PSOCO
--------------------------------------------------------------------------------
/psoco/psoco/psoco.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #-*-coding:utf-8-*-
3 |
4 |
5 | '''
6 | Particle Swarm Optimization Constraint Optimization
7 | Author: Jing Wang (jingw2@foxmail.com)
8 | '''
9 |
10 | import math
11 | import numpy as np
12 | import random
13 |
14 | class PSOCO:
15 |
16 | def __init__(self,
17 | particle_size=2000,
18 | max_iter=1000,
19 | sol_size=7,
20 | fitness=None,
21 | constraints=None):
22 | '''
23 | Particle Swarm Optimization Constraint Optimization
24 | Args:
25 | particle_size (int): number of particles
26 | max_iter (int): maximum number of iterations
27 | sol_size (int): dimension of the solution vector
28 | fitness (callable function): fitness function evaluated on the solutions x
29 | constraints (list): constraint functions, each expressed in the form g(x) <= 0
30 | '''
31 | self.c1 = 2
32 | self.c2 = 2
33 | self.w = 1.2 # inertia weight, decays linearly to 0.1 during solve()
34 | self.kai = 0.73 # constriction factor
35 | self.vmax = 4 # velocity clamp to keep the swarm from diverging
36 | self.particle_size = particle_size
37 | self.max_iter = max_iter
38 | self.sol_size = sol_size
39 |
40 | # pso parameters
41 | self.X = np.zeros((self.particle_size, self.sol_size))
42 | self.V = np.zeros((self.particle_size, self.sol_size))
43 | self.pbest = np.zeros((self.particle_size, self.sol_size)) # best position visited by each particle
44 | self.gbest = np.zeros((1, self.sol_size)) # best position found by the whole swarm
45 | self.p_fit = np.zeros(self.particle_size) # best fitness value of each particle
46 | self.fit = float("inf")
47 | self.iter = 1
48 |
49 | self.constraints = constraints
50 | if constraints is not None:
51 | for cons in constraints:
52 | if not callable(cons):
53 | raise Exception("Constraint is not callable or None!")
54 | if not callable(fitness):
55 | raise Exception("Fitness is not callable!")
56 | self.sub_fitness = fitness
57 |
58 | def fitness(self, x, k):
59 | '''fitness function plus penalty term'''
60 | obj = self.sub_fitness(x)
61 | obj = obj.reshape((-1, 1))
62 | return obj + self.h(k) * self.H(x)
63 |
64 | def init_Population(self, low=0, high=1):
65 | '''initialize particles uniformly within [low, high]'''
66 | self.X = np.random.uniform(size=(self.particle_size, self.sol_size), low=low, high=high)
67 | self.V = np.random.uniform(size=(self.particle_size, self.sol_size))
68 | self.pbest = self.X.copy() # copy so that pbest does not alias X
69 | self.p_fit = self.fitness(self.X, 1)
70 | best = np.min(self.p_fit)
71 | best_idx = np.argmin(self.p_fit)
72 | if best < self.fit:
73 | self.fit = best
74 | self.gbest = self.X[best_idx]
75 |
76 | def solve(self):
77 | '''run the optimization and return the best-fitness history'''
78 | fitness = []
79 | w_step = (self.w - 0.1) / self.max_iter
80 | for k in range(1, self.max_iter+1):
81 | tmp_obj = self.fitness(self.X, k)
82 |
83 | # update pbest
84 | stack = np.hstack((tmp_obj.reshape((-1, 1)), self.p_fit.reshape((-1, 1))))
85 | best_arg = np.argmin(stack, axis=1).ravel().tolist()
86 | self.p_fit = np.minimum(tmp_obj, self.p_fit)
87 | X_expand = np.expand_dims(self.X, axis=2)
88 | p_best_expand = np.expand_dims(self.pbest, axis=2)
89 | concat = np.concatenate((X_expand, p_best_expand), axis=2)
90 | self.pbest = concat[range(0, len(best_arg)), :, best_arg]
91 |
92 | # update fit and gbest
93 | best = np.min(self.p_fit)
94 | best_idx = np.argmin(self.p_fit)
95 | if best < self.fit:
96 | self.fit = best
97 | self.gbest = self.pbest[best_idx] # the position that achieved p_fit[best_idx]
98 |
99 | # update velocities
100 | 
101 | # per-particle update (equivalent loop form, kept for reference)
102 | # for i in range(self.particle_size):
103 | # self.V[i] = self.w*self.V[i] + self.c1*random.random()*(self.pbest[i] - self.X[i]) + \
104 | # self.c2*random.random()*(self.gbest - self.X[i])
105 | # self.X[i] = self.X[i] + self.V[i]
106 |
107 | rand1 = np.random.random(size=(self.particle_size, self.sol_size))
108 | rand2 = np.random.random(size=(self.particle_size, self.sol_size))
109 | # vectorized swarm update
110 | self.V = self.kai * (self.w*self.V + self.c1*rand1*(self.pbest - self.X) + \
111 | self.c2*rand2*(self.gbest - self.X))
112 | self.V[self.V > self.vmax] = self.vmax
113 | self.V[self.V < -self.vmax] = -self.vmax
114 |
115 | self.X = self.X + self.V
116 | fitness.append(self.fit)
117 | self.w -= w_step
118 |
119 | return fitness
120 |
121 | # relative violated function
122 | def q(self, g):
123 | return np.maximum(0, g)
124 |
125 | # power of penalty function
126 | def gamma(self, qscore):
127 | result = np.zeros_like(qscore)
128 | result[qscore >= 1] = 2
129 | result[qscore < 1] = 1
130 | return result
131 |
132 | # multi-stage assignment function
133 | def theta(self, qscore):
134 | result = np.zeros_like(qscore)
135 | result[qscore < 0.001] = 10
136 | result[(qscore >= 0.001) & (qscore <= 0.1)] = 10 # the cited paper uses 20 for this band
137 | result[(qscore > 0.1) & (qscore <= 1)] = 100
138 | result[qscore > 1] = 300
139 | return result
140 |
141 | # non-stationary penalty coefficient; can be overridden, e.g. with sqrt(k) for easy problems
142 | def h(self, k):
143 | return k * math.sqrt(k)
144 |
145 | # aggregate penalty: sum of theta(q) * q^gamma(q) over all constraints
146 | def H(self, x):
147 | res = 0
148 | for cons_func in self.constraints:
149 | qscore = self.q(cons_func(x))
150 | if len(qscore.shape) == 1 or qscore.shape[1] == 1:
151 | qscore = qscore.reshape((-1, 1))
152 | res += self.theta(qscore) * np.power(qscore, self.gamma(qscore))
153 | else:
154 | for i in range(qscore.shape[1]):
155 | qscorei = qscore[:, i].reshape((-1, 1))
156 | res += self.theta(qscorei) * \
157 | np.power(qscorei, self.gamma(qscorei))
158 | return res
159 |
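160 | if __name__ == "__main__":
161 |     # Illustrative smoke test (a sketch added for clarity, not part of the
162 |     # published package): minimize (x0 - 1)^2 + (x1 - 1)^2
163 |     # subject to x0 + x1 - 1 <= 0 (the optimum is x = (0.5, 0.5)).
164 |     demo = PSOCO(particle_size=200, max_iter=100, sol_size=2,
165 |                  fitness=lambda x: (x[:, 0] - 1) ** 2 + (x[:, 1] - 1) ** 2,
166 |                  constraints=[lambda x: x[:, 0] + x[:, 1] - 1])
167 |     demo.init_Population(low=0, high=1)
168 |     demo.solve()
169 |     print("best solution:", demo.gbest, "best fitness:", float(demo.fit))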
--------------------------------------------------------------------------------
/psoco/setup.py:
--------------------------------------------------------------------------------
1 | try:
2 | from setuptools import setup
3 | except ImportError:
4 | from distutils.core import setup
5 | from setuptools import find_packages
6 |
7 | from os import path
8 | this_directory = path.abspath(path.dirname(__file__))
9 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
10 | long_description = f.read()
11 |
12 | VERSION = "0.0.8"
13 | LICENSE = "MIT"
14 | setup(name='psoco',
15 | version=VERSION,
16 | description='particle swarm optimization constraint optimization solver',
17 | long_description=long_description,
18 | long_description_content_type='text/markdown',
19 | url='https://github.com/jingw2/solver/tree/master/psoco',
20 | author='Jing Wang',
21 | author_email='jingw2@foxmail.com',
22 | license=LICENSE,
23 | packages=find_packages(),
24 | python_requires='>=3.6')
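25 | 
26 | # For reference, the standard setuptools workflow to rebuild the artifacts in
27 | # dist/ (assumes the `wheel` package is installed for the bdist_wheel step):
28 | #   python setup.py sdist bdist_wheel
29 | #   pip install dist/psoco-0.0.8.tar.gz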
--------------------------------------------------------------------------------
/psoco/tests/tests.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 |
3 | import sys
4 | import os
5 | import numpy as np
6 | dirpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
7 | sys.path.append(dirpath)
8 | import psoco
9 | import math
10 |
11 | def objective(x):
12 | '''create objectives based on inputs x as 2D array'''
13 | return (x[:, 0] - 2) ** 2 + (x[:, 1] - 1) ** 2
14 |
15 |
16 | def constraints1(x):
17 | '''create constraint1 based on inputs x as 2D array'''
18 | return x[:, 0] - 2 * x[:, 1] + 1
19 |
20 |
21 | def constraints2(x):
22 | '''create constraint2 based on inputs x as 2D array'''
23 | return - (x[:, 0] - 2 * x[:, 1] + 1)
24 |
25 |
26 | def constraints3(x):
27 | '''create constraint3 based on inputs x as 2D array'''
28 | return x[:, 0] ** 2 / 4. + x[:, 1] ** 2 - 1
29 |
30 | def new_penalty_func(k):
31 | r'''Easy problems can use \sqrt{k}'''
32 | return math.sqrt(k)
33 |
34 | constraints = [constraints1, constraints2, constraints3]
35 | num_runs = 10
36 | # random initialization causes run-to-run variation, so run several times and average
37 | sol_size = 2
38 | results = np.zeros((num_runs, sol_size))
39 | for r in range(num_runs):
40 | pso = psoco.PSOCO(sol_size=sol_size, fitness=objective, constraints=constraints)
41 | pso.h = new_penalty_func
42 | pso.init_Population(low=0, high=1) # lower and upper bounds of x; defaults are 0 and 1
43 | pso.solve()
44 | # best solutions
45 | x = pso.gbest.reshape((1, -1))
46 | results[r] = x
47 |
48 | results = np.mean(results, axis=0)
49 | print("results: ", results)
50 | # the known optimum of this problem is approximately (0.8229, 0.9114); use a tolerance since the solver is stochastic
51 | assert np.allclose(results, [0.8229, 0.9114], atol=0.02)
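52 | 
53 | # Additional check (an illustrative sketch, not in the original test): the
54 | # averaged solution should satisfy every constraint up to stochastic noise.
55 | x2d = np.asarray(results).reshape((1, -1))
56 | for cons in constraints:
57 |     assert np.all(cons(x2d) <= 0.05), "constraint violated beyond tolerance"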
--------------------------------------------------------------------------------