├── .gitignore ├── img ├── fib-snake.png ├── longterm-memory.png ├── shortterm-memory.png └── fibonacci_timeseries.png ├── subjective_drawdown_models ├── subjective_drawdown_model1.pkl └── subjective_drawdown_model2.pkl ├── demo_fibonacci_ml.py ├── variables.py ├── readme.md ├── subjective_drawdown_finder.py ├── fib_utils.py └── core.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \#*\# 3 | .\#* 4 | fibonacci_ml_v0.py 5 | -------------------------------------------------------------------------------- /img/fib-snake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/fib-snake.png -------------------------------------------------------------------------------- /img/longterm-memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/longterm-memory.png -------------------------------------------------------------------------------- /img/shortterm-memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/shortterm-memory.png -------------------------------------------------------------------------------- /img/fibonacci_timeseries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/fibonacci_timeseries.png -------------------------------------------------------------------------------- /subjective_drawdown_models/subjective_drawdown_model1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/subjective_drawdown_models/subjective_drawdown_model1.pkl -------------------------------------------------------------------------------- /subjective_drawdown_models/subjective_drawdown_model2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/subjective_drawdown_models/subjective_drawdown_model2.pkl -------------------------------------------------------------------------------- /demo_fibonacci_ml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | import yfinance as yf 4 | import pandas as pd 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import copy 8 | import re 9 | 10 | from fibonacci_ml.core import * 11 | 12 | # get timeseries for QQQ 13 | data = yf.download("QQQ") 14 | 15 | # initialize the FibonacciTechnicalAnalysis object 16 | fib_maker = FibonacciTechnicalAnalysis(data, drawdown_criteria=0.20, do_plot=False) 17 | 18 | # make the features 19 | features = fib_maker.make_fib_features() 20 | 21 | -------------------------------------------------------------------------------- /variables.py: -------------------------------------------------------------------------------- 1 | TARGET_DENSITY = 0.235 2 | DRAWDOWN_CRITERIA=0.2 3 | RECOVERY_CRITERIA=0.02 4 | FIB_LEVELS = [0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09, 17.944] 5 | 6 | def GET_WEIGHTS_FOR_LONGTERM_MEMORY_ORIG(drawdown_criteria=None): 7 | """default weights""" 8 | if drawdown_criteria is None: 9 | drawdown_criteria = DRAWDOWN_CRITERIA 10 | 11 | weights_for_longterm_memory = {'crit1':{'sd':1/365, 'mu':0, 'p':1}, 12 | 'crit2':{'sd':1/drawdown_criteria, 'mu':0, 'p':1.3}, 13 | 'crit3':{'sd':1/365, 'mu':0, 'p':1}, 14 | 'crit4':{'sd':1/(365*drawdown_criteria), 'mu':0, 'p':1.2}, 15 | 'crit5':{'sd':1.3, 'mu':-2.6, 'p':1} 16 | } 17 | return weights_for_longterm_memory 18 | 19 | # 20 | DEFAULT_MEMORY_FEATURES = {'max_drawdown': {1: 0.5150874123152283, 2: 0.566382779151033, 3: 0.6531398034264879}, 21 | 'time_since_peak': {1: 6.8089679500637335, 2: 7.651139423056396, 3: 8.308649817574022}, 22 | 'duration': {1: 6.389924016638725, 2: 6.837994549153265, 3: 7.518228268861231}, 23 | 'precovery': {1: -0.05884945292016635, 2: -0.525211373546828, 3: -0.7116667775865574}, 24 | 'fib_lev':{1: -1, 2: -1, 3: -1}, 25 | 'box01':{1: 0.45192856058112363, 2: 0.44829321386954174, 3: 0.45923949807178127}} 26 | #'topdist': {1: 0.2031, 2: 0.3948, 3: 0.8395}, 27 | #'botdist': {1: 0.0992, 2: 0.0506, 3: 0.1165}} # 28 | 29 | # best: (1.7585311911056594, 0.27694100799162485, 1.4815901831140343) 30 | #x0= np.array([ 3.45477257e+02, 0.00000000e+00, 9.95226003e-01, 31 | # 9.34217669e-02, 1.24536817e+00, 32 | # 3.79783410e+02, -3.04482610e+00, 1.00425408e+00, 33 | # 3.47102625e+02, -1.07730288e-01, 1.19487055e+00, 34 | # 1.39050476e+00, -2.69211513e+00, 1.00000000e+00]) 35 | 36 | def GET_WEIGHTS_FOR_LONGTERM_MEMORY(drawdown_criteria=None): 37 | """default weights""" 38 | if drawdown_criteria is None: 39 | drawdown_criteria = DRAWDOWN_CRITERIA 40 | 41 | weights_for_longterm_memory = {'crit1':{'sd':1/345.477257, 'mu':0, 'p':0.995226003}, 42 | 'crit2':{'sd':1/drawdown_criteria, 'mu':0.0934217669, 'p':1.24536}, 43 | 'crit3':{'sd':1/379.78, 'mu':-3.0448261, 'p':1.00425408}, 44 | 'crit4':{'sd':1/(347.10*drawdown_criteria), 'mu':-0.10773, 'p':1.19487055}, 45 | 'crit5':{'sd':1.3905, 'mu':-2.692115, 'p':1} 46 | } 47 | return weights_for_longterm_memory 48 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Fibonacci ML: Automatic Fib Extensions/Retracements for Machine Learning 2 | 3 | This repository contains code for automatically finding fibonacci-retracements in a price timeseries, and converting them _into features for statistical analysis_ (i.e., feature engineering), like "the percentage that a spot-price is between two fib-levels" (`fib-box`) to know whether the spot price is closer to a fib-level below or a fib-level above. See below for all features. 4 | 5 | The project attempts to: 6 | - remove the subjectiveness of drawing fibonacci retracements. 7 | - convert fib-retracement & extensions (both short-term and long-term) into a time-series of features that can be easily loaded into a machine-learning model (e.g, to model prices according to fib-features). 8 | 9 | - Inputs: 10 | -- pandas dataframe of OHLC prices 11 | - Outputs: 12 | -- pandas dataframe of features corresponding to a contiguous time-series representing *all* fib-retracements and extensions. 13 | 14 | The main features and benefits of the approach are the following: 15 | - automatically finds drawdowns with either: 16 | 1. a user-specified `drawdown_criteria` (usually 0.2); 17 | 2. automatically finds an appropriate criteria, adjusting for the volatility of the ticker under-analysis (e.g., we want a larger drawdown criteria for more volatile stocks). 18 | - converts the fibs into a smooth contiguous timeseries of features of the drawdown (`max_drawdown`,`duration`, `precovery` (percent recovery), `fib_lev` (the fib-level), `time_since_peak_d`, and `fib-box01` (the % that price is between two fib-levels). 19 | - tracks three fibonacci-retracements in parallel: i) most recent drawdown; ii) the previous drawdown; and iii) the "long-term memory" of any significant monster drawdowns (even decades earlier). For example, the dot-com dubble resulted in a moster drawdown in the Nasdaq and who's fibonacci extensions may still be important as features a decade later. 20 | - doesn't "cheat": the features at time `t` are never calculated using information from the future, which many backtesters violate when they first make fib-retracements for an entire time-series, and then fail to mask/hide the future information from prices in the past (illegal!) 21 | 22 | The figure below (top) shows the automatic finding of retracements and extensions for `QQQ`. Notice that the levels correspond to: `0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09, 17.944` 23 | 24 | ![](img/fibonacci_timeseries.png?raw=true) 25 | 26 | 27 | 28 | ## Demonstration 29 | 30 | See the file `demo_fibonacci_ml.py`. 31 | 32 | ``` 33 | import os 34 | import datetime 35 | import yfinance as yf 36 | import pandas as pd 37 | import numpy as np 38 | import matplotlib.pyplot as plt 39 | import copy 40 | import re 41 | 42 | from fibonacci_ml.core import * 43 | 44 | # get timeseries for QQQ 45 | data = yf.download("QQQ") 46 | 47 | # initialize the FibonacciTechnicalAnalysis object 48 | fib_maker = FibonacciTechnicalAnalysis(data, drawdown_criteria=0.20, do_plot=False) 49 | 50 | # make the features 51 | features = fib_maker.make_fib_features() 52 | ``` 53 | Summary of features for `QQQ`. 54 | 55 | ``` 56 | count mean std min 25% 50% 75% max 57 | fib-1_max_drawdown_d 5578.0 0.440728 0.150744 0.204047 0.282772 0.530009 0.530974 0.767416 58 | fib-1_time_since_peak_d 5578.0 6.541665 0.969640 2.772589 5.937536 6.781058 7.321189 7.953318 59 | fib-1_duration_d 5578.0 6.031123 1.106973 2.772589 5.003946 6.389924 6.997596 7.254885 60 | fib-1_precovery_d 5578.0 -0.025882 0.352544 -1.090901 -0.201193 0.002114 0.178860 0.767416 61 | fib-1_fib_lev_d 5578.0 -0.114710 0.542956 -1.000000 -0.432416 -0.055316 0.098612 1.248340 62 | fib-1_box01_d 5578.0 0.458309 0.280091 0.000000 0.207130 0.451929 0.689416 0.999762 63 | fib-2_max_drawdown_d 5578.0 0.580211 0.178354 0.204047 0.530009 0.530974 0.799657 0.830577 64 | fib-2_time_since_peak_d 5578.0 7.759018 0.507455 6.265301 7.523076 7.824046 8.162516 8.511779 65 | fib-2_duration_d 5578.0 6.936482 0.810381 4.369448 6.837995 7.254885 7.254885 7.956126 66 | fib-2_precovery_d 5578.0 -0.346507 0.803978 -2.420680 -0.814035 -0.522701 0.585802 0.830577 67 | fib-2_fib_lev_d 5578.0 -0.019981 0.838561 -1.000000 -1.000000 0.098612 0.830339 1.248340 68 | fib-2_box01_d 5578.0 0.466216 0.273921 0.001096 0.245649 0.448293 0.700172 0.999992 69 | fib-3_max_drawdown_d 5578.0 0.759640 0.086926 0.653140 0.653140 0.830577 0.830577 0.830577 70 | fib-3_time_since_peak_d 5578.0 8.444213 0.243571 7.957527 8.308650 8.308650 8.645894 8.950403 71 | fib-3_duration_d 5578.0 8.099524 0.506587 7.518228 7.518228 8.206993 8.645894 8.694670 72 | fib-3_precovery_d 5578.0 -0.255281 0.599137 -1.905022 -0.711667 -0.526383 0.358030 0.782663 73 | fib-3_fib_lev_d 5578.0 -0.503271 0.543082 -1.000000 -1.000000 -0.613378 -0.055316 0.830339 74 | fib-3_box01_d 5578.0 0.471312 0.235317 0.000014 0.388402 0.459239 0.545765 0.999927 75 | ``` 76 | 77 | The pandas object `features` can then be used with price or other TA features in a machine-learning model for time-series analysis. That is what I do! 78 | 79 | ## Price Snaking Through Fib-Levels 80 | 81 | Here is the QQQ price as it snakes through various fibonacci-retracement and extension levels. 82 | 83 | ![](img/fib-snake.png?raw=true) 84 | 85 | 86 | ## Memory 87 | 88 | The `FibonacciTechnicalAnalysis` has 3 memories: it tracks three retracements in parallel so that price is "aware" of multiple drawdowns 89 | - Memory 1: the current/most recent drawdown. This is typically what most analysts focus on for short-term pivots 90 | - Memory 2: the previous drawdown. TA analysts often pay attention when the fib-levels from two different drawdowns align. 91 | - Long-term Memory: a model is used to track the long-term monster drawdowns, often spanning decades. E.g., some TA analysts refer to the 1999 Nasdaq drawdown for fib-extensions. 92 | 93 | The following two graphs compare the Memory-1 drawdowns vs the Long-Term Memory drawdowns. The blue-dash lines represent, at any given point in time, which fib-levels are in the current "memory" and thus exposed to price. Notice that Memory-1, the fib levels are changing with each new drawdown. However, the long-term memory seems to *only* can about the monster-drawdown in 1999. 94 | 95 | ### short-term memory: 96 | 97 | ![](img/shortterm-memory.png?raw=true) 98 | 99 | ### long-term memory: 100 | 101 | ![](img/longterm-memory.png?raw=true) 102 | 103 | -------------------------------------------------------------------------------- /subjective_drawdown_finder.py: -------------------------------------------------------------------------------- 1 | # classes and functions to automatically determine a good drawdown-criteria (% drawdown peak to tough) for making fibonacci-extensions/retractements 2 | # uses two boosted-trees models, plus some heuristics, in order to find a good criteria 3 | import os 4 | import datetime 5 | import yfinance as yf 6 | import pandas as pd 7 | import numpy as np 8 | import ta 9 | import matplotlib.pyplot as plt 10 | import copy 11 | import re 12 | import pickle 13 | from sklearn.tree import DecisionTreeRegressor 14 | 15 | from .variables import RECOVERY_CRITERIA, TARGET_DENSITY 16 | from .fib_utils import * 17 | 18 | CORE_PATH = os.path.abspath(os.path.dirname(__file__)) 19 | 20 | # main class of this file 21 | class SubjectiveDrawdown: 22 | """ 23 | models and functios to find optimal drawdown for making fibonacci extensions 24 | principal function is self.fit() 25 | """ 26 | def __init__(self, verbose =None, target_density=None, drawdown_cap = None, recovery_criteria=None, path_to_model_pred = None, path_to_model_refine = None): 27 | 28 | if verbose is None: 29 | verbose = False 30 | self.verbose = verbose 31 | 32 | # default target_density 33 | if target_density is None: 34 | target_density=TARGET_DENSITY 35 | self.target_density = target_density 36 | 37 | # cap the range of plausible drawdown criteria 38 | if drawdown_cap is None: 39 | drawdown_cap = [0.05, 0.7] 40 | self.drawdown_cap= drawdown_cap 41 | 42 | # criteria to judge when a retracement is finished (from peak) 43 | if recovery_criteria is None: 44 | recovery_criteria = RECOVERY_CRITERIA 45 | self.recovery_criteria = recovery_criteria 46 | 47 | # load the probabilistic models 48 | self.model = SubjectiveDrawdownModels(path_to_model_pred = path_to_model_pred, 49 | path_to_model_refine = path_to_model_refine, 50 | verbose=verbose) 51 | 52 | def prefeature_trend(self, data, focal_column=None): 53 | """ mean and std (around residuals)""" 54 | if focal_column is None: 55 | focal_column = 'Close' 56 | 57 | # y data 58 | y = np.log(np.clip(data[focal_column], a_min = 0.001, a_max = None)) 59 | y = ((y-y.mean()).values)#/y.std() 60 | 61 | # x data 62 | x = ((data.index - data.index.mean()).days).values/365 63 | 64 | # slope and intercept 65 | m = (len(x) * np.sum(x*y) - np.sum(x) * np.sum(y)) / (len(x)*np.sum(x*x) - np.sum(x) * np.sum(x)) # long-run log-linear increase 66 | b = (np.sum(y) - m *np.sum(x)) / len(x) 67 | 68 | # take the variance around the dominant trend 69 | residuals = y-(x*m+b) 70 | std_ = residuals.std() 71 | 72 | return m,std_ 73 | 74 | def prefreature_realizedvol(self, data, hlc_columns=['High', 'Low', 'Close']): 75 | """basically standard-deviation, notice we exclude open because of API issues 76 | instead of std from the mean price, we take it from the previous price 77 | """ 78 | 79 | # log the prices for highh low close 80 | y_hlc = [np.log(np.clip(data[col].values,a_min=0.01,a_max=None)) for col in hlc_columns] 81 | 82 | # split into hi-close and lo-close 83 | y_hc = np.concatenate([y_hlc[0].reshape(-1,1)]+[y_hlc[-1].reshape(-1,1)], axis=1).reshape(-1) 84 | y_lc = np.concatenate([y_hlc[1].reshape(-1,1)]+[y_hlc[-1].reshape(-1,1)], axis=1).reshape(-1) 85 | 86 | # difference between close and previous hi 87 | mean_realized_volatility = (((np.diff(y_hc)**2).sum() + (np.diff(y_lc)**2).sum())/(len(y_hc) + len(y_lc)-2))**0.5 88 | 89 | # same as above, but limited to only downsides 90 | y_close_diff = np.diff(y_hlc[-1]) 91 | y_close_downside_diff = y_close_diff[np.where(y_close_diff<=0)[0]] 92 | mean_downside_volatility = ((y_close_downside_diff**2).mean())**0.5 93 | 94 | # 95 | return mean_realized_volatility, mean_downside_volatility 96 | 97 | def _optimal_drawdown_for_fibs_probablistic_estimator(self, data, target_density): 98 | """estimate an initial drawdown criteria, through a probabilistic model""" 99 | # get features: trend and std 100 | ftrend,fstd = self.prefeature_trend(data) 101 | if (str(ftrend)=='nan') or str(fstd)=='nan': 102 | raise ValueError("trend or std") 103 | 104 | # get features: volatility and downside vol 105 | fvol, fvoldown = self.prefreature_realizedvol(data, hlc_columns=['High', 'Low', 'Close']) 106 | if (str(fvol)=='nan') or str(fvoldown)=='nan': 107 | raise ValueError("trend or std") 108 | 109 | # features must be ordered: ['drawdown_crit', 'trend', 'std', 'vol', 'vold'] 110 | drawdown_criterias = np.linspace(0.05, 0.5, 50).reshape(-1,1) 111 | X = np.concatenate([drawdown_criterias, np.array([ftrend]*50).reshape(-1,1), np.array([fstd]*50).reshape(-1,1), np.array([fvol]*50).reshape(-1,1), np.array([fvoldown]*50).reshape(-1,1)],axis=1) 112 | # pdensity 113 | pdensity = self.model.predict(X) 114 | 115 | # drawdown criteria suggested 116 | drawdown_crit_suggested = drawdown_criterias[np.argmin((pdensity - target_density)**2)][0] 117 | 118 | return drawdown_crit_suggested, [ftrend, fstd, fvol, fvoldown, target_density] 119 | 120 | def _get_fibs(self, data, drawdown_crit, recovery_criteria=None): 121 | """ wrapper for find_all_retracement_boxes and Fib to make a time-series of fibs""" 122 | if recovery_criteria is None: 123 | recovery_criteria = self.recovery_criteria 124 | 125 | fib_spans = find_all_retracement_boxes(data, drawdown_criteria=drawdown_crit) 126 | # Fib(fib_span=fib_span, data=self.data, drawdown_criteria=self.drawdown_criteria, fib_levels=sexlf.fib_levels, recovery_criteria = self.recovery_criteria, make_features = make_features) 127 | fib_series = [Fib(fib_span=fib_span, data=data, drawdown_criteria=drawdown_crit, recovery_criteria=0.02, fib_levels = [0,1,1.618]) for fib_span in fib_spans] 128 | # remove null fibs (must pass .is_fib) 129 | fib_series = [fib for fib in fib_series if fib.is_fib] 130 | return fib_series 131 | 132 | def _density_of_drawdowns_given_fibs(self, fib_series, data=None, delta_time=None): 133 | """estimates the annual density of fibs""" 134 | # total time duration of series 135 | if delta_time is None: 136 | delta_time = (data.index[-1] - data.index[0]).days/365 137 | return len(fib_series)/delta_time 138 | 139 | def _densities_by_kulling(self, fib_series, delta_time, orig_drawdown = None, results = None): 140 | """empirical calculation of the relationship between drawdown and densities, by progressivingly kulling drawdowns""" 141 | # maxdrawdowns 142 | if orig_drawdown is None: 143 | orig_drawdown = 0.2 144 | 145 | # results 146 | if results is None: 147 | results = pd.DataFrame({'drawdown_crit':[orig_drawdown], 'density':[len(fib_series)/delta_time]}) 148 | if len(fib_series)==0: 149 | return results 150 | 151 | max_drawdowns = [fib.features['max_drawdown'].max() for fib in fib_series] 152 | max_drawdowns = sorted(max_drawdowns) 153 | 154 | for i,drawdown_crit in enumerate(max_drawdowns): 155 | 156 | density_ =[len(max_drawdowns[(i+1):])/delta_time] 157 | 158 | crit_ = [drawdown_crit*1.001] 159 | 160 | if (np.abs(drawdown_crit*1.001 - results['drawdown_crit'].values).min() > 0.0005): 161 | results = results.append(pd.DataFrame({'drawdown_crit':crit_, 'density':density_})) 162 | 163 | return results 164 | 165 | def _drawdown_manual_finder(self, data, results, target_density, increment = None): 166 | """ 167 | uses recursion to find a target density 168 | increments a drawdown by 'increment' multiplicatively 169 | """ 170 | # do recursion if all results are 0, or no results are greater than target 171 | do_recursion = (results['density']==0).all() or (not (results['density'] >= target_density).any() ) 172 | 173 | if not do_recursion: 174 | return results 175 | 176 | if increment is None: 177 | increment = 0.95 178 | 179 | drawdown_crit_increment = increment*results['drawdown_crit'].min() 180 | 181 | fib_series = self._get_fibs(data, drawdown_crit_increment) 182 | delta_time = (data.index[-1] - data.index[0]).days/365 183 | 184 | # initial results 185 | results = results.append(pd.DataFrame({'drawdown_crit':[drawdown_crit_increment], 186 | 'density':[1.001*len(fib_series)/delta_time]})) 187 | # initial empirical results 188 | results = self._densities_by_kulling(fib_series, 189 | delta_time, 190 | orig_drawdown =drawdown_crit_increment, 191 | results = results) 192 | 193 | do_recursion = (results['density']==0).all() or (not (results['density'] >= target_density).any()) 194 | if do_recursion: 195 | return self._drawdown_manual_finder(data, results, target_density, increment) 196 | 197 | return results 198 | 199 | def fit(self, data, target_density=None, drawdown_cap=None): 200 | """estimate an initial drawdown criteria, through: 201 | - step1: a probabilistic model 202 | - step2: iterate through fibs and kull one-by-one, empirically measuring the densities""" 203 | assert type(data) == pd.core.frame.DataFrame 204 | assert 'Close' in data.columns 205 | 206 | if target_density is None: 207 | target_density = self.target_density 208 | 209 | if drawdown_cap is None: 210 | drawdown_cap = self.drawdown_cap 211 | 212 | drawdown_crit, X = self._optimal_drawdown_for_fibs_probablistic_estimator(data, target_density) 213 | # get fibs and calculate the density of drawdownd 214 | fibs = self._get_fibs(data, drawdown_crit) 215 | 216 | # calculate density and residuals 217 | delta_time = (data.index[-1] - data.index[0]).days/365 218 | realized_density = 1.001*len(fibs)/delta_time 219 | resid = target_density - realized_density 220 | 221 | if self.verbose: 222 | print("%s: DD1 %0.3f:%0.3f fibs/year" % (ticker, drawdown_crit, realized_density)) 223 | 224 | # initial results 225 | results = pd.DataFrame({'drawdown_crit':[drawdown_crit], 'density':[realized_density]}) 226 | 227 | # next estimate: trigger next model 228 | if realized_density < target_density: 229 | 230 | # run next model (refinement) 231 | X += [drawdown_crit, resid] 232 | drawdown_crit = self.model.refine(X)#[0] 233 | fibs = self._get_fibs(data, drawdown_crit) 234 | 235 | realized_density = 1.001*len(fibs)/delta_time 236 | results = pd.DataFrame({'drawdown_crit':[drawdown_crit], 'density':[realized_density]}) 237 | if self.verbose: 238 | print("%s: DD2 %0.3f:%0.3f fibs/year" % (ticker, drawdown_crit, realized_density)) 239 | 240 | # initial empirical results 241 | results = self._densities_by_kulling(fibs, delta_time, orig_drawdown =drawdown_crit, results = results) 242 | # recursively find drawdown closer to the target 243 | results = self._drawdown_manual_finder(data, results, target_density) 244 | 245 | # get what?: at least as great as the target_density, but closeest 246 | ix_meet_or_exceed_criteria = np.where(results.density >= target_density)[0] 247 | 248 | if len(ix_meet_or_exceed_criteria)>0: 249 | results_sub = results.iloc[ix_meet_or_exceed_criteria] 250 | else: 251 | results_sub = results 252 | 253 | drawdown_crit_suggested = results_sub.drawdown_crit.iat[np.argmin((results_sub.density - target_density)**2)] 254 | if self.verbose: 255 | print("%s: DD3 %0.3f FINAL" % (ticker, drawdown_crit_suggested)) 256 | 257 | # clip the drawdown output 258 | if drawdown_crit_suggested> max(drawdown_cap): 259 | drawdown_crit_suggested = max(drawdown_cap) 260 | elif drawdown_crit_suggested < min(drawdown_cap): 261 | drawdown_crit_suggested = min(drawdown_cap) 262 | 263 | return drawdown_crit_suggested, results 264 | 265 | class SubjectiveDrawdownModels: 266 | """container for two boosting models that predict drawdown-criterias""" 267 | def __init__(self, path_to_model_pred = None, path_to_model_refine = None, verbose=False, unit_test = True): 268 | self.verbose = verbose 269 | #print("current_path; %s" % current_path) 270 | if path_to_model_pred is None: 271 | path_to_model_pred = os.path.join(CORE_PATH, "subjective_drawdown_models/subjective_drawdown_model1.pkl") 272 | if path_to_model_refine is None: 273 | path_to_model_refine = os.path.join(CORE_PATH, "subjective_drawdown_models/subjective_drawdown_model2.pkl") 274 | 275 | self.path_to_model_pred = path_to_model_pred 276 | self.path_to_model_refine = path_to_model_refine 277 | 278 | # load the models 279 | self.load_model_pred() 280 | self.load_model_refine() 281 | 282 | # unit_test on load 283 | if unit_test: 284 | self.run_tests() 285 | 286 | def load_model_pred(self): 287 | """load the model one/predictor model (sklearn boosted regression trees) """ 288 | if self.verbose: 289 | print("loading drawdown prediction model 1 %s" % self.path_to_model_pred) 290 | with open(self.path_to_model_pred, 'rb') as pcon: 291 | mod_pred = pickle.load(pcon) 292 | self.mod_pred = mod_pred 293 | 294 | def load_model_refine(self): 295 | """load the model two/refiner model (sklearn boosted regression trees) """ 296 | if self.verbose: 297 | print("loading drawdown refinement model 2 %s" % self.path_to_model_refine) 298 | with open(self.path_to_model_refine, 'rb') as pcon: 299 | mod_refine = pickle.load(pcon) 300 | self.mod_refine = mod_refine 301 | 302 | def predict(self, X): 303 | """prediction from model one""" 304 | if isinstance(X, list): 305 | X = np.array(X).reshape(1,-1) 306 | 307 | return self.mod_pred.predict(X) 308 | 309 | def refine(self, X2): 310 | """conditional one the residuals from model one, and one data-download, """ 311 | if isinstance(X2, list): 312 | X2 = np.array(X2).reshape(1,-1) 313 | return self.mod_refine.predict(X2)[0] 314 | 315 | def run_tests(self): 316 | """ units tests on models""" 317 | p = self.predict(np.array([[0.58720078, 0.39931927, 0.2731371 , 0.04188929, 0.0312278 ]])) 318 | print("testing model 1 (predictor)") 319 | assert (p[0] - 0.28313829505556226) < 10**-6 320 | 321 | q = self.refine(np.array([[0.39931927012611657, 0.273137095058999, 0.041889285755502804, 0.03122779683661232, 0.26, 0.4908163265306123, -4.626334519569619e-05]])) 322 | print("testing model 2 (refiner)") 323 | assert (q - 0.46655745847591307) < 10**-6 324 | 325 | #foo = SubjectiveDrawdownModels(unit_test = True) 326 | #subjective_drawdown = SubjectiveDrawdown(verbose =True, target_density=0.25) 327 | 328 | -------------------------------------------------------------------------------- /fib_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | from scipy.stats import trim_mean 6 | from .variables import TARGET_DENSITY, RECOVERY_CRITERIA, FIB_LEVELS 7 | 8 | # Fib, find_all_retracement_boxes, find_retracement_boxes, get_highs, get_lows 9 | 10 | def get_highs(data): 11 | """takes average high and max(close,open). I.e., split the difference between a body-candle and a wick""" 12 | return (0.5*data['High'] +0.5*data[['Close','Open']].max(axis=1)) 13 | 14 | def get_lows(data): 15 | """takes average of low and low(close,open)""" 16 | return (0.5*data['Low'] +0.5*data[['Close','Open']].min(axis=1)) 17 | 18 | def find_retracement_boxes(data, drawdown_criteria,do_plot=False, offset=None, recovery_criteria=None): 19 | """when drawing fibonacci retrace/extensions, you need to draw a box from the local high to low; this function automatically finds such boxes on which to base the retracement/extension-levels; 20 | returns list of tuples [(m1,2),...]. m1:= start of retracement box; m2:=end of retrace box""" 21 | if offset is None: 22 | offset = 0 23 | 24 | if recovery_criteria is None: 25 | recovery_criteria = 0.02 26 | 27 | # cumulative highs during period 28 | vHighs = get_highs(data) 29 | cummax = vHighs.cummax() 30 | # vector of % draw-dwosn 31 | vDrawdowns = (cummax - get_lows(data))/cummax 32 | vRecovery = (cummax - vHighs)/cummax 33 | 34 | # crude binary indicator of whether or not price is in a drawdown 35 | #in_Drawdowns = vDrawdowns>=drawdown_criteria 36 | 37 | rDrawdowns = [] 38 | r_drawdown = 0 39 | #in_drawdown = 0 40 | # loop backwards 41 | for i in range(len(vDrawdowns)-1,-1,-1): 42 | #in_drawdown_lag = in_drawdown 43 | #in_drawdown = in_Drawdowns.iloc[i] 44 | #reset_ = (vDrawdowns.iloc[i]<=recovery_criteria) 45 | reset_ = (vRecovery.iloc[i]<=recovery_criteria) 46 | r_drawdown = max(r_drawdown, vDrawdowns.iloc[i]) if not reset_ else 0 47 | rDrawdowns.append(r_drawdown) 48 | 49 | rDrawdowns = np.array(rDrawdowns[::-1]) 50 | 51 | # putative fib periods (vector 52 | fib_periods = rDrawdowns*(rDrawdowns>=drawdown_criteria) + np.zeros(len(rDrawdowns)) 53 | 54 | # fib periods are useful for finding local fib periods 55 | # now split the fib periods into spans 56 | idx_fib_periods = np.where(np.diff(1*(fib_periods>0)))[0] 57 | if fib_periods[-1]>0: 58 | idx_fib_periods = np.concatenate((idx_fib_periods, np.array([len(fib_periods)]))) 59 | 60 | nmax = data.shape[0] 61 | #fib_spans = [(m1+1,min(m2+1,nmax )) for m1,m2 in zip(idx_fib_periods[:-1],idx_fib_periods[1:]) if all(fib_periods[(m1+1):(m2+1)]>0)] 62 | fib_spans = [(m1,min(m2+1,nmax )) for m1,m2 in zip(idx_fib_periods[:-1],idx_fib_periods[1:]) if all(fib_periods[(m1+1):(m2+1)]>0)] 63 | 64 | if do_plot: 65 | fig, axs = plt.subplots(3) 66 | axs[0].plot(np.arange(data.shape[0]),np.log(data['Close'])) 67 | axs[1].plot(np.arange(vDrawdowns.shape[0]),vDrawdowns) 68 | # plotting the cumsum 69 | axs[2].plot(np.arange(len(rDrawdowns)),rDrawdowns[::-1]) 70 | for s in fib_spans: 71 | axs[2].plot(np.arange(s[0],s[1]), [0.5]*(s[1]-s[0])) 72 | plt.show() 73 | 74 | if offset!=0: 75 | # adjust the spans by offset 76 | fib_spans = [(m1+offset,m2+offset) for m1,m2 in fib_spans] 77 | return fib_spans 78 | 79 | # finding smmaler-fib periods WITHIN giant fib periods 80 | def find_sub_drawdowns_within_a_giant_drawdown(data, fib_span, drawdown_criteria, START_LOOKING_AFTER_DAYS=None, POST_BUFFER_DAYS = None, RETRACE_MINIMUM=None, recovery_criteria=None): 81 | """ 82 | finds drawsdowns within larger drawdowns, using some criteria: 83 | - time: only starts looking for another drawdown after 1.5 years after the peak of the supra 84 | - minimum retrace: the supra must retrace to 0.5 level to qualify for looking for another drawdown (to prevent too many drawdowns that are just continuation of the primary trend 85 | """ 86 | if START_LOOKING_AFTER_DAYS is None: 87 | START_LOOKING_AFTER_DAYS = 252*1.5 88 | if POST_BUFFER_DAYS is None: 89 | POST_BUFFER_DAYS = 100 90 | if RETRACE_MINIMUM is None: 91 | RETRACE_MINIMUM=0.382 92 | if recovery_criteria is None: 93 | recovery_criteria=0.02 94 | 95 | start_iloc, stop_iloc = fib_span 96 | n_ = data.shape[0] 97 | if ((stop_iloc-start_iloc)<=START_LOOKING_AFTER_DAYS) or ((start_iloc+START_LOOKING_AFTER_DAYS)>= n_): 98 | # don't proceed if small retracement period 99 | return None 100 | 101 | # get the retracements: check if it has retrace at least X 102 | subdata = data.iloc[start_iloc:stop_iloc] 103 | cummax_ = get_highs(subdata).cummax() 104 | low_ = get_lows(subdata) 105 | cummin_ = low_.cummin() 106 | vDrawdowns_ = (cummax_ - low_)/cummax_ 107 | vDoes_retracement = subdata['Close']>=(((cummax_ - cummin_)*RETRACE_MINIMUM)+cummin_) 108 | if not vDoes_retracement.sum(): 109 | # no price is above the minium retracement 110 | return None 111 | 112 | # minimum place to start relooking for another drawdown 113 | ix_minrestart = vDoes_retracement.tolist().index(1) 114 | ix_minrestart = max(ix_minrestart, START_LOOKING_AFTER_DAYS) 115 | 116 | # new (crude) search box 117 | startsub_iloc = int(start_iloc + min(ix_minrestart,n_) ) 118 | stopsub_iloc = int(stop_iloc + min(POST_BUFFER_DAYS, n_)) 119 | fib_subspans = find_retracement_boxes(data.iloc[startsub_iloc:stopsub_iloc], drawdown_criteria, do_plot=False, offset = startsub_iloc, recovery_criteria = recovery_criteria) 120 | return fib_subspans 121 | 122 | # wrapper for find_retracement_boxes 123 | def find_all_retracement_boxes(data, drawdown_criteria=None, START_LOOKING_AFTER_DAYS=None, POST_BUFFER_DAYS = None, fib_spans=None, recovery_criteria=None): 124 | """ 125 | combines 'find_retracement_boxes' and 'find_sub_drawdowns_within_a_giant_drawdown' 126 | used recursively 127 | """ 128 | # big spans 129 | if fib_spans is None: 130 | fib_spans_big = find_retracement_boxes(data, drawdown_criteria,do_plot=False, recovery_criteria=recovery_criteria) 131 | else: 132 | fib_spans_big = fib_spans 133 | 134 | # find smaller subspans within big spans 135 | fib_spans = [] # container 136 | for i, fib_span in enumerate(fib_spans_big): 137 | 138 | fib_spans.append(fib_span) 139 | 140 | # find subsspan within fib_span 141 | fib_subspans = find_sub_drawdowns_within_a_giant_drawdown(data, fib_span, drawdown_criteria, START_LOOKING_AFTER_DAYS, POST_BUFFER_DAYS, recovery_criteria=recovery_criteria) 142 | # integrate 143 | if not (fib_subspans is None): 144 | fib_subspans = find_all_retracement_boxes(data, drawdown_criteria, START_LOOKING_AFTER_DAYS, POST_BUFFER_DAYS, fib_subspans, recovery_criteria = recovery_criteria) 145 | for subspan in fib_subspans: 146 | if subspan not in fib_spans: 147 | fib_spans.append(subspan) 148 | 149 | return fib_spans 150 | 151 | class Fib: 152 | """contains necessary data to make a fibonacci retracement into a feature for ML""" 153 | def __init__(self, fib_span, data, drawdown_criteria, fib_levels, recovery_criteria, make_features = True): 154 | self.drawdown_criteria = drawdown_criteria # what is considered a bear market crash? 155 | self.start = fib_span[0] # start of retracement 156 | self.end = fib_span[1] # end (no longer 20% drawdown) 157 | self.fib_levels = np.array(fib_levels) # fib numbers (0. 158 | self.recovery_criteria = recovery_criteria # percent to high that declares bear over 159 | 160 | # default: not a fib 161 | self.is_fib = False 162 | self.indx_start_of_credible_fib = None 163 | self.loc_start_of_credible_fib = None 164 | #self.loc_end = data.index[self.end] # how to use this????? because the actualy index is -1 165 | 166 | # get levels, as a numpy time-series 167 | fib_series, series_indices = self.calc_fib_series_on_span(data, fib_span, do_mask=True) 168 | self.is_fib = not (fib_series is None) 169 | 170 | # features for ML tool 171 | if make_features and self.is_fib: 172 | features = self._make_features(data) 173 | 174 | # 175 | self.n_total = data.shape[0] 176 | 177 | def calc_fibs(self, hi,lo): 178 | """given a high, and a low, get the fibinocci extensions and retracements. """ 179 | return (hi-lo)*self.fib_levels + lo 180 | 181 | def calc_fib_series_on_span(self, data, fib_span=None, do_mask=True, drawdown_criteria=None): 182 | """given a price series, and two indices that box-in the draw-down, it makes fibonnaci retracements""" 183 | if drawdown_criteria is None: 184 | drawdown_criteria = self.drawdown_criteria 185 | 186 | if fib_span is None: 187 | start_iloc, stop_iloc = self.start, self.end 188 | else: 189 | start_iloc, stop_iloc = fib_span 190 | 191 | subdata = data.iloc[start_iloc:stop_iloc] 192 | # get cummulative-high (notice it takes halfway between body-of-candle and wick 193 | cummax = get_highs(subdata).cummax() 194 | # get cummulative low 195 | cumlow = get_lows(subdata).cummin() 196 | # series of fibs 197 | fib_series_ = FibTimeSeries(cumlow, cummax, fib_levels = self.fib_levels, indices_extended = data.index[data.index>=subdata.index[0]]) 198 | indices = np.arange(start_iloc, stop_iloc) 199 | 200 | # mask 201 | if do_mask: 202 | # mask out all fibs BEFORE the 20% drawdown (because at those times, we wouldn't know we would soon be making fib-retracements 203 | if not (self.indx_start_of_credible_fib is None): 204 | indx_start_of_credible_fib = self.indx_start_of_credible_fib 205 | else: 206 | in_drawdown = 1*(((cummax - cumlow)/cummax)>=self.drawdown_criteria) 207 | if not in_drawdown.any(): 208 | return None,[] 209 | 210 | indx_start_of_credible_fib = in_drawdown.tolist().index(1) 211 | self.indx_start_of_credible_fib = indx_start_of_credible_fib 212 | self.loc_start_of_credible_fib = data.index[self.indx_start_of_credible_fib] 213 | 214 | # truncate series 215 | fib_series_.mask_out_predrawdown(indx_start_of_credible_fib) 216 | # new indices (after truncating for the first drawdown 217 | indices = indices[indx_start_of_credible_fib:] 218 | assert len(indices) == fib_series_.shape[-1] 219 | 220 | self.fib_series = fib_series_ 221 | self.series_indices = indices 222 | return fib_series_, indices 223 | 224 | def _make_features(self, data, fib_span=None, drawdown_criteria=None, recovery_criteria=None): 225 | """ makes primatives for calculating fib retracements and features like: 226 | - max drawdown 227 | - duration 228 | - ever-recovers? 229 | Returns data as a dict 230 | Returans two versions of the data: 231 | i) in_span: the values valid within the drawdown phase 232 | ii) extended: values extended beyond the span, to the end of the (global) time-series 233 | """ 234 | if recovery_criteria is None: 235 | recovery_criteria = self.recovery_criteria 236 | 237 | if drawdown_criteria is None: 238 | drawdown_criteria = self.drawdown_criteria 239 | 240 | if fib_span is None: 241 | fib_span = (self.start,self.end) 242 | 243 | # beginning and end of drawdown 244 | start_iloc, stop_iloc = fib_span 245 | 246 | subdata = data.iloc[start_iloc:stop_iloc] 247 | 248 | # size of span 249 | n = stop_iloc-start_iloc 250 | n_extended = data.shape[0]-stop_iloc 251 | 252 | # get cummulative-high (notice it takes halfway between body-of-candle and wick 253 | cummax = get_highs(subdata).cummax() 254 | # get last price-value at time of recovery 255 | final_price_at_recovery = cummax[-1] 256 | # cummax extended to end of dataseries 257 | cummax_extended = pd.DataFrame({'cummax':[final_price_at_recovery]*n_extended}, index = data.index[stop_iloc:])['cummax'] 258 | # cummulative lows (extended and in_span) 259 | low_extended = get_lows(data.iloc[start_iloc:]) # extended 260 | low_ = low_extended.iloc[:n] 261 | cumlow = low_.cummin() 262 | 263 | # r_drawdown: proportion drawdown 264 | vDrawdown = (cummax - low_)/cummax 265 | vDrawdown_extended = (cummax_extended - low_extended[n:])/cummax_extended 266 | vDrawdown_full = pd.concat((vDrawdown,vDrawdown_extended),axis=0) 267 | 268 | # is in drawdown? 269 | in_drawdown = 1*(((cummax - cumlow)/cummax)>=drawdown_criteria) 270 | if not in_drawdown.any(): 271 | return None 272 | 273 | # mask: in realtime, we only know we are in a fib if drawdown criteria is met 274 | self.indx_start_of_credible_fib = in_drawdown.values.argmax() 275 | self.loc_start_of_credible_fib = in_drawdown.index[self.indx_start_of_credible_fib] 276 | 277 | # feature: has recovered? 278 | feat_recovered_full = 1*(vDrawdown_full[self.indx_start_of_credible_fib:]=start) & (self.index<=end)] 393 | indx_to_return_fibs = np.where((self.index>=start) & (self.index<=end))[0] 394 | fib_series_to_return = self.fib_series[:,indx_to_return_fibs] 395 | # declare that for the extended data, the 'start' will be self.end_loc 396 | extended_start_loc =self.end_loc 397 | else: 398 | # macro-case, if the starting index is outside self.start_loc, self.end_loc) 399 | fib_series_to_return = np.array([[]]*self.n_fib_levels) # dummy to concatenate 400 | extended_start_loc = start + pd.Timedelta(-1, unit="day") 401 | if end > self.end_loc: 402 | # case: if the specified 'end' is greater than the time-series end-index, 403 | # ... then we must repeat the final fib_levels (at self.end_loc) for 404 | # ... the length of the extra time-indices 405 | # ... the extra time indices are NOT contiguous, so we need the user to set 406 | # ... self.indices_extended to know which dates are valid 407 | if self.indices_extended is None: 408 | raise ValueError("need to set 'self.indices_extended' to know for which dates to extrapolate the fib-extensions") 409 | indices_to_extrapolate_fibs = self.indices_extended[(self.indices_extended>extended_start_loc) & (self.indices_extended<=end)] 410 | # how much to extend 411 | n_extrapolate = len(indices_to_extrapolate_fibs) 412 | # extended series 413 | fib_series_extrapolate = np.array([self.fib_series[:,-1]]*n_extrapolate).T 414 | else: 415 | # none to extrapolate 416 | fib_series_extrapolate = np.array([[]]*self.n_fib_levels) # dummy to concatenate 417 | 418 | # concatenate the in-span and extrapolated 419 | return np.concatenate((fib_series_to_return, fib_series_extrapolate), axis=1) 420 | 421 | def mask_out_predrawdown(self, indx_start_of_credible_fib): 422 | """truncates the dataset for the initial drawdown where one couldn't have know that one would eventuallly be in a drawdown""" 423 | self.fib_series = self.fib_series[:,indx_start_of_credible_fib:] 424 | # correct the n_inspan 425 | self.n_inspan = self.fib_series.shape[-1] 426 | # correct the time-indices 427 | self.index = self.index[indx_start_of_credible_fib:] 428 | self.start_loc = self.index[0] 429 | # correct shape 430 | self.shape = self.fib_series.shape 431 | 432 | -------------------------------------------------------------------------------- /core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | from scipy.stats import trim_mean 6 | from .variables import TARGET_DENSITY, RECOVERY_CRITERIA, FIB_LEVELS, GET_WEIGHTS_FOR_LONGTERM_MEMORY, DEFAULT_MEMORY_FEATURES 7 | 8 | # import Fib object, and retracement-box finders 9 | from .fib_utils import * 10 | 11 | # import functions to support automatic finding of drawdowns 12 | from .subjective_drawdown_finder import * 13 | 14 | subjective_drawdown = SubjectiveDrawdown(verbose =False, target_density=TARGET_DENSITY) 15 | 16 | # a module of fibonacci functions 17 | def calc_fibs(hi,lo): 18 | """given a high, and a low, get the fibinocci extensions and retracements. """ 19 | return (hi-lo)*np.array(FIB_LEVELS)+lo 20 | 21 | def calc_fib_series_on_span(data, start_iloc=None, stop_iloc=None, fib_span=None, do_mask=True, drawdown_criteria = 0.2): 22 | """given a price series, and two indices that box-in the draw-down, it makes fibonnaci retracements""" 23 | if not (fib_span is None): 24 | start_iloc, stop_iloc = fib_span 25 | subdata = data.iloc[start_iloc:stop_iloc] 26 | # get cummulative-high (notice it takes halfway between body-of-candle and wick 27 | cummax = get_highs(subdata).cummax() 28 | # get cummulative low 29 | cumlow = get_lows(subdata).cummin() 30 | # series of fibs 31 | fib_series_ = np.array([calc_fibs(hi,lo) for hi,lo in zip(cummax, cumlow)]).T 32 | # make indeices 33 | indices = np.arange(start_iloc, stop_iloc) 34 | if do_mask: 35 | # mask out all fibs BEFORE the 20% drawdown (because at those times, we wouldn't know we would soon be making fib-retracements 36 | in_drawdown = 1*(((cummax - cumlow)/cummax)>=drawdown_criteria) 37 | if not in_drawdown.any(): 38 | return None,[] 39 | 40 | indx_start_of_credible_fib = in_drawdown.tolist().index(1) 41 | # new indices 42 | indices = indices[indx_start_of_credible_fib:] 43 | # truncate 44 | fib_series_ = fib_series_[:,indx_start_of_credible_fib:] 45 | assert len(indices) == fib_series_.shape[-1] 46 | return fib_series_, indices 47 | 48 | 49 | # main maker of fibonacci-retracements & fibonacci-extensions 50 | class FibonacciTechnicalAnalysis: 51 | """Main object that performs all steps for the automatic Fibonacci TA 52 | - make_fib_series: used extracting fibonacci extensions/retracements 53 | - make_fib_features_from_fib_series: used to create features for ML analyses based on fibs 54 | - make_features: combines both of the above 55 | """ 56 | def __init__(self, data, drawdown_criteria, recovery_criteria=None, fib_levels=None, do_plot=False, plot_path = "/tmp/"): 57 | self.data = data 58 | 59 | # fiboanni levels, default 60 | if fib_levels is None: 61 | fib_levels = FIB_LEVELS #[0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09] 62 | 63 | self.fib_levels = fib_levels 64 | 65 | # drawdown criteria to make a fibonacci retracement/extension (percent) 66 | if drawdown_criteria == 'auto': 67 | # find an automatic fibonacci criteria 68 | print("finding optimal drawdown criteria") 69 | try: 70 | optimal_drawdown_criteria,_ = subjective_drawdown.fit(data = data) 71 | 72 | except: 73 | print("failed optimizing drawdown criteria, setting to default") 74 | optimal_drawdown_criteria = DRAWDOWN_CRITERIA 75 | 76 | self.drawdown_criteria = optimal_drawdown_criteria 77 | 78 | else: 79 | self.drawdown_criteria = drawdown_criteria 80 | 81 | if isinstance(self.drawdown_criteria, str): 82 | raise ValueError("'drawdown_criteria' must be percentage 0-1 or 'auto'" ) 83 | 84 | # % criteria to judge when a drawdown has fully recovered 85 | if recovery_criteria is None: 86 | recovery_criteria = RECOVERY_CRITERIA 87 | self.recovery_criteria = recovery_criteria 88 | 89 | # whether to make plots, and the plot path 90 | self.do_plot = do_plot 91 | self.plot_path = plot_path 92 | 93 | def make_fib(self, fib_span, make_features=True): 94 | """returns a Fib object, given a span between peak and recovery""" 95 | return Fib(fib_span=fib_span, data=self.data, drawdown_criteria=self.drawdown_criteria, fib_levels=self.fib_levels, recovery_criteria = self.recovery_criteria, make_features = make_features) 96 | 97 | def make_fib_series(self, data = None, fib_spans=None, do_plot=None, plot_path = None): 98 | """ 99 | makes a list of Fibs for a dataset 100 | if pass a list of fib_spans, then make the series 101 | or, pass the data and the fib_spans will be calculated 102 | """ 103 | if do_plot is None: 104 | do_plot = self.do_plot 105 | 106 | if plot_path is None: 107 | plot_path = self.plot_path 108 | 109 | if (data is None) and (fib_spans is None): 110 | raise ValueError("either supply argument 'data' or 'fib_spans'. Both cannot be None") 111 | 112 | elif (fib_spans is None) and (not (data is None)): 113 | fib_spans = find_all_retracement_boxes(data, self.drawdown_criteria, recovery_criteria=self.recovery_criteria) 114 | 115 | # make a list of fibs 116 | fib_series = [self.make_fib(fib_span) for fib_span in fib_spans] 117 | 118 | # remove null fibs (must pass .is_fib) 119 | fib_series = [fib for fib in fib_series if fib.is_fib] 120 | if do_plot and (not (data is None)): 121 | 122 | # plot the price 123 | fig = plt.figure(figsize=(15,9)) 124 | axs0 = fig.add_subplot(2,1,1) 125 | #axs0.plot(np.arange(data.shape[0]),np.log(data['High'])) 126 | #axs0.plot(np.arange(data.shape[0]),np.log(data['Low'])) 127 | axs0.plot(data.index,np.log(data['High'])) 128 | axs0.plot(data.index,np.log(data['Low'])) 129 | for fib in fib_series: 130 | 131 | # black line showing the start 132 | axs0.plot([data.index[fib.series_indices[0]]]*len(fib.fib_levels), np.log(fib.fib_series[:,0]),'b-') 133 | for level_ in fib.fib_series: 134 | #axs[0].plot(fib.series_indices, np.log(level_)) 135 | axs0.plot(data.index[fib.series_indices], np.log(level_)) 136 | 137 | # also block the max drawdowns 138 | cummax = get_highs(data).cummax() 139 | vDrawdowns = (cummax - get_lows(data))/cummax 140 | 141 | axs1 = fig.add_subplot(2,1,2) 142 | #axs[1].plot(vDrawdowns) 143 | axs1.plot(vDrawdowns) 144 | for x in range(0,5): 145 | #axs[1].plot(data.index, [x/10]*data.shape[0]) 146 | axs1.plot(data.index, [x/10]*data.shape[0]) 147 | 148 | #plt.savefig(self.plot_path + 'fibonacci_timeseries.png') 149 | fig.savefig(self.plot_path + 'fibonacci_timeseries.png') 150 | plt.close() 151 | #plt.show() 152 | 153 | return fib_series 154 | 155 | def make_fib_features(self, fib_series=None, fib_spans=None, weights_for_longterm_memory = None, do_plot = None, plot_path = None, prefix = "", name_mod=None, feature_defaults = None, return_memory_vectors=False, return_empirical_study=False): 156 | """main function, creates fibonacci-based features for machine-learning analyses""" 157 | 158 | if name_mod is None: 159 | name_mod = "_d" 160 | 161 | if do_plot is None: 162 | do_plot = self.do_plot 163 | 164 | if plot_path is None: 165 | plot_path = self.plot_path 166 | 167 | if weights_for_longterm_memory is None: 168 | # criteria used for building the long-term memory 169 | #weights_for_longterm_memory = {'crit1':{'sd':1/365,'mu':0, 'p':1}, 'crit2':{'sd':1/self.drawdown_criteria,'mu':0, 'p':1.3}, 'crit3':{'sd':1/365,'mu':0, 'p':1}, 'crit4':{'sd':1/(365*self.drawdown_criteria),'mu':0,'p':1.2}, 'crit5':{'sd':1.3,'mu':-2.6, 'p':1}} 170 | weights_for_longterm_memory = GET_WEIGHTS_FOR_LONGTERM_MEMORY(self.drawdown_criteria) 171 | 172 | else: 173 | weights_for_longterm_memory['crit2']['sd'] = 1/self.drawdown_criteria 174 | weights_for_longterm_memory['crit4']['sd'] = 1/((1/weights_for_longterm_memory['crit4']['sd'])*self.drawdown_criteria) 175 | 176 | # fib_series: make if not present 177 | if fib_series is None: 178 | fib_series = self.make_fib_series(data = self.data, fib_spans=fib_spans, do_plot=do_plot, plot_path = plot_path) 179 | 180 | # create the FibFeatures Object 181 | fib_features = FibFeatures(data=self.data, fib_series=fib_series, weights_for_longterm_memory = weights_for_longterm_memory, do_plot = do_plot, plot_path = plot_path, prefix = prefix, name_mod=name_mod, feature_defaults = feature_defaults) 182 | 183 | # option to return the memory vectors for empirical analyses 184 | if return_memory_vectors: 185 | 186 | return fib_features.features(data = self.data, return_memory = return_memory_vectors) 187 | 188 | #make the features 189 | (master_features1, master_features2, master_features3), (featnames1, featnames2,featnames3) = fib_features.features(data = self.data, return_memory = return_memory_vectors) 190 | 191 | # return empiricallly derived defaults of features 192 | if return_empirical_study: 193 | return fib_features.empiricals 194 | 195 | # concatenate the features and convert to pd.data.frame 196 | pd_features = pd.DataFrame(np.concatenate((master_features1, master_features2, master_features3),axis=1), index = self.data.index, columns = featnames1+featnames2+featnames3) 197 | 198 | return pd_features 199 | 200 | # modified pd.get_dummies 201 | def get_dummies(series, columns=None): 202 | """ 203 | just a modified form of pd.get_dummies; allows for missing indices 204 | 'series' is a long vector of integers representing differennt columns of a (external) pd.DataFrame 205 | """ 206 | if isinstance(columns,int): 207 | # convert to list of integers 208 | columns = [i for i in range(columns)] 209 | if series.unique().tolist() == columns: 210 | # if the columns set and integer-set-in-series are the same, just run pd.get_dummies 211 | return pd.get_dummies(series, drop_first = False) 212 | 213 | #fake_series_list_expanded = series.tolist() + columns 214 | #fake_series_index_expanded = series.index.append(memory_mt.index[-1] + pd.timedelta_range(start='1 day',periods=len(columns))) 215 | series_expanded = series.append(pd.Series(columns, index = series.index[-1] + pd.timedelta_range(start='1 day',periods=len(columns)))) 216 | dummies = pd.get_dummies(series_expanded).iloc[:-len(columns)] 217 | assert dummies.shape[0] == series.shape[0] 218 | return dummies 219 | 220 | def mask_memory_based_on_indices_in_other_memory(pd_attr, memory_indices, fill): 221 | """this masks-out columns in pd_attr using the (column)indcies in 'memory_indices', filling them with 'fill'; 222 | if memory_indices is a pandas.core.series.Series, then it is the non-recursive format 223 | if memory_indices is a list, then it is the recursive application of them 224 | """ 225 | if isinstance(memory_indices, pd.core.series.Series): 226 | if fill ==0: 227 | #mask_ = 1-pd.get_dummies(memory_indices, drop_first=False) 228 | mask_ = 1-get_dummies(memory_indices, columns = pd_attr.shape[-1]) 229 | masked_attr = pd_attr*mask_[pd_attr.index[0]:] 230 | return masked_attr 231 | elif fill>0: 232 | #mask_ = pd.get_dummies(memory_indices, drop_first=False)*fill+1 233 | mask_ = get_dummies(memory_indices, columns = pd_attr.shape[-1])*fill+1 234 | masked_attr = pd_attr*mask_[pd_attr.index[0]:] 235 | return masked_attr 236 | elif fill<0: 237 | # if fill<0, then we must first zero-out the pd_attr-regions and then add/insert the desired-fil 238 | mask_ = get_dummies(memory_indices, columns = pd_attr.shape[-1]) 239 | mask_ = mask_[pd_attr.index[0]:] 240 | masked_attr = pd_attr*(1-mask_) + mask_*fill 241 | return masked_attr 242 | elif isinstance(memory_indices, list): 243 | # recursion: notice is it updating 'masked_attr' 244 | assert isinstance(memory_indices[0], pd.core.series.Series) 245 | masked_attr = pd_attr.copy() 246 | for memory_index in memory_indices: 247 | masked_attr = mask_memory_based_on_indices_in_other_memory(masked_attr, memory_index, fill) 248 | 249 | return masked_attr 250 | 251 | def rescale_criteria(criteria_vector, wt): 252 | """scales/weights the criteria when building indices that make Fib features""" 253 | criteria_vec_rescaled = (criteria_vector*wt['sd'] - wt['mu'])**wt['p'] 254 | # want to insert a time-multiplier 255 | return criteria_vec_rescaled 256 | 257 | #def rescale_criteria(criteria_vector, wt): 258 | # """scales/weights the criteria when building indices that make Fib features""" 259 | # return (criteria_vector*wt['sd'] - wt['mu'])**wt['p'] 260 | 261 | def tx_feature(x, feature_name): 262 | if (feature_name == 'duration') or (feature_name == 'time_since_peak'): 263 | return np.log(x+1) 264 | return x 265 | 266 | def numpy_trimmed_mean(array, trim=0.1): 267 | """ trim values and take mean""" 268 | # get bottom and top 10% quantiles 269 | #qlow, qhi = np.quantile(array, [trim,1-trim]) 270 | # 271 | # case1 if there is no diversity, then the qlow and qhi will be equal 272 | #if qlow == qhi: 273 | # return array.mean() 274 | # 275 | #return array[np.where((array > qlow) & (array < qhi))[0]].mean() 276 | return trim_mean(array, proportiontocut = trim) 277 | 278 | def fibs_get_default_features(names_of_features_to_return = None, which_memory=None): 279 | """returns default-values for ['max_drawdown', 'time_since_peak', 'duration', 'precovery'] for those price-points before any fibonacci-retracement/extension has happened 280 | these should be empiricially calculated based on "average" retracements/extensions 281 | """ 282 | if which_memory is None: 283 | which_memory = 1 284 | 285 | # extract default features (memory) for this memory 286 | default_features = {featnm:memory_values[which_memory] for featnm,memory_values in DEFAULT_MEMORY_FEATURES.items()} 287 | #default_features = {'max_drawdown': [0.2,0.2,0.2][which_memory], 'time_since_peak':[6.0,6.0,6.0][which_memory], 'duration':[3,3,3][which_memory], 'precovery':[-0.3,-0.3,-0.3][which_memory], 'fib_lev':[-1,-1,-1][which_memory], 'topdist':[0.22,0.22,0.22][which_memory], 'botdist':[0.091,0.091,0.091][which_memory]} 288 | 289 | if names_of_features_to_return is None: 290 | return default_features 291 | 292 | return {k:default_features[k] for k in names_of_features_to_return} 293 | 294 | #class 295 | class MemoryArray(np.ndarray): 296 | """subclass of numpy.array, but which a few more functions and attributes""" 297 | def __new__( 298 | cls, 299 | data, 300 | memory_sequence=1, 301 | credible_start = 10**10, 302 | credible_start_loc = None, 303 | time_indices = None, 304 | ): 305 | self = np.asarray(data).view(cls) 306 | self.memory_sequence = memory_sequence 307 | self.credible_start = credible_start 308 | self.credible_start_loc = credible_start_loc 309 | self.time_indices = time_indices 310 | 311 | return self 312 | 313 | def update_credible_start(self, credible_start, credible_start_loc=None): 314 | """track """ 315 | if isinstance(credible_start, float) or isinstance(credible_start, np.float64): 316 | credible_start = int(credible_start) 317 | 318 | if isinstance(credible_start, int) or isinstance(credible_start, np.int64): 319 | if self.credible_start is None: 320 | self.credible_start = int(credible_start) 321 | else: 322 | self.credible_start = min(self.credible_start, int(credible_start)) 323 | 324 | elif isinstance(credible_start, pd._libs.tslibs.timestamps.Timestamp): 325 | if self.credible_start_loc is None: 326 | self.credible_start_loc = credible_start 327 | else: 328 | self.credible_start_loc = min(self.credible_start_loc,credible_start) 329 | if not (credible_start_loc is None): 330 | if self.credible_start_loc is None: 331 | self.credible_start_loc = credible_start_loc 332 | else: 333 | self.credible_start_loc = min(self.credible_start_loc,credible_start_loc) 334 | 335 | def numpy(self): 336 | """ return as an np.array""" 337 | return np.asarray(self) 338 | 339 | 340 | class FeatureArray(np.ndarray): 341 | """subclass of numpy.array, but which a few more functions and attributes""" 342 | def __new__( 343 | cls, 344 | data, 345 | columns = None, 346 | memory_sequence=1, 347 | credible_start = 10**10, 348 | credible_start_loc = None, 349 | time_indices = None, 350 | ): 351 | self = np.asarray(data).view(cls) 352 | self.columns = columns 353 | self.memory_sequence = memory_sequence 354 | self.credible_start = credible_start 355 | self.credible_start_loc = credible_start_loc 356 | self.time_indices = time_indices 357 | 358 | return self 359 | 360 | def update_credible_start(self, credible_start, credible_start_loc=None): 361 | """track """ 362 | if isinstance(credible_start, int): 363 | self.credible_start = min(self.credible_start, credible_start) 364 | 365 | elif isinstance(credible_start, pd._libs.tslibs.timestamps.Timestamp): 366 | if self.credible_start_loc is None: 367 | self.credible_start_loc = credible_start 368 | else: 369 | self.credible_start_loc = min(self.credible_start_loc,credible_start) 370 | if not (credible_start_loc is None): 371 | if self.credible_start_loc is None: 372 | self.credible_start_loc = credible_start_loc 373 | else: 374 | self.credible_start_loc = min(self.credible_start_loc,credible_start_loc) 375 | 376 | def numpy(self): 377 | """ return as an np.array""" 378 | return np.asarray(self) 379 | 380 | def insert(self, data, iterable, column=None): 381 | if isinstance(column,int): 382 | self[iterable,column] = data 383 | elif isinstance(column,str) and (column in self.columns): 384 | self[iterable, self.columns.index(column)] = data 385 | 386 | def get(self, iterable, column=None): 387 | if isinstance(column,int): 388 | return self.numpy()[iterable,column] 389 | 390 | elif isinstance(column,str) and (column in self.columns): 391 | return self.numpy()[iterable, self.columns.index(column)] 392 | 393 | def make_default_features(nrows, ncols=None, default_features=None, which_memory=None, names_of_features_to_return = None): 394 | """ makes an empty numpy array with feature defaults""" 395 | if default_features is None: 396 | # fill with these defaults 397 | default_features = fibs_get_default_features(which_memory=which_memory) 398 | 399 | if names_of_features_to_return is None: 400 | names_of_features_to_return = ['max_drawdown', 'time_since_peak', 'duration', 'precovery', 'fib_lev', 'box01']#'topdist', 'botdist'] 401 | 402 | if ncols is None: 403 | ncols = len(names_of_features_to_return) 404 | 405 | assert ncols == len(names_of_features_to_return) 406 | 407 | # initialize new empty array 408 | features = np.empty((nrows,ncols),dtype=np.float64) 409 | 410 | # fill the columns with default values 411 | for i_col,featnm in enumerate(names_of_features_to_return): 412 | 413 | features[:,i_col].fill(default_features[featnm]) 414 | 415 | return FeatureArray(features, columns = names_of_features_to_return) 416 | 417 | class FibFeatures: 418 | """ """ 419 | def __init__(self, data, fib_series, weights_for_longterm_memory = None, do_plot = False, plot_path = "/tmp/", prefix = "", name_mod=None, feature_defaults = None, do_log_transform_fib_levels=True): 420 | """ 421 | fib_series: list of Fibs 422 | weights_for_longterm_memory: constants, used to classify which retracements 423 | """ 424 | # time indices, 425 | self.data_indices = data.index 426 | 427 | # total number of points 428 | self.n_total = data.shape[0] #fib_series[0].n_total 429 | 430 | # fib_series 431 | self.fib_series = fib_series 432 | 433 | # names of features 434 | self.nm_features = ['max_drawdown', 'time_since_peak', 'duration', 'precovery', 'fib_lev'] 435 | 436 | # suffix to modify feature names 437 | if name_mod is None: 438 | name_mod = "_d" 439 | self.name_mod=name_mod 440 | 441 | self.do_plot = do_plot 442 | 443 | # get the fibonacci fib_lelves 444 | self.fib_levels = FIB_LEVELS#fib_series[0].fib_levels 445 | 446 | self.n_levels = len(self.fib_levels) 447 | 448 | # number of fibs in fib_series 449 | self.n_fibs = len(fib_series) 450 | 451 | # whether to do a semi-log transformation of fib_levels indicator: 452 | # .. np.log(1+2* FIB_LEVEL)-1 # ensures -1 is base 453 | self.do_log_transform_fib_levels = do_log_transform_fib_levels 454 | 455 | # if there are any drawdowns/fib-retracements 456 | if self.n_fibs>0: 457 | 458 | # Recovery and Drawdown criteria: get from fib objects 459 | self.drawdown_criteria = fib_series[0].drawdown_criteria 460 | self.recovery_criteria = fib_series[0].recovery_criteria 461 | 462 | # default weighting-coefficients building the longterm memory 463 | if weights_for_longterm_memory is None: 464 | # criteria used for building the long-term memory 465 | #weights_for_longterm_memory = {'crit1':{'sd':1/365,'mu':0, 'p':1}, 'crit2':{'sd':1/self.drawdown_criteria,'mu':0, 'p':1.3}, 'crit3':{'sd':1/365,'mu':0, 'p':1}, 'crit4':{'sd':1/(365*self.drawdown_criteria),'mu':0,'p':1.2}, 'crit5':{'sd':1.3,'mu':-2.6, 'p':1}} 466 | weights_for_longterm_memory = GET_WEIGHTS_FOR_LONGTERM_MEMORY(self.drawdown_criteria) 467 | else: 468 | weights_for_longterm_memory['crit2']['sd'] = 1/self.drawdown_criteria 469 | weights_for_longterm_memory['crit4']['sd'] = 1/((1/weights_for_longterm_memory['crit4']['sd'])*self.drawdown_criteria) 470 | 471 | self.weights_for_longterm_memory = weights_for_longterm_memory 472 | 473 | # path to save plots 474 | self.plot_path = plot_path 475 | if (not (plot_path is None)) and self.do_plot: 476 | self.do_plot = os.path.isdir(plot_path) 477 | self.plot_path = os.path.join(self.plot_path, prefix) 478 | print('WARNING: making PLOTS in %s' % self.plot_path) 479 | 480 | # monitor empirical (mean) values 481 | self.empiricals = {'max_drawdown':[], 'time_since_peak':[], 'duration':[], 'precovery':[], 'box01':[]} #'topdist':[], 'botdist':[]} 482 | 483 | # set up features used to build of time-series of retracements 484 | self._collect_fib_attrs(fib_series, inplace=True) 485 | 486 | def _return_defaults(self, which_memory_return_default = None, return_memory_vectors=False): 487 | """returns default values for features, if the time-series doesn't have enough data to make fibonacci retracements & extensions""" 488 | if which_memory_return_default is None: 489 | which_memory_return_default = [1,2,3] 490 | 491 | if isinstance(which_memory_return_default,int): 492 | which_memory_return_default = [which_memory_return_default] 493 | 494 | # make empty (dummy) memories 495 | if return_memory_vectors: 496 | # make 497 | memory_arrays_defaults = [MemoryArray(data=-1*np.ones([self.n_levels, self.n_total]), memory_sequence = i) for i in which_memory_return_default] 498 | if len(memory_arrays_defaults)==1: 499 | return memory_arrays_defaults[0] 500 | return memory_arrays_defaults 501 | 502 | # make features with constant (default) values 503 | master_features_default = [make_default_features(nrows=self.n_total, default_features=fibs_get_default_features(which_memory=which_memory)) for which_memory in which_memory_return_default] 504 | 505 | # get features-names for defaults 506 | nm_features_default = [['fib-%d_%s%s' % (i,nm_,self.name_mod) for nm_ in fd.columns] for fd,i in zip(master_features_default, which_memory_return_default)] 507 | 508 | if len(master_features_default)==1: 509 | return master_features_default[0], nm_features_default[0] 510 | 511 | # return default features and names of features 512 | return master_features_default, nm_features_default 513 | 514 | def _collect_fib_attrs(self, fib_series, inplace=True): 515 | """collect features from a fibonacci-series""" 516 | # find which fibs are 1 2 or 3 517 | self.fib_attrs = {'max_drawdown':None, 'time_since_peak':None, 'recovered':None, 'precovery':None,'duration':None, 'volume':None} 518 | 519 | # loop through fibs, get the attrs 520 | for fibattr_ in self.fib_attrs.keys(): 521 | 522 | # collect the time series of attributes for all the Fibs in FibSeries 523 | self.fib_attrs[fibattr_] = pd.concat({i:fib.features[fibattr_] for i,fib in enumerate(fib_series)},axis=1) 524 | 525 | if not inplace: 526 | return fib_attrs 527 | 528 | def _get_shortterm_memory(self): 529 | """short-term memory: tracks the current Fibonacci""" 530 | # S/T criteria 1: lowest time to peak 531 | if 'memory_st' in dir(self): 532 | return self.memory_st 533 | 534 | if self.n_fibs>=1: 535 | # if there is at least one drawdown 536 | return self.fib_attrs['time_since_peak'].fillna(10**9).idxmin(axis=1) 537 | 538 | self.memory_st = self._return_defaults(1,True) 539 | return self.memory_st 540 | 541 | def _get_medterm_memory(self, memory_st=None): 542 | """med-term memory: tracks the 1-lag Fibonacci""" 543 | if 'memory_mt' in dir(self): 544 | return self.memory_mt 545 | 546 | # check that there are more fibs that 1 (otherwise, there can't be a medium-term memory 547 | if self.n_fibs>=2: 548 | 549 | # earliest qualifying date for a medterm_memory 550 | self.loc_credible_med_term = self.fib_series[1].loc_start_of_credible_fib 551 | 552 | memory_st = self._get_shortterm_memory() 553 | 554 | # indicator: medium term memory 555 | memory_mt = mask_memory_based_on_indices_in_other_memory( pd_attr=self.fib_attrs['time_since_peak'][self.loc_credible_med_term:].fillna(10**9),memory_indices=memory_st, fill=10**10).idxmin(axis=1) 556 | self.memory_mt = memory_mt 557 | return memory_mt 558 | 559 | else: 560 | self.memory_mt = self._return_defaults(2,True) 561 | return self.memory_mt 562 | 563 | def _get_longterm_crit1(self): 564 | """LT criteria 1: is NOT recovered and oldest-> take the oldest NOT recovered (weight) """ 565 | memory_st = self._get_shortterm_memory() 566 | memory_mt = self._get_medterm_memory() 567 | if self.n_fibs>=2: 568 | 569 | self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib 570 | 571 | # check is not recovered from drawdown 572 | lt_crit1 = rescale_criteria((1-self.fib_attrs['recovered']).fillna(0)*(self.fib_attrs['time_since_peak'].fillna(0)), wt = self.weights_for_longterm_memory['crit1'])[self.loc_credible_lt:] 573 | 574 | # mask out: short-term memory 575 | lt_crit1 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit1, memory_indices=[memory_st, memory_mt], fill=0) 576 | 577 | if self.do_plot: 578 | # save criteria 1 as a plot to inspect 579 | plt.figure(figsize=(15,9)) 580 | plt.plot(lt_crit1) 581 | plt.legend([str(k) for k in lt_crit1.columns]); 582 | plt.savefig(self.plot_path + 'LTmem3_crit1.png') 583 | plt.close() 584 | 585 | return lt_crit1 586 | else: 587 | return None 588 | 589 | def _get_longterm_crit2(self): 590 | """ LONG-TERM CRITERIA 2: deep-draw down""" 591 | memory_st = self._get_shortterm_memory() 592 | memory_mt = self._get_medterm_memory() 593 | 594 | if self.n_fibs>=2: 595 | 596 | self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib 597 | 598 | lt_crit2 = rescale_criteria(self.fib_attrs['max_drawdown'], wt = self.weights_for_longterm_memory['crit2'])[self.loc_credible_lt:] 599 | 600 | lt_crit2 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit2, memory_indices=[memory_st, memory_mt], fill=0) 601 | 602 | if self.do_plot: 603 | # save criteria 1 as a plot to inspect 604 | plt.figure(figsize=(15,9)) 605 | plt.plot(lt_crit2) 606 | plt.legend([str(k) for k in lt_crit2.columns]); 607 | plt.savefig(self.plot_path + 'LTmem3_crit2.png') 608 | plt.close() 609 | 610 | return lt_crit2 611 | else: 612 | return None 613 | 614 | def _get_longterm_crit3(self): 615 | """ LONG-TERM CRITERIA 3: long-duration draw-down""" 616 | memory_st = self._get_shortterm_memory() 617 | memory_mt = self._get_medterm_memory() 618 | 619 | if self.n_fibs>=2: 620 | 621 | self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib 622 | 623 | lt_crit3 = rescale_criteria(self.fib_attrs['duration'], wt = self.weights_for_longterm_memory['crit3'])[self.loc_credible_lt:] 624 | 625 | lt_crit3 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit3, memory_indices=[memory_st,memory_mt], fill=0) 626 | 627 | if self.do_plot: 628 | # save criteria 1 as a plot to inspect 629 | plt.figure(figsize=(15,9)) 630 | plt.plot(lt_crit3) 631 | plt.legend([str(k) for k in lt_crit3.columns]); 632 | plt.savefig(self.plot_path + 'LTmem3_crit3.png') 633 | plt.close() 634 | 635 | return lt_crit3 636 | else: 637 | return None 638 | 639 | def _get_longterm_crit4(self): 640 | """ LONG-TERM CRITERIA 4: total volume (drawdown % x time-in-drawdown)""" 641 | memory_st = self._get_shortterm_memory() 642 | memory_mt = self._get_medterm_memory() 643 | 644 | if self.n_fibs>=2: 645 | 646 | self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib 647 | 648 | lt_crit4 = rescale_criteria(self.fib_attrs['volume'], wt = self.weights_for_longterm_memory['crit4'])[self.loc_credible_lt:] 649 | lt_crit4 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit4, memory_indices=[memory_st, memory_mt], fill=0) 650 | 651 | if self.do_plot: 652 | # save criteria 1 as a plot to inspect 653 | plt.figure(figsize=(15,9)) 654 | plt.plot(lt_crit4) 655 | plt.legend([str(k) for k in lt_crit4.columns]); 656 | plt.savefig(self.plot_path + 'LTmem3_crit4.png') 657 | plt.close() 658 | 659 | return lt_crit4 660 | else: 661 | return None 662 | 663 | def _get_longterm_crit5(self): 664 | """ LONG-TERM CRITERIA 5: distance above recovery level, with decay by time""" 665 | memory_st = self._get_shortterm_memory() 666 | memory_mt = self._get_medterm_memory() 667 | 668 | if self.n_fibs>=2: 669 | 670 | # get the start of credible beginning of fib 671 | self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib 672 | 673 | # get basis of the lt_crit5 674 | attr_precovery_decayed_by_time = self.fib_attrs['precovery'] 675 | 676 | # rescale lt_crit5 677 | lt_crit5 = rescale_criteria(attr_precovery_decayed_by_time, 678 | wt = self.weights_for_longterm_memory['crit5']) 679 | 680 | # smooth out the precovery by a rolling window 681 | lt_crit5 = lt_crit5.rolling(window= 4, center=False, min_periods =1).mean()[self.loc_credible_lt:] 682 | 683 | # get min to serve as fill 684 | min_to_fill_for_lt_crit5 = float(lt_crit5.min().min()) 685 | min_to_fill_for_lt_crit5 = min_to_fill_for_lt_crit5*1.3 if min_to_fill_for_lt_crit5<0 else min_to_fill_for_lt_crit5*0.75 686 | # mask out: med-term memory 687 | lt_crit5 = mask_memory_based_on_indices_in_other_memory(pd_attr = lt_crit5, memory_indices = [memory_st,memory_mt], fill = min_to_fill_for_lt_crit5) 688 | 689 | if self.do_plot: 690 | # save criteria 1 as a plot to inspect 691 | plt.figure(figsize=(15,9)) 692 | plt.plot(lt_crit5) 693 | plt.legend([str(k) for k in lt_crit5.columns]); 694 | plt.savefig(self.plot_path + 'LTmem3_crit5.png') 695 | plt.close() 696 | 697 | return lt_crit5 698 | else: 699 | return None 700 | 701 | def _get_longterm_memory(self, memory_st = None, memory_mt = None): 702 | """long-term memory: tracks the 2-lag Fibonacci""" 703 | if 'memory_lt' in dir(self): 704 | return self.memory_lt 705 | 706 | if self.n_fibs<3: 707 | # return defaults empty if less than 3 fib-retracements 708 | self.memory_lt = self._return_defaults(3,True) 709 | return self.memory_lt 710 | 711 | lt_crit1 = self._get_longterm_crit1() 712 | lt_crit2 = self._get_longterm_crit2() 713 | lt_crit3 = self._get_longterm_crit3() 714 | lt_crit4 = self._get_longterm_crit4() 715 | lt_crit5 = self._get_longterm_crit5() 716 | lt_crit = lt_crit1 +lt_crit2 + lt_crit3 + lt_crit4 + lt_crit5 717 | if self.do_plot: 718 | plt.figure(figsize=(15,9)) 719 | plt.plot(lt_crit) 720 | plt.legend([str(k) for k in lt_crit.columns]); 721 | plt.savefig(self.plot_path + 'LTmem3_crit.png') 722 | plt.close() 723 | 724 | # fill nas: notice I fill NAs with the minimum values (because we select by maximizing) 725 | memory_lt = lt_crit.fillna(float(lt_crit.min().min())).idxmax(axis=1) 726 | self.memory_lt = memory_lt 727 | 728 | if self.do_plot: 729 | plt.figure(figsize=(15,9)) 730 | plt.plot(memory_lt) 731 | plt.savefig(self.plot_path + 'LTmem3.png') 732 | plt.close() 733 | 734 | return memory_lt 735 | 736 | def _build_fib_timematrix(self, memory_vector=None, memory_identity=None): 737 | """creates a time-series matrix, for a given memory_vector (either s/t, med, or l/t""" 738 | 739 | if memory_vector is None: 740 | print("memory vector is None in '_build_fib_timematrix'") 741 | return None, None 742 | 743 | if memory_identity is None: 744 | # get the sequence/identity of the memory vector 745 | memory_identity = memory_vector.memory_sequence 746 | 747 | if self.n_fibs < memory_identity: 748 | # the number of fibonnaci extensions is less than this identity, return defaults 749 | print("number of fibs is less than the prescribed mem-identity %d" % memory_identity) 750 | dummy_features = self._return_defaults(memory_identity,False) 751 | return memory_vector, dummy_features 752 | 753 | # size 754 | n_ = memory_vector.shape[0] 755 | 756 | # number of levels 757 | n_levels = len(self.fib_levels) 758 | 759 | # split the memory vector 760 | subseq_changes = np.split(memory_vector, np.where(np.diff(memory_vector))[0]+1) 761 | 762 | # get the meta-data for each (uniform) subsequence 763 | subseqs_metadata = [{'start_loc':subseq.index[0], 'end_loc':subseq.index[-1], 'n':len(subseq), 'id':subseq.unique()[0]} for subseq in subseq_changes] 764 | 765 | # empty containers: memory-fib-levels 766 | master_memory = MemoryArray(data = np.zeros([n_levels, self.n_total]), memory_sequence = memory_identity) 767 | 768 | #nm_features_get = ['max_drawdown', 'time_since_peak', 'duration', 'precovery'] 769 | nm_features_get = self.nm_features 770 | 771 | # empty containers: features (filled with defaults 772 | #master_features = np.zeros([self.n_total, len(nm_features_get)]) # container for features-time-series 773 | master_features = make_default_features(nrows=self.n_total, default_features = fibs_get_default_features(which_memory = memory_identity)) 774 | # loop through fibonacci-sequences 775 | for i,subseq in enumerate(subseqs_metadata): 776 | # indices for insertion in 777 | span_to_insert = (np.where(self.data_indices==subseq['start_loc'])[0][0],(np.where(self.data_indices==subseq['end_loc'])[0][0]+1)) 778 | 779 | # data fib_series to insert 780 | data_to_insert = self.fib_series[subseq['id']].fib_series.get(start=subseq['start_loc'], end = subseq['end_loc']) 781 | 782 | # insert the data into the memory 783 | master_memory[:, span_to_insert[0]:span_to_insert[1]] = data_to_insert 784 | master_memory.update_credible_start(credible_start = span_to_insert[0]) 785 | 786 | # gather features 787 | feat_indx_insert = np.where((self.data_indices >= subseq['start_loc']) & (self.data_indices <= subseq['end_loc']))[0] 788 | feat_indx_get = np.where((self.fib_series[subseq['id']].features[nm_features_get[0]].index >= subseq['start_loc']) & (self.fib_series[subseq['id']].features[nm_features_get[0]].index <= subseq['end_loc']))[0] 789 | assert len(feat_indx_insert) == len(feat_indx_get) 790 | 791 | # loop through and get features 792 | for j,featnm in enumerate(nm_features_get[:-1]): 793 | 794 | #master_features[feat_indx_insert, j] = tx_feature(self.fib_series[subseq['id']].features[featnm].iloc[feat_indx_get], featnm) 795 | master_features.insert(data = tx_feature(self.fib_series[subseq['id']].features[featnm].iloc[feat_indx_get], featnm), 796 | iterable = feat_indx_insert, 797 | column = featnm) 798 | 799 | # get empirical values for 800 | for featnm in nm_features_get[:-1]: 801 | # '.get' function is used to get a subset of data, based on master-memory.credible_start, which is the first index of the first fibonacci retracement 802 | feature_empirical_mean = numpy_trimmed_mean(master_features.get(range(master_memory.credible_start,self.n_total), column = featnm)) 803 | self.empiricals[featnm] += [feature_empirical_mean] 804 | 805 | return master_memory, master_features 806 | 807 | def _build_features0(self, memory, master_features, data): 808 | """given a price (data['Close']) and master_memory of (selected) fib-levels, snake the price through the memory to get the relative price difference (as a feature)""" 809 | 810 | # identity (1,2,3) memory 811 | memory_identity = memory.memory_sequence 812 | 813 | # check if no fibs exist 814 | if self.n_fibs < memory_identity: 815 | return master_features 816 | 817 | # fill-value (just for the arithmetic/min/max-finding) 818 | fillvalue = data['Close'].max()*10 819 | 820 | # find the index-position (non-time) of the first-credible fib 821 | cred_start = memory.credible_start 822 | 823 | # snake through price : get the top fib snake 824 | snake1 = memory[:,cred_start:] - data['Close'].iloc[cred_start:].values 825 | snake_plus = np.clip(snake1,0,10**10) 826 | snake_plus[np.nonzero(snake_plus==0)]=fillvalue 827 | top_fib = snake_plus.argmin(axis=0) 828 | 829 | # snake through price: get the bottom fib snake 830 | snake_neg = np.clip(snake1,-10**10,0) 831 | snake_neg[np.nonzero(snake_neg==0)]=-fillvalue 832 | bottom_fib = snake_neg.argmax(axis=0) 833 | 834 | # snake through the fib levels 835 | snake_through_fib_levels = np.array([self.fib_levels[i] for i in bottom_fib], dtype=np.float64) 836 | if self.do_log_transform_fib_levels: 837 | # convert the fib_level to semi-log scale 838 | snake_through_fib_levels = np.log(1+2*snake_through_fib_levels)-1 839 | 840 | master_features.insert(data = snake_through_fib_levels, 841 | iterable = range(cred_start, master_features.shape[0]), 842 | column = 'fib_lev') 843 | 844 | # ensure that the topfib is at least as big as the bottom fib 845 | top_fib = np.maximum(bottom_fib, top_fib) 846 | 847 | # price at the fibs 848 | top_fib_price = memory[top_fib,np.arange(cred_start,memory.shape[-1])] 849 | bottom_fib_price = memory[bottom_fib,np.arange(cred_start,memory.shape[-1])] 850 | 851 | # get the price differences (between the price and the fibs) 852 | #fib_dist_to_top_fib = (top_fib_price-data['Close'].iloc[cred_start:].values)/data['Close'].iloc[cred_start:].values 853 | #fib_dist_to_bot_fib = (data['Close'].iloc[cred_start:].values-bottom_fib_price)/data['Close'].iloc[cred_start:].values 854 | # track the empirical values (for setting defaults) 855 | #self.empiricals['topdist'] += [numpy_trimmed_mean(fib_dist_to_top_fib, trim=0.1)] 856 | #self.empiricals['botdist'] += [numpy_trimmed_mean(fib_dist_to_bot_fib, trim=0.1)] 857 | 858 | # convert distance to fibs as a [0,1] indicator; if beyond, convert to % above 859 | z = data['Close'].iloc[cred_start:].values 860 | range_fib_price = top_fib_price - bottom_fib_price 861 | # 3 cases: if within [bottom, top], convert to [0,1] indicator 862 | # ... if above: convert to [1,+] logged 863 | # ... if below: convert to [-Inf, -0.0001] 864 | fib_box_01 = (z - bottom_fib_price)/(range_fib_price + (range_fib_price==0)) 865 | fib_box_above = 1+np.log( z / top_fib_price) 866 | fib_box_below = np.log(z / bottom_fib_price) 867 | # combine cases: bools 868 | is_case_01 = (z < top_fib_price) & (z > bottom_fib_price) 869 | is_case_above = z>=top_fib_price 870 | is_case_below = z<=bottom_fib_price 871 | # combine cases 872 | fib_box_dist = fib_box_01*is_case_01 + fib_box_above*is_case_above + fib_box_below*is_case_below 873 | # add to empiricals 874 | self.empiricals['box01'] += [numpy_trimmed_mean(fib_box_dist, trim=0.1)] 875 | 876 | # plot price snake through fib-boxes 877 | if self.do_plot: 878 | 879 | # plot the raw fib-time-series 880 | plt.figure(figsize=(15,9)) 881 | plt.plot(np.arange(len(data['Close'])),np.log(data['Close'])) 882 | plt.plot(np.log(memory.T), 'b--') 883 | for fib in self.fib_series: 884 | for level_ in fib.fib_series: 885 | plt.plot(fib.series_indices, np.log(level_)) 886 | 887 | plt.savefig(self.plot_path + 'price_and_fibs-%d.png' % (memory.memory_sequence)) 888 | plt.close() 889 | 890 | # plot the fib-feature (price snakes through fibs) 891 | plt.figure(figsize=(15,9)) 892 | plt.plot(data['Close'].iloc[cred_start:].index, np.log(top_fib_price)) 893 | plt.plot(data['Close'].iloc[cred_start:].index, np.log(bottom_fib_price)) 894 | plt.plot(data['Close'].iloc[cred_start:].index, np.log(data['Close'].iloc[cred_start:].values)) 895 | plt.savefig(self.plot_path + 'price-snake-through-fib-%d.png' % (memory.memory_sequence)) 896 | plt.close() 897 | 898 | # insert distance-to-top-fib into master-features 899 | master_features.insert(data = fib_box_dist, 900 | iterable = range(cred_start, master_features.shape[0]), 901 | column ='box01') 902 | #master_features.insert(data = fib_dist_to_top_fib, 903 | # iterable = range(cred_start, master_features.shape[0]), 904 | # column ='topdist') 905 | # 906 | # insert distance-to-bottom-fib into master-features 907 | #master_features.insert(data = fib_dist_to_bot_fib, 908 | # iterable = range(cred_start, master_features.shape[0]), 909 | # column ='botdist') 910 | # 911 | # feature names: update with name_mod 912 | nm_feat = ['fib-%d_%s%s' % (memory.memory_sequence, nm_, self.name_mod) for nm_ in master_features.columns] 913 | return master_features, nm_feat 914 | 915 | def features(self, data = None, return_memory = False): 916 | """ main function: 917 | wrapper for _build_fib_timematrix, runs on s/t,m/t and l/t memory 918 | ... and _build_features0 (which outputs final features) 919 | """ 920 | # features 1: most-recent drawdown 921 | memory_st = self._get_shortterm_memory() 922 | master_memory1, master_features1 = self._build_fib_timematrix(memory_vector=memory_st, memory_identity = 1) 923 | 924 | # features 2: previous drawdown 925 | memory_mt = self._get_medterm_memory(memory_st = memory_st) 926 | master_memory2, master_features2 = self._build_fib_timematrix(memory_vector=memory_mt, memory_identity = 2) 927 | 928 | # features 3: long-term drawdown 929 | memory_lt = self._get_longterm_memory(memory_st = memory_st, memory_mt = memory_mt) 930 | master_memory3, master_features3 = self._build_fib_timematrix(memory_vector=memory_lt, memory_identity = 3) 931 | 932 | if return_memory: 933 | # option to return the raws (memory vector) 934 | #return (master_memory1, master_memory2, master_memory3), (master_features1, master_features2, master_features3) 935 | return (memory_st, memory_mt, memory_lt), (master_features1, master_features2, master_features3) 936 | 937 | features1, names1 = self._build_features0(master_memory1, master_features1, data) 938 | features2, names2 = self._build_features0(master_memory2, master_features2, data) 939 | features3, names3 = self._build_features0(master_memory3, master_features3, data) 940 | 941 | return (features1, features2, features3), (names1, names2,names3) 942 | 943 | 944 | 945 | 946 | --------------------------------------------------------------------------------