├── .gitignore
├── img
    ├── fib-snake.png
    ├── longterm-memory.png
    ├── shortterm-memory.png
    └── fibonacci_timeseries.png
├── subjective_drawdown_models
    ├── subjective_drawdown_model1.pkl
    └── subjective_drawdown_model2.pkl
├── demo_fibonacci_ml.py
├── variables.py
├── readme.md
├── subjective_drawdown_finder.py
├── fib_utils.py
└── core.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | \#*\#
3 | .\#*
4 | fibonacci_ml_v0.py
5 | 


--------------------------------------------------------------------------------
/img/fib-snake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/fib-snake.png


--------------------------------------------------------------------------------
/img/longterm-memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/longterm-memory.png


--------------------------------------------------------------------------------
/img/shortterm-memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/shortterm-memory.png


--------------------------------------------------------------------------------
/img/fibonacci_timeseries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/img/fibonacci_timeseries.png


--------------------------------------------------------------------------------
/subjective_drawdown_models/subjective_drawdown_model1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/subjective_drawdown_models/subjective_drawdown_model1.pkl


--------------------------------------------------------------------------------
/subjective_drawdown_models/subjective_drawdown_model2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faraway1nspace/fibonacci_ml/HEAD/subjective_drawdown_models/subjective_drawdown_model2.pkl


--------------------------------------------------------------------------------
/demo_fibonacci_ml.py:
--------------------------------------------------------------------------------
# Demo script: download the QQQ price history with yfinance and turn it into
# fibonacci features via FibonacciTechnicalAnalysis.
# NOTE(review): os, datetime, pd, np, plt, copy and re are imported here but
# are not referenced by any statement in this script.
import os
import datetime
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import re

# FibonacciTechnicalAnalysis is presumably provided by this star import
# (it is not defined in this script) -- confirm against core.py.
from fibonacci_ml.core import *

# get timeseries for QQQ (no start/end given, so yfinance's defaults apply)
data = yf.download("QQQ")

# initialize the FibonacciTechnicalAnalysis object with a fixed 20% drawdown
# criteria and plotting disabled
fib_maker = FibonacciTechnicalAnalysis(data, drawdown_criteria=0.20, do_plot=False)

# make the features
features = fib_maker.make_fib_features()
20 | 
21 | 


--------------------------------------------------------------------------------
/variables.py:
--------------------------------------------------------------------------------
# Default target density of fibs (drawdowns) per year; SubjectiveDrawdown uses
# it when no target_density is supplied.
TARGET_DENSITY = 0.235
# Default drawdown criteria (fraction); used by the GET_WEIGHTS_FOR_LONGTERM_MEMORY*
# functions below when they are called with drawdown_criteria=None.
DRAWDOWN_CRITERIA=0.2
# Default recovery criteria (fraction) used to judge when a retracement is finished.
RECOVERY_CRITERIA=0.02
# Fibonacci retracement (<= 1) and extension (> 1) levels.
FIB_LEVELS = [0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09, 17.944]
 5 | 
 6 | def GET_WEIGHTS_FOR_LONGTERM_MEMORY_ORIG(drawdown_criteria=None):
 7 |     """default weights"""
 8 |     if drawdown_criteria is None:
 9 |         drawdown_criteria = DRAWDOWN_CRITERIA
10 |     
11 |     weights_for_longterm_memory = {'crit1':{'sd':1/365, 'mu':0, 'p':1}, 
12 |                                    'crit2':{'sd':1/drawdown_criteria, 'mu':0, 'p':1.3},
13 |                                    'crit3':{'sd':1/365, 'mu':0, 'p':1}, 
14 |                                    'crit4':{'sd':1/(365*drawdown_criteria), 'mu':0, 'p':1.2},
15 |                                    'crit5':{'sd':1.3, 'mu':-2.6, 'p':1}
16 |                                    }
17 |     return weights_for_longterm_memory
18 | 
# Default per-feature values, keyed by feature name and then by memory slot
# (1, 2, 3).
# NOTE(review): presumably used as fallback feature values for the three fib
# "memories" when no drawdown is available -- the consumer is not in this
# file; confirm against core.py.
DEFAULT_MEMORY_FEATURES = {'max_drawdown': {1: 0.5150874123152283, 2: 0.566382779151033, 3: 0.6531398034264879},
                           'time_since_peak': {1: 6.8089679500637335, 2: 7.651139423056396, 3: 8.308649817574022},
                           'duration': {1: 6.389924016638725, 2: 6.837994549153265, 3: 7.518228268861231},
                           'precovery': {1: -0.05884945292016635, 2: -0.525211373546828, 3: -0.7116667775865574},
                           'fib_lev':{1: -1, 2: -1, 3: -1},
                           'box01':{1: 0.45192856058112363, 2: 0.44829321386954174, 3: 0.45923949807178127}}
                           #'topdist': {1: 0.2031, 2: 0.3948, 3: 0.8395},
                           #'botdist': {1: 0.0992, 2: 0.0506, 3: 0.1165}} #
28 | 
29 | # best: (1.7585311911056594, 0.27694100799162485, 1.4815901831140343)
30 | #x0= np.array([ 3.45477257e+02,  0.00000000e+00,  9.95226003e-01,
31 | #  9.34217669e-02, 1.24536817e+00,
32 | #  3.79783410e+02, -3.04482610e+00,  1.00425408e+00,
33 | #  3.47102625e+02, -1.07730288e-01,  1.19487055e+00,
34 | #  1.39050476e+00, -2.69211513e+00,  1.00000000e+00])
35 | 
def GET_WEIGHTS_FOR_LONGTERM_MEMORY(drawdown_criteria=None):
    """Return the current default weights for the long-term memory.

    Args:
        drawdown_criteria: drawdown criteria used to scale the 'sd' of
            'crit2' and 'crit4'. Falls back to DRAWDOWN_CRITERIA when None.

    Returns:
        dict mapping 'crit1'..'crit5' to a dict with keys 'sd', 'mu', 'p'.
    """
    dd = DRAWDOWN_CRITERIA if drawdown_criteria is None else drawdown_criteria

    # (name, sd, mu, p) for each criterion, in output order
    rows = (
        ('crit1', 1/345.477257, 0, 0.995226003),
        ('crit2', 1/dd, 0.0934217669, 1.24536),
        ('crit3', 1/379.78, -3.0448261, 1.00425408),
        ('crit4', 1/(347.10*dd), -0.10773, 1.19487055),
        ('crit5', 1.3905, -2.692115, 1),
    )
    return {name: {'sd': sd, 'mu': mu, 'p': p} for name, sd, mu, p in rows}
48 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # Fibonacci ML: Automatic Fib Extensions/Retracements for Machine Learning
  2 | 
  3 | This repository contains code for automatically finding fibonacci-retracements in a price timeseries, and converting them _into features for statistical analysis_ (i.e., feature engineering), like "the percentage that a spot-price is between two fib-levels" (`fib-box`) to know whether the spot price is closer to a fib-level below or a fib-level above. See below for all features.
  4 | 
  5 | The project attempts to:
  6 | - remove the subjectiveness of drawing fibonacci retracements.
  7 | - convert fib-retracement & extensions (both short-term and long-term) into a time-series of features that can be easily loaded into a machine-learning model (e.g, to model prices according to fib-features). 
  8 | 
  9 | - Inputs:  
 10 | -- pandas dataframe of OHLC prices
 11 | - Outputs:  
 12 | -- pandas dataframe of features corresponding to a contiguous time-series representing *all* fib-retracements and extensions.
 13 | 
 14 | The main features and benefits of the approach are the following:
 15 | - automatically finds drawdowns with either:
 16 |     1. a user-specified `drawdown_criteria` (usually 0.2);
 17 |     2. automatically finds an appropriate criteria, adjusting for the volatility of the ticker under-analysis (e.g., we want a larger drawdown criteria for more volatile stocks).
 18 | - converts the fibs into a smooth contiguous timeseries of features of the drawdown: `max_drawdown`, `duration`, `precovery` (percent recovery), `fib_lev` (the fib-level), `time_since_peak_d`, and `fib-box01` (the % that price is between two fib-levels).
 19 | - tracks three fibonacci-retracements in parallel: i) most recent drawdown; ii) the previous drawdown; and iii) the "long-term memory" of any significant monster drawdowns (even decades earlier). For example, the dot-com bubble resulted in a monster drawdown in the Nasdaq, whose fibonacci extensions may still be important as features a decade later.
 20 | - doesn't "cheat": the features at time `t` are never calculated using information from the future, which many backtesters violate when they first make fib-retracements for an entire time-series, and then fail to mask/hide the future information from prices in the past (illegal!)
 21 | 
 22 | The figure below (top) shows the automatic finding of retracements and extensions for `QQQ`. Notice that the levels correspond to: `0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09, 17.944`
 23 | 
 24 | ![](img/fibonacci_timeseries.png?raw=true)
 25 | 
 26 | 
 27 | 
 28 | ## Demonstration
 29 | 
 30 | See the file `demo_fibonacci_ml.py`.
 31 | 
 32 | ```
 33 | import os
 34 | import datetime
 35 | import yfinance as yf
 36 | import pandas as pd
 37 | import numpy as np
 38 | import matplotlib.pyplot as plt
 39 | import copy
 40 | import re
 41 | 
 42 | from fibonacci_ml.core import *
 43 | 
 44 | # get timeseries for QQQ
 45 | data = yf.download("QQQ")
 46 | 
 47 | # initialize the FibonacciTechnicalAnalysis object
 48 | fib_maker = FibonacciTechnicalAnalysis(data, drawdown_criteria=0.20, do_plot=False)
 49 | 
 50 | # make the features
 51 | features = fib_maker.make_fib_features()
 52 | ```
 53 | Summary of features for `QQQ`.
 54 | 
 55 | ```
 56 |                          count      mean       std       min       25%       50%       75%       max
 57 | fib-1_max_drawdown_d     5578.0  0.440728  0.150744  0.204047  0.282772  0.530009  0.530974  0.767416
 58 | fib-1_time_since_peak_d  5578.0  6.541665  0.969640  2.772589  5.937536  6.781058  7.321189  7.953318
 59 | fib-1_duration_d         5578.0  6.031123  1.106973  2.772589  5.003946  6.389924  6.997596  7.254885
 60 | fib-1_precovery_d        5578.0 -0.025882  0.352544 -1.090901 -0.201193  0.002114  0.178860  0.767416
 61 | fib-1_fib_lev_d          5578.0 -0.114710  0.542956 -1.000000 -0.432416 -0.055316  0.098612  1.248340
 62 | fib-1_box01_d            5578.0  0.458309  0.280091  0.000000  0.207130  0.451929  0.689416  0.999762
 63 | fib-2_max_drawdown_d     5578.0  0.580211  0.178354  0.204047  0.530009  0.530974  0.799657  0.830577
 64 | fib-2_time_since_peak_d  5578.0  7.759018  0.507455  6.265301  7.523076  7.824046  8.162516  8.511779
 65 | fib-2_duration_d         5578.0  6.936482  0.810381  4.369448  6.837995  7.254885  7.254885  7.956126
 66 | fib-2_precovery_d        5578.0 -0.346507  0.803978 -2.420680 -0.814035 -0.522701  0.585802  0.830577
 67 | fib-2_fib_lev_d          5578.0 -0.019981  0.838561 -1.000000 -1.000000  0.098612  0.830339  1.248340
 68 | fib-2_box01_d            5578.0  0.466216  0.273921  0.001096  0.245649  0.448293  0.700172  0.999992
 69 | fib-3_max_drawdown_d     5578.0  0.759640  0.086926  0.653140  0.653140  0.830577  0.830577  0.830577
 70 | fib-3_time_since_peak_d  5578.0  8.444213  0.243571  7.957527  8.308650  8.308650  8.645894  8.950403
 71 | fib-3_duration_d         5578.0  8.099524  0.506587  7.518228  7.518228  8.206993  8.645894  8.694670
 72 | fib-3_precovery_d        5578.0 -0.255281  0.599137 -1.905022 -0.711667 -0.526383  0.358030  0.782663
 73 | fib-3_fib_lev_d          5578.0 -0.503271  0.543082 -1.000000 -1.000000 -0.613378 -0.055316  0.830339
 74 | fib-3_box01_d            5578.0  0.471312  0.235317  0.000014  0.388402  0.459239  0.545765  0.999927
 75 | ```
 76 | 
 77 | The pandas object `features` can then be used with price or other TA features in a machine-learning model for time-series analysis. That is what I do!
 78 | 
 79 | ## Price Snaking Through Fib-Levels
 80 | 
 81 | Here is the QQQ price as it snakes through various fibonacci-retracement and extension levels.
 82 | 
 83 | ![](img/fib-snake.png?raw=true)
 84 | 
 85 | 
 86 | ## Memory
 87 | 
 88 | The `FibonacciTechnicalAnalysis` has 3 memories: it tracks three retracements in parallel so that price is "aware" of multiple drawdowns
 89 | - Memory 1: the current/most recent drawdown. This is typically what most analysts focus on for short-term pivots  
 90 | - Memory 2: the previous drawdown. TA analysts often pay attention when the fib-levels from two different drawdowns align.  
 91 | - Long-term Memory: a model is used to track the long-term monster drawdowns, often spanning decades. E.g., some TA analysts refer to the 1999 Nasdaq drawdown for fib-extensions.
 92 | 
 93 | The following two graphs compare the Memory-1 drawdowns vs the Long-Term Memory drawdowns. The blue-dash lines represent, at any given point in time, which fib-levels are in the current "memory" and thus exposed to price. Notice that in Memory-1, the fib levels change with each new drawdown. However, the long-term memory seems to *only* care about the monster-drawdown in 1999.
 94 | 
 95 | ### short-term memory:
 96 | 
 97 | ![](img/shortterm-memory.png?raw=true)
 98 | 
 99 | ### long-term memory:
100 | 
101 | ![](img/longterm-memory.png?raw=true)
102 | 
103 | 


--------------------------------------------------------------------------------
/subjective_drawdown_finder.py:
--------------------------------------------------------------------------------
  1 | # classes and functions to automatically determine a good drawdown-criteria (% drawdown peak to trough) for making fibonacci-extensions/retracements
  2 | # uses two boosted-trees models, plus some heuristics, in order to find a good criteria
  3 | import os
  4 | import datetime
  5 | import yfinance as yf
  6 | import pandas as pd
  7 | import numpy as np
  8 | import ta
  9 | import matplotlib.pyplot as plt
 10 | import copy
 11 | import re
 12 | import pickle
 13 | from sklearn.tree import DecisionTreeRegressor
 14 | 
 15 | from .variables import RECOVERY_CRITERIA, TARGET_DENSITY
 16 | from .fib_utils import *
 17 | 
# Absolute directory containing this module; used below to locate the bundled
# pickled models under subjective_drawdown_models/.
CORE_PATH = os.path.abspath(os.path.dirname(__file__))
 19 | 
 20 | # main class of this file
 21 | class SubjectiveDrawdown:
 22 |     """
 23 |     models and functios to find optimal drawdown for making fibonacci extensions 
 24 |     principal function is self.fit()
 25 |     """
 26 |     def __init__(self, verbose =None, target_density=None, drawdown_cap = None, recovery_criteria=None, path_to_model_pred = None, path_to_model_refine = None):
 27 |         
 28 |         if verbose is None:
 29 |             verbose = False
 30 |         self.verbose = verbose
 31 |         
 32 |         # default target_density
 33 |         if target_density is None:
 34 |             target_density=TARGET_DENSITY
 35 |         self.target_density = target_density
 36 |         
 37 |         # cap the range of plausible drawdown criteria
 38 |         if drawdown_cap is None:
 39 |             drawdown_cap = [0.05, 0.7]
 40 |         self.drawdown_cap= drawdown_cap
 41 |         
 42 |         # criteria to judge when a retracement is finished (from peak)
 43 |         if recovery_criteria is None:
 44 |             recovery_criteria = RECOVERY_CRITERIA
 45 |         self.recovery_criteria = recovery_criteria
 46 |         
 47 |         # load the probabilistic models
 48 |         self.model = SubjectiveDrawdownModels(path_to_model_pred = path_to_model_pred,
 49 |                                               path_to_model_refine = path_to_model_refine,
 50 |                                               verbose=verbose)            
 51 |     
 52 |     def prefeature_trend(self, data, focal_column=None):
 53 |         """ mean and std (around residuals)"""
 54 |         if focal_column is None:
 55 |             focal_column = 'Close'
 56 |         
 57 |         # y data
 58 |         y = np.log(np.clip(data[focal_column], a_min = 0.001, a_max = None))
 59 |         y = ((y-y.mean()).values)#/y.std()
 60 |         
 61 |         # x data
 62 |         x = ((data.index - data.index.mean()).days).values/365
 63 |         
 64 |         # slope and intercept
 65 |         m = (len(x) * np.sum(x*y) - np.sum(x) * np.sum(y)) / (len(x)*np.sum(x*x) - np.sum(x) * np.sum(x)) # long-run log-linear increase
 66 |         b = (np.sum(y) - m *np.sum(x)) / len(x)
 67 |         
 68 |         # take the variance around the dominant trend
 69 |         residuals = y-(x*m+b)
 70 |         std_ = residuals.std()
 71 |         
 72 |         return m,std_
 73 |     
 74 |     def prefreature_realizedvol(self, data, hlc_columns=['High', 'Low', 'Close']):
 75 |         """basically standard-deviation, notice we exclude open because of API issues
 76 |         instead of std from the mean price, we take it from the previous price
 77 |         """
 78 |         
 79 |         # log the prices for highh low close
 80 |         y_hlc = [np.log(np.clip(data[col].values,a_min=0.01,a_max=None)) for col in hlc_columns]
 81 |         
 82 |         # split into hi-close and lo-close
 83 |         y_hc = np.concatenate([y_hlc[0].reshape(-1,1)]+[y_hlc[-1].reshape(-1,1)], axis=1).reshape(-1)
 84 |         y_lc = np.concatenate([y_hlc[1].reshape(-1,1)]+[y_hlc[-1].reshape(-1,1)], axis=1).reshape(-1)
 85 |         
 86 |         # difference between close and previous hi
 87 |         mean_realized_volatility = (((np.diff(y_hc)**2).sum() + (np.diff(y_lc)**2).sum())/(len(y_hc) + len(y_lc)-2))**0.5
 88 |         
 89 |         # same as above, but limited to only downsides
 90 |         y_close_diff = np.diff(y_hlc[-1])
 91 |         y_close_downside_diff = y_close_diff[np.where(y_close_diff<=0)[0]]
 92 |         mean_downside_volatility = ((y_close_downside_diff**2).mean())**0.5
 93 |         
 94 |         #
 95 |         return mean_realized_volatility, mean_downside_volatility
 96 |     
 97 |     def _optimal_drawdown_for_fibs_probablistic_estimator(self, data, target_density):
 98 |         """estimate an initial drawdown criteria, through a probabilistic model"""
 99 |         # get features: trend and std
100 |         ftrend,fstd = self.prefeature_trend(data)
101 |         if (str(ftrend)=='nan') or str(fstd)=='nan':
102 |             raise ValueError("trend or std")
103 |         
104 |         # get features: volatility and downside vol
105 |         fvol, fvoldown = self.prefreature_realizedvol(data, hlc_columns=['High', 'Low', 'Close'])
106 |         if (str(fvol)=='nan') or str(fvoldown)=='nan':
107 |             raise ValueError("trend or std")
108 |         
109 |         # features must be ordered: ['drawdown_crit', 'trend', 'std', 'vol', 'vold']
110 |         drawdown_criterias = np.linspace(0.05, 0.5, 50).reshape(-1,1)
111 |         X = np.concatenate([drawdown_criterias, np.array([ftrend]*50).reshape(-1,1), np.array([fstd]*50).reshape(-1,1), np.array([fvol]*50).reshape(-1,1), np.array([fvoldown]*50).reshape(-1,1)],axis=1)
112 |         # pdensity
113 |         pdensity = self.model.predict(X)
114 |         
115 |         # drawdown criteria suggested
116 |         drawdown_crit_suggested = drawdown_criterias[np.argmin((pdensity - target_density)**2)][0]
117 |         
118 |         return drawdown_crit_suggested, [ftrend, fstd, fvol, fvoldown, target_density]
119 |     
120 |     def _get_fibs(self, data, drawdown_crit, recovery_criteria=None):
121 |         """ wrapper for find_all_retracement_boxes and Fib to make a time-series of fibs"""
122 |         if recovery_criteria is None:
123 |             recovery_criteria = self.recovery_criteria
124 |         
125 |         fib_spans = find_all_retracement_boxes(data, drawdown_criteria=drawdown_crit)
126 |         # Fib(fib_span=fib_span, data=self.data, drawdown_criteria=self.drawdown_criteria, fib_levels=sexlf.fib_levels, recovery_criteria = self.recovery_criteria, make_features = make_features)
127 |         fib_series = [Fib(fib_span=fib_span, data=data, drawdown_criteria=drawdown_crit, recovery_criteria=0.02, fib_levels = [0,1,1.618]) for fib_span in fib_spans]
128 |         # remove null fibs (must pass .is_fib)
129 |         fib_series = [fib for fib in fib_series if fib.is_fib]
130 |         return fib_series
131 |     
132 |     def _density_of_drawdowns_given_fibs(self, fib_series, data=None, delta_time=None):
133 |         """estimates the annual density of fibs"""
134 |         # total time duration of series
135 |         if delta_time is None:
136 |             delta_time = (data.index[-1] - data.index[0]).days/365
137 |         return len(fib_series)/delta_time
138 |     
139 |     def _densities_by_kulling(self, fib_series, delta_time, orig_drawdown = None, results = None):
140 |         """empirical calculation of the relationship between drawdown and densities, by progressivingly kulling drawdowns"""
141 |         # maxdrawdowns
142 |         if orig_drawdown is None:
143 |             orig_drawdown = 0.2
144 |         
145 |         # results
146 |         if results is None:
147 |             results = pd.DataFrame({'drawdown_crit':[orig_drawdown], 'density':[len(fib_series)/delta_time]})
148 |         if len(fib_series)==0:
149 |             return results
150 |         
151 |         max_drawdowns = [fib.features['max_drawdown'].max() for fib in fib_series]
152 |         max_drawdowns = sorted(max_drawdowns)
153 |         
154 |         for i,drawdown_crit in enumerate(max_drawdowns):
155 |             
156 |             density_ =[len(max_drawdowns[(i+1):])/delta_time]
157 |             
158 |             crit_ = [drawdown_crit*1.001]
159 |             
160 |             if (np.abs(drawdown_crit*1.001 - results['drawdown_crit'].values).min() > 0.0005):
161 |                 results = results.append(pd.DataFrame({'drawdown_crit':crit_, 'density':density_}))
162 |         
163 |         return results
164 |     
165 |     def _drawdown_manual_finder(self, data, results, target_density, increment = None):
166 |         """
167 |         uses recursion to find a target density
168 |         increments a drawdown by 'increment' multiplicatively
169 |         """
170 |         # do recursion if all results are 0, or no results are greater than target
171 |         do_recursion = (results['density']==0).all() or (not (results['density'] >= target_density).any() )
172 |         
173 |         if not do_recursion:
174 |             return results
175 |         
176 |         if increment is None:
177 |             increment = 0.95
178 |         
179 |         drawdown_crit_increment = increment*results['drawdown_crit'].min()
180 |         
181 |         fib_series = self._get_fibs(data, drawdown_crit_increment)
182 |         delta_time = (data.index[-1] - data.index[0]).days/365
183 |         
184 |         # initial results
185 |         results = results.append(pd.DataFrame({'drawdown_crit':[drawdown_crit_increment],
186 |                                            'density':[1.001*len(fib_series)/delta_time]}))
187 |         # initial empirical results
188 |         results = self._densities_by_kulling(fib_series,
189 |                                             delta_time,
190 |                                             orig_drawdown =drawdown_crit_increment,
191 |                                             results = results)
192 |         
193 |         do_recursion = (results['density']==0).all() or (not (results['density'] >= target_density).any())
194 |         if do_recursion:
195 |             return self._drawdown_manual_finder(data, results, target_density, increment)
196 |         
197 |         return results
198 |     
199 |     def fit(self, data, target_density=None, drawdown_cap=None):
200 |         """estimate an initial drawdown criteria, through:
201 |         - step1: a probabilistic model
202 |         - step2: iterate through fibs and kull one-by-one, empirically measuring the densities"""
203 |         assert type(data) == pd.core.frame.DataFrame
204 |         assert 'Close' in data.columns
205 |         
206 |         if target_density is None:
207 |             target_density = self.target_density
208 |         
209 |         if drawdown_cap is None:
210 |             drawdown_cap = self.drawdown_cap
211 |         
212 |         drawdown_crit, X = self._optimal_drawdown_for_fibs_probablistic_estimator(data, target_density)
213 |         # get fibs and calculate the density of drawdownd
214 |         fibs = self._get_fibs(data, drawdown_crit)
215 |         
216 |         # calculate density and residuals
217 |         delta_time = (data.index[-1] - data.index[0]).days/365
218 |         realized_density = 1.001*len(fibs)/delta_time
219 |         resid = target_density - realized_density
220 |         
221 |         if self.verbose:
222 |             print("%s: DD1 %0.3f:%0.3f fibs/year" % (ticker, drawdown_crit, realized_density))
223 |         
224 |         # initial results
225 |         results = pd.DataFrame({'drawdown_crit':[drawdown_crit], 'density':[realized_density]})
226 |         
227 |         # next estimate: trigger next model
228 |         if realized_density < target_density:
229 |             
230 |             # run next model (refinement)
231 |             X += [drawdown_crit, resid]
232 |             drawdown_crit = self.model.refine(X)#[0]
233 |             fibs = self._get_fibs(data, drawdown_crit)
234 |             
235 |             realized_density = 1.001*len(fibs)/delta_time
236 |             results = pd.DataFrame({'drawdown_crit':[drawdown_crit], 'density':[realized_density]})
237 |             if self.verbose:
238 |                 print("%s: DD2 %0.3f:%0.3f fibs/year" % (ticker, drawdown_crit, realized_density))
239 |         
240 |         # initial empirical results
241 |         results = self._densities_by_kulling(fibs, delta_time, orig_drawdown =drawdown_crit, results = results)
242 |         # recursively find drawdown closer to the target
243 |         results = self._drawdown_manual_finder(data, results, target_density)
244 |         
245 |         # get what?: at least as great as the target_density, but closeest
246 |         ix_meet_or_exceed_criteria = np.where(results.density >= target_density)[0]
247 |         
248 |         if len(ix_meet_or_exceed_criteria)>0:
249 |             results_sub = results.iloc[ix_meet_or_exceed_criteria]
250 |         else:
251 |             results_sub = results
252 |         
253 |         drawdown_crit_suggested = results_sub.drawdown_crit.iat[np.argmin((results_sub.density - target_density)**2)]
254 |         if self.verbose:
255 |             print("%s: DD3 %0.3f FINAL" % (ticker, drawdown_crit_suggested))
256 |         
257 |         # clip the drawdown output
258 |         if drawdown_crit_suggested> max(drawdown_cap):
259 |             drawdown_crit_suggested = max(drawdown_cap)
260 |         elif drawdown_crit_suggested < min(drawdown_cap):
261 |             drawdown_crit_suggested = min(drawdown_cap)
262 |         
263 |         return drawdown_crit_suggested, results
264 | 
class SubjectiveDrawdownModels:
    """Container for two boosting models that predict drawdown-criterias."""
    def __init__(self, path_to_model_pred = None, path_to_model_refine = None, verbose=False, unit_test = True):
        """Load both pickled models and optionally sanity-check them.

        Args:
            path_to_model_pred: path to the predictor pickle (default: the
                bundled subjective_drawdown_model1.pkl under CORE_PATH).
            path_to_model_refine: path to the refiner pickle (default: the
                bundled subjective_drawdown_model2.pkl under CORE_PATH).
            verbose: print the paths being loaded.
            unit_test: run self.run_tests() right after loading.
        """
        self.verbose = verbose
        if path_to_model_pred is None:
            path_to_model_pred = os.path.join(CORE_PATH, "subjective_drawdown_models/subjective_drawdown_model1.pkl")
        if path_to_model_refine is None:
            path_to_model_refine = os.path.join(CORE_PATH, "subjective_drawdown_models/subjective_drawdown_model2.pkl")

        self.path_to_model_pred = path_to_model_pred
        self.path_to_model_refine = path_to_model_refine

        # load the models
        self.load_model_pred()
        self.load_model_refine()

        # unit_test on load
        if unit_test:
            self.run_tests()

    def load_model_pred(self):
        """Load model one / the predictor model from self.path_to_model_pred."""
        if self.verbose:
            print("loading drawdown prediction model 1 %s" % self.path_to_model_pred)
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point this at trusted model files.
        with open(self.path_to_model_pred, 'rb') as pcon:
            self.mod_pred = pickle.load(pcon)

    def load_model_refine(self):
        """Load model two / the refiner model from self.path_to_model_refine."""
        if self.verbose:
            print("loading drawdown refinement model 2 %s" % self.path_to_model_refine)
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point this at trusted model files.
        with open(self.path_to_model_refine, 'rb') as pcon:
            self.mod_refine = pickle.load(pcon)

    def predict(self, X):
        """Prediction from model one.

        A plain list is treated as one sample and reshaped to (1, n_features);
        anything else is passed to the model unchanged.
        """
        if isinstance(X, list):
            X = np.array(X).reshape(1,-1)

        return self.mod_pred.predict(X)

    def refine(self, X2):
        """Prediction from model two (the refiner), returned as a scalar.

        A plain list is treated as one sample and reshaped to (1, n_features);
        only the first prediction is returned.
        """
        if isinstance(X2, list):
            X2 = np.array(X2).reshape(1,-1)
        return self.mod_refine.predict(X2)[0]

    def run_tests(self):
        """Sanity-check both models against stored reference predictions.

        Raises:
            AssertionError: if either prediction differs from its reference
                value by 1e-6 or more.
        """
        tolerance = 1e-6

        p = self.predict(np.array([[0.58720078, 0.39931927, 0.2731371 , 0.04188929, 0.0312278 ]]))
        print("testing model 1 (predictor)")
        # abs(): without it any prediction *below* the reference passed
        assert abs(p[0] - 0.28313829505556226) < tolerance

        q = self.refine(np.array([[0.39931927012611657, 0.273137095058999, 0.041889285755502804, 0.03122779683661232, 0.26, 0.4908163265306123, -4.626334519569619e-05]]))
        print("testing model 2 (refiner)")
        assert abs(q - 0.46655745847591307) < tolerance
324 | 
325 | #foo = SubjectiveDrawdownModels(unit_test = True)
326 | #subjective_drawdown = SubjectiveDrawdown(verbose =True, target_density=0.25)
327 | 
328 | 


--------------------------------------------------------------------------------
/fib_utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import matplotlib.pyplot as plt
  5 | from scipy.stats import trim_mean
  6 | from .variables import TARGET_DENSITY, RECOVERY_CRITERIA, FIB_LEVELS
  7 | 
  8 | # Fib, find_all_retracement_boxes, find_retracement_boxes, get_highs, get_lows
  9 | 
 10 | def get_highs(data):
 11 |     """takes average  high and max(close,open). I.e., split the difference between a body-candle and a wick"""
 12 |     return (0.5*data['High'] +0.5*data[['Close','Open']].max(axis=1))
 13 | 
 14 | def get_lows(data):
 15 |     """takes average of low and low(close,open)"""    
 16 |     return (0.5*data['Low'] +0.5*data[['Close','Open']].min(axis=1))
 17 | 
 18 | def find_retracement_boxes(data, drawdown_criteria,do_plot=False, offset=None, recovery_criteria=None):
 19 |     """when drawing fibonacci retrace/extensions, you need to draw a box from the local high to low; this function automatically finds such boxes on which to base the retracement/extension-levels;
 20 |     returns list of tuples [(m1,2),...]. m1:= start of retracement box; m2:=end of retrace box"""
 21 |     if offset is None:
 22 |         offset = 0
 23 |     
 24 |     if recovery_criteria is None:
 25 |         recovery_criteria = 0.02
 26 |     
 27 |     # cumulative highs during period
 28 |     vHighs = get_highs(data)
 29 |     cummax = vHighs.cummax()
 30 |     # vector of % draw-dwosn
 31 |     vDrawdowns = (cummax - get_lows(data))/cummax
 32 |     vRecovery = (cummax - vHighs)/cummax
 33 |     
 34 |     # crude binary indicator of whether or not price is in a drawdown
 35 |     #in_Drawdowns = vDrawdowns>=drawdown_criteria
 36 |     
 37 |     rDrawdowns = []
 38 |     r_drawdown = 0
 39 |     #in_drawdown = 0
 40 |     # loop backwards
 41 |     for i in range(len(vDrawdowns)-1,-1,-1):
 42 |         #in_drawdown_lag = in_drawdown
 43 |         #in_drawdown = in_Drawdowns.iloc[i]
 44 |         #reset_ = (vDrawdowns.iloc[i]<=recovery_criteria)
 45 |         reset_ = (vRecovery.iloc[i]<=recovery_criteria)
 46 |         r_drawdown = max(r_drawdown, vDrawdowns.iloc[i]) if not reset_ else 0
 47 |         rDrawdowns.append(r_drawdown)
 48 |     
 49 |     rDrawdowns = np.array(rDrawdowns[::-1])
 50 |     
 51 |     # putative fib periods (vector
 52 |     fib_periods = rDrawdowns*(rDrawdowns>=drawdown_criteria) + np.zeros(len(rDrawdowns))
 53 |     
 54 |     # fib periods are useful for finding local fib periods
 55 |     # now split the fib periods into spans
 56 |     idx_fib_periods = np.where(np.diff(1*(fib_periods>0)))[0]
 57 |     if fib_periods[-1]>0:
 58 |         idx_fib_periods = np.concatenate((idx_fib_periods, np.array([len(fib_periods)])))
 59 |     
 60 |     nmax = data.shape[0] 
 61 |     #fib_spans = [(m1+1,min(m2+1,nmax )) for m1,m2 in zip(idx_fib_periods[:-1],idx_fib_periods[1:]) if all(fib_periods[(m1+1):(m2+1)]>0)]
 62 |     fib_spans = [(m1,min(m2+1,nmax )) for m1,m2 in zip(idx_fib_periods[:-1],idx_fib_periods[1:]) if all(fib_periods[(m1+1):(m2+1)]>0)]
 63 |     
 64 |     if do_plot:
 65 |         fig, axs = plt.subplots(3)
 66 |         axs[0].plot(np.arange(data.shape[0]),np.log(data['Close']))    
 67 |         axs[1].plot(np.arange(vDrawdowns.shape[0]),vDrawdowns)
 68 |         # plotting the cumsum
 69 |         axs[2].plot(np.arange(len(rDrawdowns)),rDrawdowns[::-1])
 70 |         for s in fib_spans:
 71 |             axs[2].plot(np.arange(s[0],s[1]), [0.5]*(s[1]-s[0]))
 72 |         plt.show()
 73 |     
 74 |     if offset!=0:
 75 |         # adjust the spans by offset
 76 |         fib_spans = [(m1+offset,m2+offset) for m1,m2 in fib_spans]
 77 |     return fib_spans
 78 | 
 79 | # finding smaller fib periods WITHIN giant fib periods
 80 | def find_sub_drawdowns_within_a_giant_drawdown(data, fib_span, drawdown_criteria, START_LOOKING_AFTER_DAYS=None, POST_BUFFER_DAYS = None, RETRACE_MINIMUM=None, recovery_criteria=None):
 81 |     """
 82 |     finds drawsdowns within larger drawdowns, using some criteria:
 83 |     - time: only starts looking for another drawdown after 1.5 years after the peak of the supra
 84 |     - minimum retrace: the supra must retrace to 0.5 level to qualify for looking for another drawdown (to prevent too many drawdowns that are just continuation of the primary trend
 85 |     """
 86 |     if START_LOOKING_AFTER_DAYS is None:
 87 |         START_LOOKING_AFTER_DAYS = 252*1.5
 88 |     if POST_BUFFER_DAYS is None:
 89 |         POST_BUFFER_DAYS = 100
 90 |     if RETRACE_MINIMUM is None:
 91 |         RETRACE_MINIMUM=0.382
 92 |     if recovery_criteria is None:
 93 |         recovery_criteria=0.02
 94 |     
 95 |     start_iloc, stop_iloc = fib_span
 96 |     n_ = data.shape[0]
 97 |     if ((stop_iloc-start_iloc)<=START_LOOKING_AFTER_DAYS) or ((start_iloc+START_LOOKING_AFTER_DAYS)>= n_):
 98 |         # don't proceed if small retracement period
 99 |         return None
100 |     
101 |     # get the retracements: check if it has retrace at least X
102 |     subdata = data.iloc[start_iloc:stop_iloc]
103 |     cummax_ = get_highs(subdata).cummax()
104 |     low_ = get_lows(subdata)
105 |     cummin_ = low_.cummin()
106 |     vDrawdowns_ = (cummax_ - low_)/cummax_
107 |     vDoes_retracement = subdata['Close']>=(((cummax_ - cummin_)*RETRACE_MINIMUM)+cummin_)
108 |     if not vDoes_retracement.sum():
109 |         # no price is above the minium retracement
110 |         return None
111 |     
112 |     # minimum place to start relooking for another drawdown
113 |     ix_minrestart = vDoes_retracement.tolist().index(1)
114 |     ix_minrestart = max(ix_minrestart, START_LOOKING_AFTER_DAYS)
115 |     
116 |     # new (crude) search box
117 |     startsub_iloc = int(start_iloc + min(ix_minrestart,n_) )
118 |     stopsub_iloc = int(stop_iloc + min(POST_BUFFER_DAYS, n_))
119 |     fib_subspans = find_retracement_boxes(data.iloc[startsub_iloc:stopsub_iloc], drawdown_criteria, do_plot=False, offset = startsub_iloc, recovery_criteria = recovery_criteria)
120 |     return fib_subspans
121 | 
122 | # wrapper for find_retracement_boxes
def find_all_retracement_boxes(data, drawdown_criteria=None, START_LOOKING_AFTER_DAYS=None, POST_BUFFER_DAYS = None, fib_spans=None, recovery_criteria=None):
    """
    Collect every retracement span: the big ones plus the sub-spans nested inside them.

    Combines 'find_retracement_boxes' (big spans) with
    'find_sub_drawdowns_within_a_giant_drawdown' (spans inside a big span), and
    calls itself on each set of sub-spans so that deeper nesting is found too.

    If `fib_spans` is given it is used as the list of big spans instead of
    searching `data` for them. Returns a list of (start, end) tuples.
    """
    if fib_spans is None:
        top_level_spans = find_retracement_boxes(data, drawdown_criteria, do_plot=False, recovery_criteria=recovery_criteria)
    else:
        top_level_spans = fib_spans

    collected = []
    for span in top_level_spans:
        collected.append(span)

        nested = find_sub_drawdowns_within_a_giant_drawdown(data, span, drawdown_criteria, START_LOOKING_AFTER_DAYS, POST_BUFFER_DAYS, recovery_criteria=recovery_criteria)
        if nested is None:
            continue

        # recurse: the nested spans may themselves contain further sub-spans
        nested = find_all_retracement_boxes(data, drawdown_criteria, START_LOOKING_AFTER_DAYS, POST_BUFFER_DAYS, nested, recovery_criteria = recovery_criteria)
        for candidate in nested:
            # skip spans that were already collected
            if candidate not in collected:
                collected.append(candidate)

    return collected
150 | 
class Fib:
    """Contains the data needed to turn one fibonacci retracement into features for ML.

    On construction it builds the time series of fibonacci levels over the span
    (stored as `self.fib_series` / `self.series_indices`) and, if requested, the
    drawdown features (stored as `self.features`). `self.is_fib` is False when the
    span never reaches `drawdown_criteria`; in that case no series or features are stored.
    """
    def __init__(self, fib_span, data, drawdown_criteria, fib_levels, recovery_criteria, make_features = True):
        """
        Arguments:
        - fib_span: (start, end) row positions in `data` that box in the drawdown
        - data: price frame with 'High', 'Low', 'Close', 'Open' columns
          (the feature code does date arithmetic on the index, so a date-like index is assumed)
        - drawdown_criteria: fractional drawdown at which the span counts as a credible fib
        - fib_levels: fibonacci levels (list/array of multipliers)
        - recovery_criteria: fractional distance to the high that declares the drawdown over
        - make_features: if True (and the span is a fib), also build `self.features`
        """
        self.drawdown_criteria = drawdown_criteria # what is considered a bear market crash?
        self.start = fib_span[0] # start of retracement
        self.end = fib_span[1]   # end of retracement
        self.fib_levels = np.array(fib_levels) # fib numbers
        self.recovery_criteria = recovery_criteria # percent to high that declares bear over

        # default: not a fib
        self.is_fib = False
        self.indx_start_of_credible_fib = None
        self.loc_start_of_credible_fib = None

        # get levels, as a time-series; returns (None, []) when the criteria is never met
        fib_series, _ = self.calc_fib_series_on_span(data, fib_span, do_mask=True)
        self.is_fib = fib_series is not None

        # features for ML tool (stored on self.features by _make_features)
        if make_features and self.is_fib:
            self._make_features(data)

        self.n_total = data.shape[0]

    def calc_fibs(self, hi,lo):
        """given a high, and a low, get the fibonacci extensions and retracements. """
        return (hi-lo)*self.fib_levels + lo

    def calc_fib_series_on_span(self, data, fib_span=None, do_mask=True, drawdown_criteria=None):
        """Given a price series and two row positions that box in the drawdown, build the fibonacci levels.

        Arguments:
        - data: price frame
        - fib_span: (start, end) row positions; defaults to (self.start, self.end)
        - do_mask: if True, drop the leading part of the span that precedes the
          first time the drawdown criteria is met
        - drawdown_criteria: criteria used for the mask; defaults to self.drawdown_criteria.
          It is only consulted when `self.indx_start_of_credible_fib` has not been set yet.

        Returns (FibTimeSeries, indices) where `indices` are row positions in `data`,
        or (None, []) when masking is requested and the criteria is never met.
        Also stores both on `self.fib_series` / `self.series_indices`.
        """
        if drawdown_criteria is None:
            drawdown_criteria = self.drawdown_criteria

        if fib_span is None:
            start_iloc, stop_iloc = self.start, self.end
        else:
            start_iloc, stop_iloc = fib_span

        subdata = data.iloc[start_iloc:stop_iloc]
        # get cumulative-high (notice it takes halfway between body-of-candle and wick)
        cummax = get_highs(subdata).cummax()
        # get cumulative low
        cumlow = get_lows(subdata).cummin()
        # series of fibs; the extended index is every date from the span start onwards
        fib_series_ = FibTimeSeries(cumlow, cummax, fib_levels = self.fib_levels, indices_extended = data.index[data.index>=subdata.index[0]])
        indices = np.arange(start_iloc, stop_iloc)

        # mask
        if do_mask:
            # mask out all fibs BEFORE the drawdown criteria is met (because at those
            # times, we wouldn't know we would soon be making fib-retracements)
            if self.indx_start_of_credible_fib is not None:
                indx_start_of_credible_fib = self.indx_start_of_credible_fib
            else:
                # use the resolved argument (falls back to self.drawdown_criteria above)
                in_drawdown = 1*(((cummax - cumlow)/cummax)>=drawdown_criteria)
                if not in_drawdown.any():
                    return None,[]

                indx_start_of_credible_fib = in_drawdown.tolist().index(1)
                self.indx_start_of_credible_fib = indx_start_of_credible_fib
                self.loc_start_of_credible_fib = data.index[self.indx_start_of_credible_fib]

            # truncate series
            fib_series_.mask_out_predrawdown(indx_start_of_credible_fib)
            # new indices (after truncating for the first drawdown)
            indices = indices[indx_start_of_credible_fib:]
            assert len(indices) == fib_series_.shape[-1]

        self.fib_series = fib_series_
        self.series_indices = indices
        return fib_series_, indices

    def _make_features(self, data, fib_span=None, drawdown_criteria=None, recovery_criteria=None):
        """Make primitives for calculating fib retracements and features like:
        - max drawdown
        - time since peak
        - duration
        - ever-recovers?
        - fractional drawdown ('precovery')
        - volume (drawdown accumulated over days until recovery)

        Each feature is a series that starts where the drawdown criteria is first
        met inside the span and runs to the end of the (global) time-series: the
        in-span values followed by values extended beyond the span.

        Returns the features as a dict (also stored on self.features), or None
        when the drawdown criteria is never met inside the span.
        """
        if recovery_criteria is None:
            recovery_criteria = self.recovery_criteria

        if drawdown_criteria is None:
            drawdown_criteria = self.drawdown_criteria

        if fib_span is None:
            fib_span = (self.start,self.end)

        # beginning and end of drawdown
        start_iloc, stop_iloc = fib_span

        subdata = data.iloc[start_iloc:stop_iloc]

        # size of span
        n = stop_iloc-start_iloc
        n_extended = data.shape[0]-stop_iloc

        # get cumulative-high (notice it takes halfway between body-of-candle and wick)
        cummax = get_highs(subdata).cummax()
        # last running high of the span; positional access, because a bare [-1]
        # on a date-labelled series is a label lookup in recent pandas
        final_price_at_recovery = cummax.iloc[-1]
        # cummax extended to end of dataseries
        cummax_extended = pd.DataFrame({'cummax':[final_price_at_recovery]*n_extended}, index = data.index[stop_iloc:])['cummax']
        # cumulative lows (extended and in_span)
        low_extended = get_lows(data.iloc[start_iloc:]) # extended
        low_ = low_extended.iloc[:n]
        cumlow = low_.cummin()

        # proportion drawdown, in-span then extended
        vDrawdown = (cummax - low_)/cummax
        vDrawdown_extended = (cummax_extended - low_extended.iloc[n:])/cummax_extended
        vDrawdown_full = pd.concat((vDrawdown,vDrawdown_extended),axis=0)

        # is in drawdown?
        in_drawdown = 1*(((cummax - cumlow)/cummax)>=drawdown_criteria)
        if not in_drawdown.any():
            return None

        # mask: in realtime, we only know we are in a fib if drawdown criteria is met
        self.indx_start_of_credible_fib = in_drawdown.values.argmax()
        self.loc_start_of_credible_fib = in_drawdown.index[self.indx_start_of_credible_fib]

        # feature: has recovered? (stays 1 once the drawdown first drops below the recovery criteria)
        feat_recovered_full = 1*(vDrawdown_full.iloc[self.indx_start_of_credible_fib:]<recovery_criteria).cummax()

        # get the date-at-recovery
        self.time_at_recovery = (feat_recovered_full==1).idxmax(axis=0) # when first recovered?
        if self.time_at_recovery == feat_recovered_full.index[0]:
            # idxmax returns the first label when nothing is flagged: treat as "end of data"
            self.time_at_recovery = feat_recovered_full.index[-1]
        self.idx_at_recovery = np.where(data.index ==self.time_at_recovery)[0][0]

        # feature: time since peak
        # ... label of the first row at which the running high reaches its maximum
        self.loc_peak = cummax.idxmax()
        vTimeSincePeak = pd.DataFrame({'vTimeSincePeak':((subdata.index - self.loc_peak).days).values},index=subdata.index)['vTimeSincePeak']
        vTimeSincePeak_extended = pd.DataFrame({'vTimeSincePeak':((data.iloc[fib_span[1]:].index - self.loc_peak).days).values},index = data.index[fib_span[1]:])['vTimeSincePeak']
        vTimeSincePeak_full = pd.concat((vTimeSincePeak,vTimeSincePeak_extended),axis=0).iloc[self.indx_start_of_credible_fib:]

        # duration: time-since-peak up to the recovery, then frozen at its value on the recovery date
        duration_full = pd.concat((
            pd.DataFrame({'duration':vTimeSincePeak_full.loc[:(self.time_at_recovery+pd.Timedelta(-1, unit="day"))]})['duration'],
            pd.DataFrame({'duration':[vTimeSincePeak_full.loc[self.time_at_recovery]]*(data.shape[0]-self.idx_at_recovery)}, index = data.loc[self.time_at_recovery:].index)['duration']
            ), axis=0)

        # feature: max drawdown (running in-span, then frozen at the span maximum)
        max_drawdown = ((cummax-cumlow)/cummax)
        max_drawdown_extended = pd.DataFrame({'max_drawdown':[max_drawdown.max()]*n_extended},index=data.index[stop_iloc:])['max_drawdown']
        max_drawdown_full = pd.concat((max_drawdown,max_drawdown_extended),axis=0).iloc[self.indx_start_of_credible_fib:]

        # feature volume: drawdown accumulated until recovery
        # formula: (not yet recovered) x (drawdown percentage)
        # ... the cumsum is the naive volume. To get actual volume, we need to multiply by the number of days inbetween points, then cumsum
        volume_naive_pdf = (1-feat_recovered_full)*vDrawdown_full.iloc[self.indx_start_of_credible_fib:]
        # ... do ('volume_naive_pdf' x diff(days)).cumsum() to get absolute volume
        diffs_days = (volume_naive_pdf.index[1:] - volume_naive_pdf.index[:-1]).days.values
        # the first row has no predecessor: use the mean gap for it
        diffs_days = np.array([diffs_days.mean()] + diffs_days.tolist())
        volume = (volume_naive_pdf*diffs_days).cumsum()

        # collect features
        self.features = {'max_drawdown':max_drawdown_full,
                        'time_since_peak':vTimeSincePeak_full,
                        'duration':duration_full,
                        'recovered':feat_recovered_full,
                        'precovery':vDrawdown_full.iloc[self.indx_start_of_credible_fib:],
                         'volume': volume
                        }

        # sanity checks: these features must all cover the same rows
        assert self.features['max_drawdown'].shape[0] ==self.features['time_since_peak'].shape[0]
        assert self.features['recovered'].shape[0] ==self.features['time_since_peak'].shape[0]
        assert self.features['precovery'].shape[0] ==self.features['time_since_peak'].shape[0]
        assert self.features['precovery'].shape[0] ==self.features['volume'].shape[0]
        return self.features
331 | 
332 | # class FibTimeSeries: a time series of fibonacci levels, with helpers for slicing and extending it
class FibTimeSeries:
    """A numpy array of fib retracement/extension levels over time, plus helpers to slice it.

    `self.fib_series` has one row per fib level and one column per time step;
    `self.index` holds the time labels of the columns.
    """
    def __init__(self, cumlow, cummax, fib_levels, indices_extended = None):
        """
        Arguments:
        - cumlow / cummax: cumulative lows and highs (series sharing one index)
        - fib_levels: array-like of level multipliers, e.g.
          [0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09]
        - indices_extended: time labels beyond the series, needed by `get` to
          extend the final levels into the future
        """
        # accept any array-like (list, tuple, ndarray); an ndarray is used as-is
        self.fib_levels = np.asarray(fib_levels)
        # size of series, within span
        self.n_inspan = cumlow.shape[0]
        self.n_fib_levels = len(self.fib_levels)
        assert self.n_inspan == cummax.shape[0]

        # time-indices
        self.index = cummax.index
        # ... time at beginning and end
        self.start_loc = self.index[0]
        self.end_loc = self.index[-1]

        # time series of retracements (lows first, highs second: see calc_fibs)
        self.fib_series = self.calc_fibs(cumlow, cummax)
        self.shape = self.fib_series.shape

        # indices used for extending the series into the future
        self.indices_extended = indices_extended

    def calc_fib(self, lo, hi):
        """given a low, and a high, get the fibonacci extensions and retracements. """
        return (hi-lo)*self.fib_levels + lo

    def calc_fibs(self, cummax, cumlow):
        """Build the (n_levels, n_times) array of fib levels.

        NOTE: despite the parameter names, the FIRST argument is used as the lows
        and the SECOND as the highs (each pair is handed to `calc_fib(lo, hi)` in
        that order), which is how __init__ calls it. The names are kept unchanged
        so existing callers are unaffected.
        """
        return np.array([self.calc_fib(first, second) for first, second in zip(cummax, cumlow)]).T

    def numpy(self):
        """default to returning the full series """
        return self.fib_series

    def __getitem__(self, item):
        """default getitem is to just return the numpy array __getitem__ """
        return self.fib_series[item]

    def get(self, start=None, end=None, return_indices = False):
        """Elaborate getitem that takes time labels and can extend past the end of the series.

        - no `start` and no `end`: returns the full array (plus `self.index`
          when `return_indices` is True)
        - otherwise: returns the columns whose label lies in [start, end]; when
          `end` is later than the last label, the final column is repeated once
          for every label of `self.indices_extended` that falls in the gap.
          `return_indices` is not used on this path.

        Raises ValueError when extension is needed but `indices_extended` is None.
        """
        if start is None and end is None:
            # case: if no indices specified, just return the numpy
            if return_indices:
                return self.numpy(), self.index
            return self.numpy()
        if start is None:
            start = self.start_loc
        if end is None:
            end = self.end_loc

        if start <= self.end_loc:
            # macro-case: the starting label is at or before the end of the series
            # (this also catches the edge case where start_loc == end_loc)
            indx_to_return_fibs = np.where((self.index>=start) & (self.index<=end))[0]
            fib_series_to_return = self.fib_series[:,indx_to_return_fibs]
            # for the extended data, the 'start' will be self.end_loc
            extended_start_loc = self.end_loc
        else:
            # macro-case: the starting label is past the end of the series
            fib_series_to_return = np.empty((self.n_fib_levels, 0)) # dummy to concatenate
            extended_start_loc = start + pd.Timedelta(-1, unit="day")

        if end > self.end_loc:
            # case: the specified 'end' is past the last label, so repeat the final
            # levels for the extra labels. The extra labels are not necessarily
            # contiguous, so `self.indices_extended` says which ones are valid.
            if self.indices_extended is None:
                raise ValueError("need to set 'self.indices_extended' to know for which dates to extrapolate the fib-extensions")
            indices_to_extrapolate_fibs = self.indices_extended[(self.indices_extended>extended_start_loc) & (self.indices_extended<=end)]
            # how much to extend
            n_extrapolate = len(indices_to_extrapolate_fibs)
            # np.repeat keeps the result 2-D even when there is nothing to extend,
            # so the concatenate below cannot fail on mismatched dimensions
            fib_series_extrapolate = np.repeat(self.fib_series[:,-1:], n_extrapolate, axis=1)
        else:
            # none to extrapolate
            fib_series_extrapolate = np.empty((self.n_fib_levels, 0)) # dummy to concatenate

        # concatenate the in-span and extrapolated
        return np.concatenate((fib_series_to_return, fib_series_extrapolate), axis=1)

    def mask_out_predrawdown(self, indx_start_of_credible_fib):
        """truncates the leading part of the series, where one couldn't have known that one would eventually be in a drawdown"""
        self.fib_series = self.fib_series[:,indx_start_of_credible_fib:]
        # correct the n_inspan
        self.n_inspan = self.fib_series.shape[-1]
        # correct the time-indices
        self.index = self.index[indx_start_of_credible_fib:]
        self.start_loc = self.index[0]
        # correct shape
        self.shape = self.fib_series.shape
431 |     
432 | 


--------------------------------------------------------------------------------
/core.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import matplotlib.pyplot as plt
  5 | from scipy.stats import trim_mean
  6 | from .variables import TARGET_DENSITY, RECOVERY_CRITERIA, FIB_LEVELS, GET_WEIGHTS_FOR_LONGTERM_MEMORY, DEFAULT_MEMORY_FEATURES
  7 | 
  8 | # import Fib object, and retracement-box finders
  9 | from .fib_utils import *
 10 | 
 11 | # import functions to support automatic finding of drawdowns
 12 | from .subjective_drawdown_finder import *
 13 | 
 14 | subjective_drawdown = SubjectiveDrawdown(verbose =False, target_density=TARGET_DENSITY)
 15 | 
 16 | # a module of fibonacci functions
 17 | def calc_fibs(hi,lo):
 18 |     """given a high, and a low, get the fibinocci extensions and retracements. """
 19 |     return (hi-lo)*np.array(FIB_LEVELS)+lo
 20 | 
 21 | def calc_fib_series_on_span(data, start_iloc=None, stop_iloc=None, fib_span=None, do_mask=True, drawdown_criteria = 0.2):
 22 |     """given a price series, and two indices that box-in the draw-down, it makes fibonnaci retracements"""
 23 |     if not (fib_span is None):
 24 |         start_iloc, stop_iloc = fib_span
 25 |     subdata = data.iloc[start_iloc:stop_iloc]
 26 |     # get cummulative-high (notice it takes halfway between body-of-candle and wick
 27 |     cummax = get_highs(subdata).cummax()
 28 |     # get cummulative low
 29 |     cumlow = get_lows(subdata).cummin()
 30 |     # series of fibs
 31 |     fib_series_ = np.array([calc_fibs(hi,lo) for hi,lo in zip(cummax, cumlow)]).T
 32 |     # make indeices
 33 |     indices = np.arange(start_iloc, stop_iloc) 
 34 |     if do_mask:
 35 |         # mask out all fibs BEFORE the 20% drawdown (because at those times, we wouldn't know we would soon be making fib-retracements
 36 |         in_drawdown = 1*(((cummax - cumlow)/cummax)>=drawdown_criteria)
 37 |         if not in_drawdown.any():
 38 |             return None,[]
 39 |         
 40 |         indx_start_of_credible_fib = in_drawdown.tolist().index(1)
 41 |         # new indices
 42 |         indices = indices[indx_start_of_credible_fib:]
 43 |         # truncate
 44 |         fib_series_ = fib_series_[:,indx_start_of_credible_fib:]
 45 |     assert len(indices) == fib_series_.shape[-1] 
 46 |     return fib_series_, indices
 47 | 
 48 | 
 49 | # main maker of fibonacci-retracements & fibonacci-extensions
 50 | class FibonacciTechnicalAnalysis:
 51 |     """Main object that performs all steps for the automatic Fibonacci TA
 52 |     - make_fib_series: used extracting fibonacci extensions/retracements
 53 |     - make_fib_features_from_fib_series: used to create features for ML analyses based on fibs
 54 |     - make_features: combines both of the above
 55 |     """
 56 |     def __init__(self, data, drawdown_criteria, recovery_criteria=None, fib_levels=None, do_plot=False, plot_path = "/tmp/"):
 57 |         self.data = data
 58 |         
 59 |         # fiboanni levels, default
 60 |         if fib_levels is None:
 61 |             fib_levels = FIB_LEVELS #[0, 0.236, 0.382, 0.5, 0.618, 0.786, 1, 1.618, 2.618, 4.236, 6.854, 11.09]
 62 |         
 63 |         self.fib_levels = fib_levels
 64 |         
 65 |         # drawdown criteria to make a fibonacci retracement/extension (percent)
 66 |         if drawdown_criteria == 'auto':
 67 |             # find an automatic fibonacci criteria
 68 |             print("finding optimal drawdown criteria")
 69 |             try:
 70 |                 optimal_drawdown_criteria,_ = subjective_drawdown.fit(data = data)
 71 |             
 72 |             except:
 73 |                 print("failed optimizing drawdown criteria, setting to default")
 74 |                 optimal_drawdown_criteria = DRAWDOWN_CRITERIA
 75 |             
 76 |             self.drawdown_criteria = optimal_drawdown_criteria
 77 |         
 78 |         else:    
 79 |             self.drawdown_criteria = drawdown_criteria
 80 |         
 81 |         if isinstance(self.drawdown_criteria, str):
 82 |             raise ValueError("'drawdown_criteria' must be percentage 0-1 or 'auto'" )
 83 |         
 84 |         # % criteria to judge when a drawdown has fully recovered
 85 |         if recovery_criteria is None:
 86 |             recovery_criteria = RECOVERY_CRITERIA
 87 |         self.recovery_criteria = recovery_criteria
 88 |         
 89 |         # whether to make plots, and the plot path
 90 |         self.do_plot = do_plot
 91 |         self.plot_path = plot_path
 92 |     
 93 |     def make_fib(self, fib_span, make_features=True):
 94 |         """returns a Fib object, given a span between peak and recovery"""
 95 |         return Fib(fib_span=fib_span, data=self.data, drawdown_criteria=self.drawdown_criteria, fib_levels=self.fib_levels, recovery_criteria = self.recovery_criteria, make_features = make_features)
 96 |     
 97 |     def make_fib_series(self, data = None, fib_spans=None, do_plot=None, plot_path = None):
 98 |         """
 99 |         makes a list of Fibs for a dataset
100 |         if pass a list of fib_spans, then make the series
101 |         or, pass the data and the fib_spans will be calculated
102 |         """
103 |         if do_plot is None:
104 |             do_plot = self.do_plot
105 |         
106 |         if plot_path is None:
107 |             plot_path = self.plot_path
108 |         
109 |         if (data is None) and (fib_spans is None):
110 |             raise ValueError("either supply argument 'data' or 'fib_spans'. Both cannot be None")
111 |         
112 |         elif (fib_spans is None) and (not (data is None)):
113 |             fib_spans = find_all_retracement_boxes(data, self.drawdown_criteria, recovery_criteria=self.recovery_criteria)
114 |         
115 |         # make a list of fibs
116 |         fib_series = [self.make_fib(fib_span) for fib_span in fib_spans]
117 |         
118 |         # remove null fibs (must pass .is_fib)
119 |         fib_series = [fib for fib in fib_series if fib.is_fib]
120 |         if do_plot and (not (data is None)):
121 |             
122 |             # plot the price
123 |             fig = plt.figure(figsize=(15,9))
124 |             axs0 = fig.add_subplot(2,1,1)
125 |             #axs0.plot(np.arange(data.shape[0]),np.log(data['High']))
126 |             #axs0.plot(np.arange(data.shape[0]),np.log(data['Low']))
127 |             axs0.plot(data.index,np.log(data['High']))
128 |             axs0.plot(data.index,np.log(data['Low']))
129 |             for fib in fib_series:
130 |                 
131 |                 # black line showing the start
132 |                 axs0.plot([data.index[fib.series_indices[0]]]*len(fib.fib_levels), np.log(fib.fib_series[:,0]),'b-')                
133 |                 for level_ in fib.fib_series:
134 |                     #axs[0].plot(fib.series_indices, np.log(level_))
135 |                     axs0.plot(data.index[fib.series_indices], np.log(level_))
136 |             
137 |             # also block the max drawdowns
138 |             cummax = get_highs(data).cummax()
139 |             vDrawdowns = (cummax - get_lows(data))/cummax
140 |             
141 |             axs1 = fig.add_subplot(2,1,2)
142 |             #axs[1].plot(vDrawdowns)
143 |             axs1.plot(vDrawdowns)
144 |             for x in range(0,5):
145 |                 #axs[1].plot(data.index, [x/10]*data.shape[0])
146 |                 axs1.plot(data.index, [x/10]*data.shape[0])
147 |             
148 |             #plt.savefig(self.plot_path + 'fibonacci_timeseries.png')
149 |             fig.savefig(self.plot_path + 'fibonacci_timeseries.png')
150 |             plt.close()
151 |             #plt.show()
152 |         
153 |         return fib_series
154 |     
155 |     def make_fib_features(self, fib_series=None, fib_spans=None,  weights_for_longterm_memory = None, do_plot = None, plot_path = None, prefix = "", name_mod=None, feature_defaults = None, return_memory_vectors=False, return_empirical_study=False):
156 |         """main function, creates fibonacci-based features for machine-learning analyses"""
157 |         
158 |         if name_mod is None:
159 |             name_mod = "_d"
160 |         
161 |         if do_plot is None:
162 |             do_plot = self.do_plot
163 |         
164 |         if plot_path is None:
165 |             plot_path = self.plot_path
166 |         
167 |         if weights_for_longterm_memory is None:
168 |             # criteria used for building the long-term memory
169 |             #weights_for_longterm_memory = {'crit1':{'sd':1/365,'mu':0, 'p':1}, 'crit2':{'sd':1/self.drawdown_criteria,'mu':0, 'p':1.3}, 'crit3':{'sd':1/365,'mu':0, 'p':1},  'crit4':{'sd':1/(365*self.drawdown_criteria),'mu':0,'p':1.2}, 'crit5':{'sd':1.3,'mu':-2.6, 'p':1}}            
170 |             weights_for_longterm_memory = GET_WEIGHTS_FOR_LONGTERM_MEMORY(self.drawdown_criteria)
171 |             
172 |         else:
173 |             weights_for_longterm_memory['crit2']['sd'] = 1/self.drawdown_criteria
174 |             weights_for_longterm_memory['crit4']['sd'] = 1/((1/weights_for_longterm_memory['crit4']['sd'])*self.drawdown_criteria)
175 |         
176 |         # fib_series: make if not present
177 |         if fib_series is None:
178 |             fib_series = self.make_fib_series(data = self.data, fib_spans=fib_spans, do_plot=do_plot, plot_path = plot_path)
179 |         
180 |         # create the FibFeatures Object
181 |         fib_features = FibFeatures(data=self.data, fib_series=fib_series, weights_for_longterm_memory = weights_for_longterm_memory, do_plot = do_plot, plot_path = plot_path, prefix = prefix, name_mod=name_mod, feature_defaults = feature_defaults)
182 |         
183 |         # option to return the memory vectors for empirical analyses
184 |         if return_memory_vectors:
185 |             
186 |             return fib_features.features(data = self.data, return_memory = return_memory_vectors)
187 |         
188 |         #make the features        
189 |         (master_features1, master_features2, master_features3), (featnames1, featnames2,featnames3) = fib_features.features(data = self.data, return_memory = return_memory_vectors)
190 |         
191 |         # return empiricallly derived defaults of features
192 |         if return_empirical_study:
193 |             return fib_features.empiricals
194 |         
195 |         # concatenate the features and convert to pd.data.frame
196 |         pd_features = pd.DataFrame(np.concatenate((master_features1, master_features2, master_features3),axis=1), index = self.data.index, columns = featnames1+featnames2+featnames3)
197 |         
198 |         return pd_features
199 | 
200 | # modified pd.get_dummies
def get_dummies(series, columns=None):
    """One-hot encode `series`, guaranteeing a column for every entry of `columns`.

    A modified form of ``pd.get_dummies`` that tolerates categories which never
    occur in `series`.

    Parameters
    ----------
    series : pandas.Series
        Vector of integers naming columns of an (external) DataFrame. Its index
        must support ``index[-1] + TimedeltaIndex`` (e.g. a DatetimeIndex),
        because the missing categories are injected as temporary rows dated
        after the last observation.
    columns : int, list or None
        The categories that must be present. An int ``k`` means ``range(k)``.
        None falls back to plain ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame of indicator columns with exactly the rows of `series`.
    """
    if columns is None:
        # nothing to guarantee: behave like the plain pandas function
        return pd.get_dummies(series, drop_first=False)

    if isinstance(columns, int):
        # convert to list of integers
        columns = list(range(columns))

    if series.unique().tolist() == columns:
        # the series already contains exactly the requested categories
        return pd.get_dummies(series, drop_first=False)

    # Append one temporary row per requested category so every category gets a
    # column, encode, then drop the temporary rows again.
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    padding_index = series.index[-1] + pd.timedelta_range(start='1 day', periods=len(columns))
    padding = pd.Series(columns, index=padding_index)
    series_expanded = pd.concat([series, padding])

    # slice by the original length (``[:-len(columns)]`` would return an empty
    # frame when there are zero padding rows)
    dummies = pd.get_dummies(series_expanded).iloc[:len(series)]
    return dummies
219 | 
def mask_memory_based_on_indices_in_other_memory(pd_attr, memory_indices, fill):
    """Alter the cells of `pd_attr` whose column is named by `memory_indices`.

    `memory_indices` gives, per row, the column already claimed by another memory.
    It is one-hot encoded with ``get_dummies`` and the selected cells are changed
    according to the sign of `fill`:

    * ``fill == 0`` -> the selected cells are multiplied by 0;
    * ``fill > 0``  -> the selected cells are multiplied by ``fill + 1``;
    * ``fill < 0``  -> the selected cells are zeroed and then set to `fill`.

    If `memory_indices` is a pandas Series the mask is applied once; if it is a
    list of Series the masks are applied one after another. Any other input
    (or a `fill` that is neither zero, positive nor negative) yields None.
    """
    if isinstance(memory_indices, list):
        # sequential application: each pass works on the output of the previous one
        assert isinstance(memory_indices[0], pd.core.series.Series)
        result = pd_attr.copy()
        for single_memory in memory_indices:
            result = mask_memory_based_on_indices_in_other_memory(result, single_memory, fill)
        return result

    if not isinstance(memory_indices, pd.core.series.Series):
        return None

    if not (fill == 0 or fill > 0 or fill < 0):
        return None

    # indicator matrix: 1 where the other memory points at that column
    selected = get_dummies(memory_indices, columns = pd_attr.shape[-1])
    first_row = pd_attr.index[0]

    if fill == 0:
        keep = 1 - selected
        return pd_attr * keep[first_row:]

    if fill > 0:
        multiplier = selected * fill + 1
        return pd_attr * multiplier[first_row:]

    # fill < 0: zero-out the selected cells, then insert the desired fill
    selected = selected[first_row:]
    return pd_attr * (1 - selected) + selected * fill
250 | 
def rescale_criteria(criteria_vector, wt):
    """Scale, shift and exponentiate a criterion used to rank Fib retracements.

    Computes ``(criteria_vector * wt['sd'] - wt['mu']) ** wt['p']``.
    """
    scaled = criteria_vector * wt['sd']
    shifted = scaled - wt['mu']
    # TODO: want to insert a time-multiplier
    return shifted ** wt['p']
256 | 
257 | #def rescale_criteria(criteria_vector, wt):
258 | #    """scales/weights the criteria when building indices that make Fib features"""
259 | #    return (criteria_vector*wt['sd'] - wt['mu'])**wt['p']
260 | 
def tx_feature(x, feature_name):
    """Transform a raw feature: 'duration' and 'time_since_peak' become log(x+1); others pass through."""
    log_scaled_features = ('duration', 'time_since_peak')
    return np.log(x+1) if feature_name in log_scaled_features else x
265 | 
def numpy_trimmed_mean(array, trim=0.1):
    """Return the trimmed mean of `array`.

    Thin wrapper that forwards `trim` as the ``proportiontocut`` argument of
    ``trim_mean`` (presumably ``scipy.stats.trim_mean`` -- the import is outside
    this block).
    """
    trimmed_average = trim_mean(array, proportiontocut = trim)
    return trimmed_average
277 | 
def fibs_get_default_features(names_of_features_to_return = None, which_memory=None):
    """Return default feature values for price-points that precede any Fibonacci retracement/extension.

    The values are read from ``DEFAULT_MEMORY_FEATURES``: for every feature name
    the entry indexed by `which_memory` is taken (`which_memory` defaults to 1).
    These defaults are meant to be empirically derived "average" values.

    names_of_features_to_return: optional iterable of feature names; when given,
        only those features are returned (a missing name raises KeyError).
    """
    memory_slot = 1 if which_memory is None else which_memory

    # pick the default belonging to this memory for every feature
    defaults_for_memory = {}
    for feature_name, per_memory_values in DEFAULT_MEMORY_FEATURES.items():
        defaults_for_memory[feature_name] = per_memory_values[memory_slot]

    if names_of_features_to_return is None:
        return defaults_for_memory

    return {name: defaults_for_memory[name] for name in names_of_features_to_return}
293 | 
294 | #class
class MemoryArray(np.ndarray):
    """numpy.ndarray subclass that carries bookkeeping for one Fibonacci "memory".

    Extra attributes:
      memory_sequence    -- identity of the memory (default 1)
      credible_start     -- smallest integer position recorded so far (default 10**10)
      credible_start_loc -- earliest timestamp recorded so far (default None)
      time_indices       -- optional time index supplied by the caller
    """
    def __new__(
        cls,
        data,
        memory_sequence=1,
        credible_start = 10**10,
        credible_start_loc = None,
        time_indices = None,
        ):
        arr = np.asarray(data).view(cls)
        arr.memory_sequence = memory_sequence
        arr.credible_start = credible_start
        arr.credible_start_loc = credible_start_loc
        arr.time_indices = time_indices

        return arr

    def __array_finalize__(self, obj):
        """Propagate the extra attributes to views, slices and arithmetic results.

        numpy calls this hook for every array created from an existing one;
        without it such derived arrays would lack the attributes entirely.
        """
        if obj is None:
            return
        self.memory_sequence = getattr(obj, 'memory_sequence', 1)
        self.credible_start = getattr(obj, 'credible_start', 10**10)
        self.credible_start_loc = getattr(obj, 'credible_start_loc', None)
        self.time_indices = getattr(obj, 'time_indices', None)

    def update_credible_start(self, credible_start, credible_start_loc=None):
        """Track the earliest credible start seen so far.

        credible_start: an integer position (Python or any numpy integer; floats
            are truncated to int) which lowers ``self.credible_start``, or a
            pandas Timestamp which lowers ``self.credible_start_loc``.
        credible_start_loc: optional timestamp that additionally lowers
            ``self.credible_start_loc``.
        """
        if isinstance(credible_start, (float, np.floating)):
            credible_start = int(credible_start)

        # np.integer covers int32/int64/intp, so indices coming out of np.where
        # are accepted regardless of platform
        if isinstance(credible_start, (int, np.integer)):
            if self.credible_start is None:
                self.credible_start = int(credible_start)
            else:
                self.credible_start = min(self.credible_start, int(credible_start))

        elif isinstance(credible_start, pd.Timestamp):
            if self.credible_start_loc is None:
                self.credible_start_loc = credible_start
            else:
                self.credible_start_loc = min(self.credible_start_loc,credible_start)

        if credible_start_loc is not None:
            if self.credible_start_loc is None:
                self.credible_start_loc = credible_start_loc
            else:
                self.credible_start_loc = min(self.credible_start_loc,credible_start_loc)

    def numpy(self):
        """Return the data as a plain np.ndarray."""
        return np.asarray(self)
338 | 
339 | 
class FeatureArray(np.ndarray):
    """numpy.ndarray subclass for a feature table with named columns.

    Extra attributes:
      columns            -- list of column names (position i names column i)
      memory_sequence    -- identity of the memory the features belong to (default 1)
      credible_start     -- smallest integer position recorded so far (default 10**10)
      credible_start_loc -- earliest timestamp recorded so far (default None)
      time_indices       -- optional time index supplied by the caller
    """
    def __new__(
        cls,
        data,
        columns = None,
        memory_sequence=1,
        credible_start = 10**10,
        credible_start_loc = None,
        time_indices = None,
        ):
        arr = np.asarray(data).view(cls)
        arr.columns = columns
        arr.memory_sequence = memory_sequence
        arr.credible_start = credible_start
        arr.credible_start_loc = credible_start_loc
        arr.time_indices = time_indices

        return arr

    def __array_finalize__(self, obj):
        """Propagate the extra attributes to views, slices and arithmetic results.

        NOTE: `columns` is copied as-is, so it only stays meaningful for results
        that keep all columns (e.g. row slices).
        """
        if obj is None:
            return
        self.columns = getattr(obj, 'columns', None)
        self.memory_sequence = getattr(obj, 'memory_sequence', 1)
        self.credible_start = getattr(obj, 'credible_start', 10**10)
        self.credible_start_loc = getattr(obj, 'credible_start_loc', None)
        self.time_indices = getattr(obj, 'time_indices', None)

    def update_credible_start(self, credible_start, credible_start_loc=None):
        """Track the earliest credible start seen so far.

        Accepts the same inputs as ``MemoryArray.update_credible_start``: an
        integer position (Python or numpy; floats are truncated) lowers
        ``self.credible_start``, a pandas Timestamp lowers
        ``self.credible_start_loc``; `credible_start_loc` additionally lowers
        ``self.credible_start_loc``.
        """
        if isinstance(credible_start, (float, np.floating)):
            credible_start = int(credible_start)

        # numpy integers (e.g. indices from np.where) are not instances of int
        if isinstance(credible_start, (int, np.integer)):
            if self.credible_start is None:
                self.credible_start = int(credible_start)
            else:
                self.credible_start = min(self.credible_start, int(credible_start))

        elif isinstance(credible_start, pd.Timestamp):
            if self.credible_start_loc is None:
                self.credible_start_loc = credible_start
            else:
                self.credible_start_loc = min(self.credible_start_loc,credible_start)

        if credible_start_loc is not None:
            if self.credible_start_loc is None:
                self.credible_start_loc = credible_start_loc
            else:
                self.credible_start_loc = min(self.credible_start_loc,credible_start_loc)

    def numpy(self):
        """Return the data as a plain np.ndarray."""
        return np.asarray(self)

    def _column_position(self, column):
        """Resolve `column` (int position or known column name) to a position, else None."""
        if isinstance(column,int):
            return column
        if isinstance(column,str) and (column in self.columns):
            return self.columns.index(column)
        return None

    def insert(self, data, iterable, column=None):
        """Write `data` into rows `iterable` of `column`; silently does nothing for an unknown column."""
        position = self._column_position(column)
        if position is not None:
            self[iterable, position] = data

    def get(self, iterable, column=None):
        """Return rows `iterable` of `column` as a plain array; None for an unknown column."""
        position = self._column_position(column)
        if position is None:
            return None
        return self.numpy()[iterable, position]
392 | 
def make_default_features(nrows, ncols=None, default_features=None, which_memory=None, names_of_features_to_return = None):
    """Build a FeatureArray of `nrows` rows in which every column holds its default value.

    default_features: mapping feature-name -> default; looked up with
        ``fibs_get_default_features(which_memory=which_memory)`` when None.
    names_of_features_to_return: column names, in order; defaults to
        ['max_drawdown', 'time_since_peak', 'duration', 'precovery', 'fib_lev', 'box01'].
    ncols: optional sanity check; must equal the number of column names.
    """
    if default_features is None:
        default_features = fibs_get_default_features(which_memory=which_memory)

    column_names = names_of_features_to_return
    if column_names is None:
        column_names = ['max_drawdown', 'time_since_peak', 'duration', 'precovery', 'fib_lev', 'box01']

    n_columns = len(column_names)
    if ncols is None:
        ncols = n_columns

    assert ncols ==  n_columns

    # one row of defaults, repeated for every time-step
    default_row = np.array([default_features[name] for name in column_names], dtype=np.float64)
    table = np.tile(default_row, (nrows, 1))

    return FeatureArray(table, columns = column_names)
416 | 
417 | class FibFeatures:
418 |     """ """
419 |     def __init__(self, data, fib_series, weights_for_longterm_memory = None, do_plot = False, plot_path = "/tmp/", prefix = "", name_mod=None, feature_defaults = None, do_log_transform_fib_levels=True):
420 |         """
421 |         fib_series: list of Fibs
422 |         weights_for_longterm_memory: constants, used to classify which retracements 
423 |         """
424 |         # time indices,
425 |         self.data_indices = data.index
426 |         
427 |         # total number of points
428 |         self.n_total = data.shape[0] #fib_series[0].n_total
429 |         
430 |         # fib_series
431 |         self.fib_series = fib_series
432 |         
433 |         # names of features
434 |         self.nm_features = ['max_drawdown', 'time_since_peak', 'duration', 'precovery', 'fib_lev']
435 |         
436 |         # suffix to modify feature names
437 |         if name_mod is None:
438 |             name_mod = "_d"
439 |         self.name_mod=name_mod
440 |         
441 |         self.do_plot = do_plot
442 |         
443 |         # get the fibonacci fib_lelves
444 |         self.fib_levels = FIB_LEVELS#fib_series[0].fib_levels
445 |         
446 |         self.n_levels = len(self.fib_levels)
447 |         
448 |         # number of fibs in fib_series
449 |         self.n_fibs = len(fib_series)
450 |         
451 |         # whether to do a semi-log transformation of fib_levels indicator:
452 |         # .. np.log(1+2* FIB_LEVEL)-1 # ensures -1 is base
453 |         self.do_log_transform_fib_levels = do_log_transform_fib_levels
454 |             
455 |         # if there are any drawdowns/fib-retracements
456 |         if self.n_fibs>0:
457 |             
458 |             # Recovery and Drawdown criteria: get from fib objects
459 |             self.drawdown_criteria = fib_series[0].drawdown_criteria
460 |             self.recovery_criteria = fib_series[0].recovery_criteria
461 |             
462 |             # default weighting-coefficients  building the longterm memory
463 |             if weights_for_longterm_memory is None:
464 |                 # criteria used for building the long-term memory
465 |                 #weights_for_longterm_memory = {'crit1':{'sd':1/365,'mu':0, 'p':1},  'crit2':{'sd':1/self.drawdown_criteria,'mu':0, 'p':1.3}, 'crit3':{'sd':1/365,'mu':0, 'p':1},  'crit4':{'sd':1/(365*self.drawdown_criteria),'mu':0,'p':1.2}, 'crit5':{'sd':1.3,'mu':-2.6, 'p':1}}
466 |                 weights_for_longterm_memory = GET_WEIGHTS_FOR_LONGTERM_MEMORY(self.drawdown_criteria)
467 |             else:
468 |                 weights_for_longterm_memory['crit2']['sd'] = 1/self.drawdown_criteria
469 |                 weights_for_longterm_memory['crit4']['sd'] = 1/((1/weights_for_longterm_memory['crit4']['sd'])*self.drawdown_criteria)
470 |             
471 |             self.weights_for_longterm_memory = weights_for_longterm_memory
472 |             
473 |             # path to save plots
474 |             self.plot_path = plot_path
475 |             if (not (plot_path is None)) and self.do_plot:
476 |                 self.do_plot = os.path.isdir(plot_path)
477 |                 self.plot_path = os.path.join(self.plot_path, prefix)
478 |                 print('WARNING: making PLOTS in %s' % self.plot_path)
479 |             
480 |             # monitor empirical (mean) values
481 |             self.empiricals = {'max_drawdown':[], 'time_since_peak':[], 'duration':[], 'precovery':[], 'box01':[]} #'topdist':[], 'botdist':[]}
482 |             
483 |             # set up features used to build of time-series of retracements
484 |             self._collect_fib_attrs(fib_series, inplace=True)
485 |     
486 |     def _return_defaults(self, which_memory_return_default = None, return_memory_vectors=False):
487 |         """returns default values for features, if the time-series doesn't have enough data to make fibonacci retracements & extensions"""
488 |         if which_memory_return_default is None:
489 |             which_memory_return_default = [1,2,3]
490 |         
491 |         if isinstance(which_memory_return_default,int):
492 |             which_memory_return_default = [which_memory_return_default]
493 |         
494 |         # make empty (dummy) memories
495 |         if return_memory_vectors:
496 |             # make
497 |             memory_arrays_defaults = [MemoryArray(data=-1*np.ones([self.n_levels, self.n_total]), memory_sequence = i) for i in which_memory_return_default]
498 |             if len(memory_arrays_defaults)==1:
499 |                 return memory_arrays_defaults[0]
500 |             return memory_arrays_defaults
501 |         
502 |         # make features with constant (default) values
503 |         master_features_default = [make_default_features(nrows=self.n_total, default_features=fibs_get_default_features(which_memory=which_memory)) for which_memory in which_memory_return_default]
504 |         
505 |         # get features-names for defaults 
506 |         nm_features_default = [['fib-%d_%s%s' % (i,nm_,self.name_mod) for nm_ in fd.columns] for fd,i in zip(master_features_default, which_memory_return_default)]
507 |         
508 |         if len(master_features_default)==1:
509 |             return master_features_default[0], nm_features_default[0]
510 |         
511 |         # return default features and names of features
512 |         return master_features_default, nm_features_default
513 |     
514 |     def _collect_fib_attrs(self, fib_series, inplace=True):
515 |         """collect features from a fibonacci-series"""
516 |         # find which fibs are 1 2 or 3
517 |         self.fib_attrs = {'max_drawdown':None, 'time_since_peak':None, 'recovered':None, 'precovery':None,'duration':None, 'volume':None}
518 |         
519 |         # loop through fibs, get the attrs
520 |         for fibattr_ in self.fib_attrs.keys():
521 |             
522 |             # collect the time series of attributes for all the Fibs in FibSeries
523 |             self.fib_attrs[fibattr_] = pd.concat({i:fib.features[fibattr_] for i,fib in enumerate(fib_series)},axis=1)
524 |         
525 |         if not inplace:
526 |             return fib_attrs
527 |     
528 |     def _get_shortterm_memory(self):
529 |         """short-term memory: tracks the current Fibonacci"""
530 |         # S/T criteria 1: lowest time to peak
531 |         if 'memory_st' in dir(self):
532 |             return self.memory_st
533 |         
534 |         if self.n_fibs>=1:
535 |             # if there is at least one drawdown
536 |             return self.fib_attrs['time_since_peak'].fillna(10**9).idxmin(axis=1)
537 |         
538 |         self.memory_st = self._return_defaults(1,True)
539 |         return self.memory_st
540 |     
541 |     def _get_medterm_memory(self, memory_st=None):
542 |         """med-term memory: tracks the 1-lag Fibonacci"""
543 |         if 'memory_mt' in dir(self):
544 |             return self.memory_mt
545 |         
546 |         # check that there are more fibs that 1 (otherwise, there can't be a medium-term memory
547 |         if self.n_fibs>=2:
548 |             
549 |             # earliest qualifying date for a medterm_memory
550 |             self.loc_credible_med_term = self.fib_series[1].loc_start_of_credible_fib
551 |             
552 |             memory_st = self._get_shortterm_memory()
553 |             
554 |             # indicator: medium term memory 
555 |             memory_mt = mask_memory_based_on_indices_in_other_memory( pd_attr=self.fib_attrs['time_since_peak'][self.loc_credible_med_term:].fillna(10**9),memory_indices=memory_st, fill=10**10).idxmin(axis=1)
556 |             self.memory_mt = memory_mt
557 |             return memory_mt
558 |         
559 |         else:
560 |             self.memory_mt = self._return_defaults(2,True)
561 |             return self.memory_mt
562 |     
563 |     def _get_longterm_crit1(self):
564 |         """LT criteria 1: is NOT recovered and oldest-> take the oldest NOT recovered (weight) """
565 |         memory_st = self._get_shortterm_memory()
566 |         memory_mt = self._get_medterm_memory()            
567 |         if self.n_fibs>=2:
568 |             
569 |             self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib
570 |             
571 |             # check is not recovered from drawdown
572 |             lt_crit1 = rescale_criteria((1-self.fib_attrs['recovered']).fillna(0)*(self.fib_attrs['time_since_peak'].fillna(0)), wt = self.weights_for_longterm_memory['crit1'])[self.loc_credible_lt:]
573 |             
574 |             # mask out: short-term memory
575 |             lt_crit1 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit1, memory_indices=[memory_st, memory_mt], fill=0)
576 |             
577 |             if self.do_plot:
578 |                 # save criteria 1 as a plot to inspect
579 |                 plt.figure(figsize=(15,9))
580 |                 plt.plot(lt_crit1)
581 |                 plt.legend([str(k) for k in lt_crit1.columns]);
582 |                 plt.savefig(self.plot_path + 'LTmem3_crit1.png')
583 |                 plt.close()
584 |             
585 |             return lt_crit1
586 |         else:
587 |             return None
588 |     
589 |     def _get_longterm_crit2(self):
590 |         """ LONG-TERM CRITERIA 2: deep-draw down"""
591 |         memory_st = self._get_shortterm_memory() 
592 |         memory_mt = self._get_medterm_memory()
593 |         
594 |         if self.n_fibs>=2:
595 |             
596 |             self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib
597 |             
598 |             lt_crit2 = rescale_criteria(self.fib_attrs['max_drawdown'], wt = self.weights_for_longterm_memory['crit2'])[self.loc_credible_lt:]
599 |             
600 |             lt_crit2 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit2, memory_indices=[memory_st, memory_mt], fill=0)
601 |             
602 |             if self.do_plot:
603 |                 # save criteria 1 as a plot to inspect
604 |                 plt.figure(figsize=(15,9))
605 |                 plt.plot(lt_crit2)
606 |                 plt.legend([str(k) for k in lt_crit2.columns]);
607 |                 plt.savefig(self.plot_path + 'LTmem3_crit2.png')
608 |                 plt.close()
609 |             
610 |             return lt_crit2
611 |         else:
612 |             return None
613 |     
614 |     def _get_longterm_crit3(self):
615 |         """ LONG-TERM CRITERIA 3: long-duration draw-down"""
616 |         memory_st = self._get_shortterm_memory() 
617 |         memory_mt = self._get_medterm_memory()
618 |         
619 |         if self.n_fibs>=2:
620 |             
621 |             self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib
622 |             
623 |             lt_crit3 = rescale_criteria(self.fib_attrs['duration'], wt = self.weights_for_longterm_memory['crit3'])[self.loc_credible_lt:]
624 |             
625 |             lt_crit3 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit3, memory_indices=[memory_st,memory_mt], fill=0)
626 |             
627 |             if self.do_plot:
628 |                 # save criteria 1 as a plot to inspect
629 |                 plt.figure(figsize=(15,9))
630 |                 plt.plot(lt_crit3)
631 |                 plt.legend([str(k) for k in lt_crit3.columns]);
632 |                 plt.savefig(self.plot_path + 'LTmem3_crit3.png')
633 |                 plt.close()
634 |             
635 |             return lt_crit3
636 |         else:
637 |             return None
638 |     
639 |     def _get_longterm_crit4(self):
640 |         """ LONG-TERM CRITERIA 4: total volume (drawdown % x time-in-drawdown)"""
641 |         memory_st = self._get_shortterm_memory() 
642 |         memory_mt = self._get_medterm_memory()
643 |         
644 |         if self.n_fibs>=2:
645 |             
646 |             self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib
647 |             
648 |             lt_crit4 = rescale_criteria(self.fib_attrs['volume'], wt = self.weights_for_longterm_memory['crit4'])[self.loc_credible_lt:]
649 |             lt_crit4 = mask_memory_based_on_indices_in_other_memory(pd_attr=lt_crit4, memory_indices=[memory_st, memory_mt], fill=0)
650 |             
651 |             if self.do_plot:
652 |                 # save criteria 1 as a plot to inspect
653 |                 plt.figure(figsize=(15,9))
654 |                 plt.plot(lt_crit4)
655 |                 plt.legend([str(k) for k in lt_crit4.columns]);
656 |                 plt.savefig(self.plot_path + 'LTmem3_crit4.png')
657 |                 plt.close()
658 |             
659 |             return lt_crit4
660 |         else:
661 |             return None
662 |     
663 |     def _get_longterm_crit5(self):
664 |         """ LONG-TERM CRITERIA 5: distance above recovery level, with decay by time"""
665 |         memory_st = self._get_shortterm_memory() 
666 |         memory_mt = self._get_medterm_memory()
667 |         
668 |         if self.n_fibs>=2:
669 |             
670 |             # get the start of credible beginning of fib
671 |             self.loc_credible_lt = self.fib_series[2].loc_start_of_credible_fib
672 |             
673 |             # get basis of the lt_crit5
674 |             attr_precovery_decayed_by_time = self.fib_attrs['precovery']
675 |             
676 |             # rescale lt_crit5
677 |             lt_crit5 = rescale_criteria(attr_precovery_decayed_by_time,
678 |                                         wt = self.weights_for_longterm_memory['crit5'])
679 |             
680 |             # smooth out the precovery by a rolling window
681 |             lt_crit5 = lt_crit5.rolling(window= 4, center=False, min_periods =1).mean()[self.loc_credible_lt:]
682 |             
683 |             # get min to serve as fill
684 |             min_to_fill_for_lt_crit5 = float(lt_crit5.min().min())
685 |             min_to_fill_for_lt_crit5 = min_to_fill_for_lt_crit5*1.3 if min_to_fill_for_lt_crit5<0 else min_to_fill_for_lt_crit5*0.75
686 |             # mask out: med-term memory
687 |             lt_crit5 = mask_memory_based_on_indices_in_other_memory(pd_attr = lt_crit5, memory_indices = [memory_st,memory_mt], fill = min_to_fill_for_lt_crit5) 
688 |             
689 |             if self.do_plot:
690 |                 # save criteria 1 as a plot to inspect
691 |                 plt.figure(figsize=(15,9))
692 |                 plt.plot(lt_crit5)
693 |                 plt.legend([str(k) for k in lt_crit5.columns]);
694 |                 plt.savefig(self.plot_path + 'LTmem3_crit5.png')
695 |                 plt.close()
696 |             
697 |             return lt_crit5
698 |         else:
699 |             return None
700 |     
701 |     def _get_longterm_memory(self, memory_st = None, memory_mt = None):
702 |         """long-term memory: tracks the 2-lag Fibonacci"""
703 |         if 'memory_lt' in dir(self):
704 |             return self.memory_lt
705 |         
706 |         if self.n_fibs<3:
707 |             # return defaults empty if less than 3 fib-retracements
708 |             self.memory_lt = self._return_defaults(3,True)
709 |             return self.memory_lt
710 |         
711 |         lt_crit1 = self._get_longterm_crit1()
712 |         lt_crit2 = self._get_longterm_crit2()
713 |         lt_crit3 = self._get_longterm_crit3()
714 |         lt_crit4 = self._get_longterm_crit4()
715 |         lt_crit5 = self._get_longterm_crit5()
716 |         lt_crit = lt_crit1 +lt_crit2 + lt_crit3 + lt_crit4 + lt_crit5
717 |         if self.do_plot:
718 |             plt.figure(figsize=(15,9))
719 |             plt.plot(lt_crit)
720 |             plt.legend([str(k) for k in lt_crit.columns]);    
721 |             plt.savefig(self.plot_path + 'LTmem3_crit.png')
722 |             plt.close()
723 |             
724 |         # fill nas: notice I fill NAs with the minimum values (because we select by maximizing)
725 |         memory_lt = lt_crit.fillna(float(lt_crit.min().min())).idxmax(axis=1)
726 |         self.memory_lt = memory_lt
727 |         
728 |         if self.do_plot:
729 |             plt.figure(figsize=(15,9))
730 |             plt.plot(memory_lt)
731 |             plt.savefig(self.plot_path + 'LTmem3.png')
732 |             plt.close()
733 |         
734 |         return memory_lt
735 |     
    def _build_fib_timematrix(self, memory_vector=None, memory_identity=None):
        """Create the time-series matrices for a given memory_vector (either s/t, med, or l/t).

        memory_vector: per-time-step id of the Fib tracked by this memory. The main
            path uses ``.index`` and ``.unique()`` on its pieces, so a pandas Series
            is expected there.
        memory_identity: sequence number of the memory; when None it is read from
            ``memory_vector.memory_sequence``.
            NOTE(review): a plain pandas Series has no ``memory_sequence`` attribute,
            so callers presumably pass this explicitly for Series input -- verify.

        Returns
        -------
        (None, None) when memory_vector is None.
        (memory_vector, defaults) when there are fewer Fibs than memory_identity;
            ``defaults`` is whatever ``self._return_defaults(memory_identity, False)``
            returns, which for a single identity is a (features, names) pair.
        (master_memory, master_features) otherwise: a MemoryArray of shape
            (n_levels, n_total) and a FeatureArray with n_total rows.

        Side effect: appends one trimmed mean per feature to ``self.empiricals``.
        """

        if memory_vector is None:
            print("memory vector is None in '_build_fib_timematrix'")
            return None, None

        if memory_identity is None:
            # get the sequence/identity of the memory vector
            memory_identity = memory_vector.memory_sequence

        if self.n_fibs < memory_identity:
            # the number of fibonacci extensions is less than this identity, return defaults
            print("number of fibs is less than the prescribed mem-identity %d" % memory_identity)
            dummy_features = self._return_defaults(memory_identity,False)
            return  memory_vector, dummy_features

        # size (not used further below)
        n_ = memory_vector.shape[0]

        # number of levels
        n_levels = len(self.fib_levels)

        # split the memory vector at every position where the tracked Fib id changes,
        # giving runs that each refer to a single Fib
        subseq_changes = np.split(memory_vector, np.where(np.diff(memory_vector))[0]+1)

        # get the meta-data for each (uniform) subsequence: first/last index label, length, Fib id
        subseqs_metadata = [{'start_loc':subseq.index[0], 'end_loc':subseq.index[-1], 'n':len(subseq), 'id':subseq.unique()[0]} for subseq in subseq_changes]

        # empty containers: memory-fib-levels (zeros until filled)
        master_memory = MemoryArray(data = np.zeros([n_levels, self.n_total]), memory_sequence = memory_identity)

        #nm_features_get = ['max_drawdown', 'time_since_peak', 'duration', 'precovery']
        nm_features_get = self.nm_features

        # empty containers: features (filled with the defaults of this memory)
        #master_features = np.zeros([self.n_total, len(nm_features_get)]) # container for features-time-series
        master_features = make_default_features(nrows=self.n_total, default_features = fibs_get_default_features(which_memory = memory_identity))
        # loop through fibonacci-sequences
        for i,subseq in enumerate(subseqs_metadata):
            # integer positions [start, end) in self.data_indices covered by this run
            span_to_insert = (np.where(self.data_indices==subseq['start_loc'])[0][0],(np.where(self.data_indices==subseq['end_loc'])[0][0]+1))

            # data fib_series to insert, fetched from the Fib object for this run's date range
            # (presumably the fib-level values per level and time-step -- confirm against the Fib class)
            data_to_insert = self.fib_series[subseq['id']].fib_series.get(start=subseq['start_loc'], end = subseq['end_loc'])

            # insert the data into the memory
            master_memory[:, span_to_insert[0]:span_to_insert[1]] = data_to_insert
            # update_credible_start keeps the minimum, i.e. the earliest run start
            master_memory.update_credible_start(credible_start = span_to_insert[0])

            # gather features: rows to write in the master table, and rows to read from the Fib's own features
            feat_indx_insert = np.where((self.data_indices >= subseq['start_loc']) & (self.data_indices <= subseq['end_loc']))[0]
            feat_indx_get = np.where((self.fib_series[subseq['id']].features[nm_features_get[0]].index >= subseq['start_loc']) & (self.fib_series[subseq['id']].features[nm_features_get[0]].index <= subseq['end_loc']))[0]
            assert len(feat_indx_insert) == len(feat_indx_get)

            # loop through and get features; the last name in nm_features_get is skipped here
            for j,featnm in enumerate(nm_features_get[:-1]):

                #master_features[feat_indx_insert, j] = tx_feature(self.fib_series[subseq['id']].features[featnm].iloc[feat_indx_get], featnm)
                master_features.insert(data = tx_feature(self.fib_series[subseq['id']].features[featnm].iloc[feat_indx_get], featnm),
                                       iterable = feat_indx_insert,
                                       column = featnm)

        # get empirical values for each feature (again skipping the last name)
        for featnm in nm_features_get[:-1]:
            # '.get' function is used to get a subset of data, based on master-memory.credible_start, which is the first index of the first fibonacci retracement
            feature_empirical_mean = numpy_trimmed_mean(master_features.get(range(master_memory.credible_start,self.n_total), column = featnm))
            self.empiricals[featnm] += [feature_empirical_mean] 

        return master_memory, master_features
806 |         
807 |     def _build_features0(self, memory, master_features, data):
808 |         """given a price (data['Close']) and master_memory of (selected) fib-levels, snake the price through the memory to get the relative price difference (as a feature)"""
809 |         
810 |         # identity (1,2,3) memory
811 |         memory_identity = memory.memory_sequence
812 |         
813 |         # check if no fibs exist
814 |         if self.n_fibs < memory_identity:
815 |             return master_features
816 |         
817 |         # fill-value (just for the arithmetic/min/max-finding)
818 |         fillvalue = data['Close'].max()*10
819 |         
820 |         # find the index-position (non-time) of the first-credible fib
821 |         cred_start = memory.credible_start
822 |         
823 |         # snake through price : get the top fib snake
824 |         snake1 = memory[:,cred_start:] - data['Close'].iloc[cred_start:].values
825 |         snake_plus = np.clip(snake1,0,10**10)
826 |         snake_plus[np.nonzero(snake_plus==0)]=fillvalue
827 |         top_fib = snake_plus.argmin(axis=0)
828 |         
829 |         # snake through price: get the bottom fib snake
830 |         snake_neg = np.clip(snake1,-10**10,0)
831 |         snake_neg[np.nonzero(snake_neg==0)]=-fillvalue
832 |         bottom_fib = snake_neg.argmax(axis=0)
833 |             
834 |         # snake through the fib levels
835 |         snake_through_fib_levels = np.array([self.fib_levels[i] for i in bottom_fib], dtype=np.float64)
836 |         if self.do_log_transform_fib_levels:
837 |             # convert the fib_level to semi-log scale
838 |             snake_through_fib_levels = np.log(1+2*snake_through_fib_levels)-1
839 |         
840 |         master_features.insert(data = snake_through_fib_levels,
841 |                                iterable = range(cred_start, master_features.shape[0]),
842 |                                column = 'fib_lev')
843 |         
844 |         # ensure that the topfib is at least as big as the bottom fib
845 |         top_fib = np.maximum(bottom_fib, top_fib)
846 |         
847 |         # price at the fibs
848 |         top_fib_price = memory[top_fib,np.arange(cred_start,memory.shape[-1])]
849 |         bottom_fib_price = memory[bottom_fib,np.arange(cred_start,memory.shape[-1])]
850 |         
851 |         # get the price differences (between the price and the fibs)
852 |         #fib_dist_to_top_fib = (top_fib_price-data['Close'].iloc[cred_start:].values)/data['Close'].iloc[cred_start:].values
853 |         #fib_dist_to_bot_fib = (data['Close'].iloc[cred_start:].values-bottom_fib_price)/data['Close'].iloc[cred_start:].values
854 |         # track the empirical values (for setting defaults)
855 |         #self.empiricals['topdist'] += [numpy_trimmed_mean(fib_dist_to_top_fib, trim=0.1)] 
856 |         #self.empiricals['botdist'] += [numpy_trimmed_mean(fib_dist_to_bot_fib, trim=0.1)]         
857 | 
858 |         # convert distance to fibs as a [0,1] indicator; if beyond, convert to % above
859 |         z = data['Close'].iloc[cred_start:].values
860 |         range_fib_price = top_fib_price - bottom_fib_price
861 |         # 3 cases: if within [bottom, top], convert to [0,1] indicator
862 |         # ... if above: convert to [1,+] logged
863 |         # ... if below: convert to [-Inf, -0.0001]
864 |         fib_box_01 = (z - bottom_fib_price)/(range_fib_price + (range_fib_price==0))
865 |         fib_box_above = 1+np.log( z / top_fib_price)
866 |         fib_box_below = np.log(z / bottom_fib_price)
867 |         # combine cases: bools
868 |         is_case_01 = (z < top_fib_price) & (z > bottom_fib_price)
869 |         is_case_above = z>=top_fib_price
870 |         is_case_below = z<=bottom_fib_price
871 |         # combine cases
872 |         fib_box_dist = fib_box_01*is_case_01 + fib_box_above*is_case_above + fib_box_below*is_case_below
873 |         # add to empiricals
874 |         self.empiricals['box01'] += [numpy_trimmed_mean(fib_box_dist, trim=0.1)]
875 |         
876 |         # plot price snake through fib-boxes
877 |         if self.do_plot:
878 |             
879 |             # plot the raw fib-time-series
880 |             plt.figure(figsize=(15,9))
881 |             plt.plot(np.arange(len(data['Close'])),np.log(data['Close']))
882 |             plt.plot(np.log(memory.T), 'b--')
883 |             for fib in self.fib_series:
884 |                 for level_ in fib.fib_series:
885 |                     plt.plot(fib.series_indices, np.log(level_))
886 |             
887 |             plt.savefig(self.plot_path + 'price_and_fibs-%d.png' % (memory.memory_sequence))
888 |             plt.close()
889 |             
890 |             # plot the fib-feature (price snakes through fibs)
891 |             plt.figure(figsize=(15,9))
892 |             plt.plot(data['Close'].iloc[cred_start:].index, np.log(top_fib_price))
893 |             plt.plot(data['Close'].iloc[cred_start:].index, np.log(bottom_fib_price))
894 |             plt.plot(data['Close'].iloc[cred_start:].index, np.log(data['Close'].iloc[cred_start:].values))
895 |             plt.savefig(self.plot_path + 'price-snake-through-fib-%d.png' % (memory.memory_sequence)) 
896 |             plt.close()
897 |         
898 |         # insert distance-to-top-fib into master-features
899 |         master_features.insert(data = fib_box_dist,
900 |                                iterable = range(cred_start, master_features.shape[0]),
901 |                                column ='box01')                    
902 |         #master_features.insert(data = fib_dist_to_top_fib,
903 |         #                       iterable = range(cred_start, master_features.shape[0]),
904 |         #                       column ='topdist')
905 |         # 
906 |         # insert distance-to-bottom-fib into master-features
907 |         #master_features.insert(data = fib_dist_to_bot_fib,
908 |         #                       iterable = range(cred_start, master_features.shape[0]),
909 |         #                       column ='botdist')
910 |         #
911 |         # feature names: update with name_mod
912 |         nm_feat = ['fib-%d_%s%s' % (memory.memory_sequence, nm_, self.name_mod) for nm_ in master_features.columns]
913 |         return master_features, nm_feat
914 |     
915 |     def features(self, data = None, return_memory = False):
916 |         """ main function:
917 |         wrapper for _build_fib_timematrix, runs on s/t,m/t and l/t memory
918 |         ... and _build_features0 (which outputs final features)
919 |         """
920 |         # features 1: most-recent drawdown
921 |         memory_st = self._get_shortterm_memory()
922 |         master_memory1, master_features1 = self._build_fib_timematrix(memory_vector=memory_st, memory_identity = 1)
923 |         
924 |         # features 2: previous drawdown
925 |         memory_mt = self._get_medterm_memory(memory_st = memory_st)
926 |         master_memory2, master_features2 = self._build_fib_timematrix(memory_vector=memory_mt, memory_identity = 2)
927 |         
928 |         # features 3: long-term drawdown
929 |         memory_lt = self._get_longterm_memory(memory_st = memory_st, memory_mt = memory_mt)
930 |         master_memory3, master_features3 = self._build_fib_timematrix(memory_vector=memory_lt, memory_identity = 3)
931 |         
932 |         if return_memory:
933 |             # option to return the raws (memory vector)
934 |             #return (master_memory1, master_memory2, master_memory3), (master_features1, master_features2, master_features3)
935 |             return (memory_st, memory_mt, memory_lt), (master_features1, master_features2, master_features3)
936 |         
937 |         features1, names1 = self._build_features0(master_memory1, master_features1, data)
938 |         features2, names2 = self._build_features0(master_memory2, master_features2, data)
939 |         features3, names3 = self._build_features0(master_memory3, master_features3, data)
940 |         
941 |         return (features1, features2, features3), (names1, names2,names3)
942 | 
943 | 
944 | 
945 |     
946 | 


--------------------------------------------------------------------------------