├── document.pdf ├── fredgraph.xls ├── Trust-Income.png ├── Trust-Inequality.png ├── document_gemini.pdf ├── Cox_Trading_System.pdf ├── KOFGI_2022_public.xlsx ├── sp500_temp_indpro.gdt ├── document_perplexity.pdf ├── gold_and_agriculture.xlsx ├── README.md ├── food_energy.py ├── the speculative neighbor.py ├── food_energy_output.py ├── USMV_SPHB_rotation.py ├── the-speculative-correlation.py ├── speculative_exponent.py ├── oracle.py ├── examples_fred_nn_forecaster.py ├── the lazy moments strategy.py ├── the osborne bubble.py ├── run little trader, run.py ├── speculative_rotation.py ├── patents_and_growth.py ├── the fractional trader.py ├── the speculative switch.py ├── the efficient speculator.py ├── the fractional osborne bubble.py ├── wheat_agrichem_energy.py ├── the random standardized speculator.py ├── the-speculative-bootstrap.py ├── the-speculative-system.py ├── summer speculations.py ├── us_inflation_and_russia_wars.py ├── the explosive speculator.py ├── globalization_and_economic_freedom.py ├── the random enforcer.py ├── the-μ-strategy.py ├── sp500_food_energy_output.py ├── the random speculator.py ├── trust.py ├── mean_and_probability.py ├── the speculative sectors.py ├── the-adaptive-mean.py ├── Cox_Trading_System_WFO.py ├── speculative_complexity.py ├── inflation_drivers.inp ├── speculative bias learning.py ├── money demand and inflation forecasting.py ├── supersized_volatility_and_volume.py ├── method in investment.py ├── complexity_weighted_forecasting.py ├── nn_tools.py ├── the-speculative-regression.py ├── rent or buy the profits run high.py ├── a simple measure of economic activity and the z-strategy.py ├── the-speculative-transform.py ├── the speculative unemployment proxies.py ├── speculative climate change.py ├── speculative sign smoothing and NN.py ├── the speculative probabilities.py ├── pre-election-Greece.py ├── competitive_speculator.py ├── the DSCP strategy.py ├── macroeconomic uncertainty and currency trading.py ├── automatic_window_selection.py └── realgdp.py /document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/document.pdf -------------------------------------------------------------------------------- /fredgraph.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/fredgraph.xls -------------------------------------------------------------------------------- /Trust-Income.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/Trust-Income.png -------------------------------------------------------------------------------- /Trust-Inequality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/Trust-Inequality.png -------------------------------------------------------------------------------- /document_gemini.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/document_gemini.pdf -------------------------------------------------------------------------------- /Cox_Trading_System.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/Cox_Trading_System.pdf 
-------------------------------------------------------------------------------- /KOFGI_2022_public.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/KOFGI_2022_public.xlsx -------------------------------------------------------------------------------- /sp500_temp_indpro.gdt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/sp500_temp_indpro.gdt -------------------------------------------------------------------------------- /document_perplexity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/document_perplexity.pdf -------------------------------------------------------------------------------- /gold_and_agriculture.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dthomakos/Prognostikon-Code/HEAD/gold_and_agriculture.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prognostikon-Code 2 | Publicly available Python and Gretl code from posts at my blog Prognostikon, https://prognostikon.wordpress.com/ (now legacy), 3 | and at the new and updated Prognostikon here, https://prognostikon.cce.uoa.gr/ 4 | 5 | Contents include time series analysis and forecasting methods, as well as quantitative investment strategy methods and ideas. Enjoy! 6 | -------------------------------------------------------------------------------- /food_energy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/04/16/long-term-bonds-food-energy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import pandas as pd 14 | import pandas_datareader as pdr 15 | 16 | # Tickers and data extraction 17 | tickers = ['IRLTLT01USM156N', 'PFOODINDEXM', 'PNRGINDEXM'] 18 | data = pdr.fred.FredReader(tickers, start='2013-01-01').read() 19 | data.columns = ['10Y Yield', 'Food', 'Energy'] 20 | 21 | # Optionally plot the data 22 | # data.plot(secondary_y='10Y Yield', grid='both') 23 | # plt.show() 24 | 25 | # Add the price of food deflated by the energy index 26 | data = data.assign(Food_Energy = data['Food']/data['Energy']) 27 | data.columns = ['10Y Yield', 'Food', 'Energy', 'Food/Energy'] 28 | 29 | # Need only two series to plot 30 | data_plot = data[['10Y Yield', 'Food/Energy']] 31 | 32 | # and plot them... 33 | data_plot.plot(color=['green', 'blue'], title='10Y US Yield vs. 
Global Food Index deflated by Global Energy Index', xlabel='Date', secondary_y='Food/Energy') 34 | ax1, ax2 = plt.gcf().get_axes() 35 | ax1.set_ylabel('percent') 36 | ax2.set_ylabel('ratio of indexes') 37 | plt.show() 38 | 39 | -------------------------------------------------------------------------------- /the speculative neighbor.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/07/29/the-speculative-neighbor/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | import nn_tools as nn 18 | 19 | # Download some data 20 | ticker = 'VXX' 21 | data = yf.download(ticker, period='max', interval='1wk')['Adj Close'].dropna() 22 | # Get percent returns, select period of estimation/evaluation 23 | y = data.pct_change().dropna()#.loc['2018-01-01':] 24 | 25 | # Compute the recursive NN forecast 26 | ini_obs = 3 27 | nobs = y.shape[0] 28 | set_p = 1 29 | set_alpha = 1 30 | set_beta = 1 31 | set_nn_type = 'forward' 32 | # 33 | rr = pd.DataFrame(data=None, index=y.index, columns=['Buy & Hold ', 'The Speculative Neighbor']) 34 | 35 | # 36 | for i in np.arange(ini_obs, nobs, 1): 37 | yi = y.iloc[:i] 38 | id = nn.get_nn(yi.to_numpy(), p=set_p, alpha=set_alpha, step=1) 39 | fi = nn.get_nn_forecast(yi.to_numpy(), id, beta=set_beta, nn_type=set_nn_type)[0] 40 | rr.iloc[i] = np.c_[y.iloc[i], np.sign(fi)*y.iloc[i]] 41 | 42 | # Do a nice plot 43 | tr = ((rr+1).cumprod()-1)*100 44 | tr.plot(title='Total trading returns of the speculative neighbor strategy in '+ticker, color=['red', 'blue'], ylabel='return in percent') 45 | plt.grid(visible=True, which='both') 46 | plt.show() 47 | # and print the total trading return 48 | print(tr.iloc[-1]) -------------------------------------------------------------------------------- /food_energy_output.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/04/27/output-capacity-food-energy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import yfinance as yahoo 17 | from scipy import optimize as opt 18 | from statsmodels.tsa.stattools import ccf 19 | 20 | # Tickers, start date, end date and data extraction 21 | tickers = ['INDPRO', 'MCUMFN', 'CPIUFDSL', 'CPIENGSL'] 22 | start_date = '2012-03-01' 23 | end_date = '2023-03-31' 24 | econdata = pdr.fred.FredReader(tickers, start=start_date, end=end_date).read() 25 | econdata.columns = ['Output', 'Capacity', 'Food', 'Energy'] 26 | 27 | # Get the two ratios 28 | econdata = econdata.assign(Index1 = econdata['Food']/econdata['Energy'], 29 | Index2 = econdata['Output']/econdata['Capacity']) 30 | econdata.columns = ['Output', 'Capacity', 'Food', 'Energy', 'Food/Energy', 'Output/Capacity'] 31 | 32 | # Do the plot 33 | indices = econdata[['Output/Capacity', 'Food/Energy']].apply(np.log).diff(periods=12).dropna()*100 34 | indices.plot(secondary_y='Food/Energy', color=['blue', 'green'], xlabel='Year', 35 | title='Output-to-Capacity ratio vs. Food-to-Energy ratio, US monthly data') 36 | ax1, ax2 = plt.gcf().get_axes() 37 | ax1.set_ylabel('annual log-growth, percent') 38 | ax2.set_ylabel('annual log-growth, percent') 39 | plt.show() 40 | 41 | # and compute the correlation 42 | print(indices.corr()) 43 | 44 | -------------------------------------------------------------------------------- /USMV_SPHB_rotation.py: -------------------------------------------------------------------------------- 1 | # 2 | # Based on an idea and the assets from Kurtis Hemmerling, see this LinkedIn post 3 | # 4 | # https://www.linkedin.com/posts/hemmerlingkurtis_etfs-investingstrategy-longshort-activity-7057062449618817024-RdoE?utm_source=share&utm_medium=member_desktop 5 | # 6 | # Import packages 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import pandas as pd 10 | import yfinance as yahoo 11 | 12 | # Download data, using monthly returns for a quick evaluation... 13 | data = yahoo.download(['USMV', 'SPHB', 'SPY'], period='max', interval='1mo')['Adj Close'].dropna() 14 | # Compute percent returns and cumulative returns 15 | rets = data.pct_change().dropna() 16 | cret = (rets+1).cumprod() 17 | 18 | # Compute the difference of cumulative returns 19 | dret = cret['USMV']-cret['SPHB'] 20 | # The signal is based on the sign of the previous month's rolling mean 21 | signal = dret.rolling(window=12).mean().shift(periods=1).apply(np.sign) 22 | 23 | # Compute the strategy's returns: long USMV if the signal is negative, long SPHB otherwise 24 | sret = rets['USMV']*(signal <= 0) + rets['SPHB']*(signal > 0) 25 | 26 | # Crop returns to approximately match the published post in LinkedIn 27 | new_sret = sret.loc['2020-05-01':] 28 | # Compute cumulative returns for the strategy and the index 29 | tret = (new_sret+1).cumprod()-1 30 | tspy = (rets['SPY']+1).loc[tret.index].cumprod()-1 31 | # Put together, nice names and plot! 
32 | tall = pd.concat([tspy, tret], axis=1)*100 33 | tall.columns = ['SPY', 'USMV-SPHB'] 34 | # 35 | tall.plot(title='A simple version of USMV-SPHB rotation, monthly data - based on an idea from Kurtis Hemmerling', 36 | xlabel='Time', ylabel='percent', grid='both') 37 | plt.show() 38 | # 39 | print(tall) 40 | -------------------------------------------------------------------------------- /the-speculative-correlation.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/10/28/the-speculative-correlation/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | import statsmodels.api as sm 17 | from statsmodels.tsa.api import VAR 18 | 19 | # Download some data, for the post I used 20 | ticker = 'GLD' 21 | data = yf.download(ticker, period='max', interval='1wk')['Adj Close'].dropna() 22 | r = data.pct_change().dropna().loc['2020-01-01':] # you can change this of course 23 | dr = r.diff() 24 | # Put together returns and difference in returns 25 | z = pd.concat([r, dr], axis=1).dropna() 26 | z.columns = [ticker, 'Δ-'+ticker] 27 | 28 | # Set observations and rolling window 29 | nobs = z.shape[0] 30 | roll = 2 31 | 32 | # and initialize storage 33 | store = pd.DataFrame(data=None, index=z.index, columns=[ticker, 'Speculative Correlation', 'Signal']) 34 | 35 | # Run a simple loop to get the signals and the strategy returns 36 | for i in np.arange(0, nobs-roll, 1): 37 | z_i = z.iloc[i:(i+roll),:] 38 | bnh = z.iloc[i+roll, 0] 39 | z_f = z_i.corr().iloc[1, 0] 40 | stg = bnh*np.sign(z_f) 41 | store.iloc[i+roll, :] = np.hstack([bnh, stg, z_f]) 42 | 43 | # Compute the cumulative return and plot performance 44 | cret = ((store.iloc[:,[0,1]] + 1).cumprod() - 1)*100 45 | cret.plot(grid='both', title='The speculative correlation strategy for '+ticker+' using weekly returns', xlabel='Date', ylabel='return in percent') 46 | plt.savefig(ticker+'.png') 47 | plt.show() 48 | # 49 | print(cret.iloc[-1]) 50 | -------------------------------------------------------------------------------- /speculative_exponent.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/07/09/the-probable-speculative-constant/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | 18 | # Define the weighted mean 19 | def weighted_mean(x, alpha=1.0): 20 | w = np.arange(1, x.shape[0]+1, 1) ** alpha 21 | m = (x * w).sum()/w.sum() 22 | return m 23 | 24 | # Download some data 25 | ticker = 'VXX' 26 | data = yf.download(ticker, period='max', interval='1d')['Adj Close'].dropna() 27 | # Get percent returns, select period of estimation/evaluation 28 | y = data.pct_change().dropna().loc['2022-01-01':] 29 | 30 | # Select the rolling window 31 | roll = 63 32 | # Set the exponent 33 | set_alpha = 7 34 | # Compute the rolling mean and the weighted rolling means 35 | mu0 = y.rolling(window=roll).mean() 36 | mu1 = y.rolling(window=roll).apply(weighted_mean, args=(1.0,)) 37 | mu2 = y.rolling(window=roll).apply(weighted_mean, args=(set_alpha,)) 38 | 39 | # Compute the signs and trade 40 | s0 = mu0.apply(np.sign) 41 | s1 = mu1.apply(np.sign) 42 | s2 = mu2.apply(np.sign) 43 | # 44 | bench = y.iloc[roll:] 45 | r0 = (y.iloc[1:]*s0.shift(periods=1)).dropna() 46 | r1 = (y.iloc[1:]*s1.shift(periods=1)).dropna() 47 | r2 = (y.iloc[1:]*s2.shift(periods=1)).dropna() 48 | 49 | # Put together, cumulate and plot 50 | all = pd.concat([bench, r0, r1, r2], axis=1) 51 | all.columns = [ticker, 'Spec. Mean', 'Spec. WMean', 'Spec. Exponent'] 52 | # 53 | tr_all = ((all+1).cumprod()-1)*100 54 | # 55 | tr_all.plot(title='Total trading returns of the speculative constant & exponent strategies in '+ticker, color=['red', 'blue', 'green', 'orange'], ylabel='return in percent') 56 | plt.grid(visible=True, which='both') 57 | plt.show() 58 | # and print the total trading return 59 | print(roll) 60 | print(tr_all.iloc[-1]) --------------------------------------------------------------------------------
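Note: the weighted mean above is the heart of the speculative-exponent idea: weights grow as position^alpha within the window, so a larger alpha tilts the signal toward the most recent returns, and alpha=0 recovers the plain rolling mean. A minimal standalone sketch (toy numbers, not from the post) of how the exponent shifts the estimate:

import numpy as np

def weighted_mean(x, alpha=1.0):
    # weights 1^alpha, 2^alpha, ..., n^alpha; the most recent observation is last
    w = np.arange(1, x.shape[0]+1, 1) ** alpha
    return (x * w).sum()/w.sum()

x = np.array([0.02, -0.01, 0.03, -0.04])
for a in [0, 1, 7]:
    print(a, weighted_mean(x, a))
# alpha=0 gives the plain mean (0.0 here); alpha=7 is dominated by the last return (-0.04)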
/oracle.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/04/22/the-oracle-as-risk-predictability/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import statsmodels.api as sm 16 | import yfinance as yf 17 | from scipy import stats 18 | from scipy import optimize as opt 19 | 20 | # Get some data 21 | data = yf.download('FXE', period='max', interval='1d').dropna()['Adj Close'] 22 | # Convert to log returns, get the right dates 23 | rets = data.apply(np.log).diff().dropna() 24 | rets = rets.loc['2022-01-01':'2023-03-31'] 25 | 26 | # Compute the oracle returns 27 | ro = rets.apply(np.abs) 28 | # and the oracle signs 29 | so = rets.apply(np.sign) 30 | 31 | # Fit the exponential distribution 32 | out1 = stats.expon.fit(ro) 33 | 34 | # Fit the cumulative oracle returns line 35 | x = sm.add_constant(np.arange(0, ro.shape[0])) 36 | out2 = sm.OLS(ro.cumsum(), x).fit() 37 | 38 | # Perform a KS test of goodness of fit for the exponential distribution 39 | out3 = stats.kstest(ro, 'expon', args=(out1)) 40 | 41 | # Write a function whose minimization finds the parameters that would have maximized the p-value of the KS test 42 | def min_expon_pvalue(theta, data, fix_mean=None): 43 | if fix_mean is None: 44 | mu = theta[0] 45 | sigma = theta[1] 46 | else: 47 | mu = fix_mean 48 | sigma = theta[0] 49 | out = stats.kstest(data, 'expon', args=(mu, sigma)) 50 | return 1/out[1] 51 | 52 | # and apply the function 53 | set_fix_mean = True 54 | 55 | if set_fix_mean: 56 | out4 = opt.minimize(min_expon_pvalue, out1[1], args=(ro, out1[0]), method='SLSQP', bounds=((0, None),)) 57 | out5 = stats.kstest(ro, 'expon', args=(out1[0], out4.x[0])) 58 | else: 59 | out4 = opt.minimize(min_expon_pvalue, out1, args=(ro,), method='SLSQP', bounds=((0, None), (0, None))) 60 | out5 = stats.kstest(ro, 'expon', args=(out4.x)) 61 | 62 | # Raw print results... 63 | print(ro.mean()) 64 | print(out1) 65 | print(out2.summary()) 66 | print(out3) 67 | print(out4) 68 | print(out5) -------------------------------------------------------------------------------- /examples_fred_nn_forecaster.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code for nearest neighbors forecasting, adapted from the following post 3 | # 4 | # https://prognostikon.wordpress.com/2023/05/06/peaks-and-troughs-forecasting-us-real-gdp-growth/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | # This file contains three examples of the usage of the function in fred_nn_forecaster.py 11 | # 12 | 13 | # Import packages 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | import pandas as pd 17 | import pandas_datareader as pdr 18 | from dateutil.relativedelta import relativedelta 19 | import fred_nn_forecaster as nn 20 | 21 | # Example #1: replication of the results in the blog post, GDP growth forecasting, quarterly frequency 22 | set_ticker = 'GDPC1' 23 | set_name = 'Growth' 24 | set_start = '1947-01-01' 25 | set_end = '2023-03-31' 26 | set_log = True 27 | set_diff = 4 28 | set_peak = 8 29 | set_nn = 2 30 | set_h = 8 31 | set_freq = 'quarterly' 32 | set_lookback = [6, 40, 95] # overwrite the set_nn setting for the peak-based NNP 33 | do_plot = True 34 | do_save = False 35 | out1 = nn.fred_nn_forecaster(set_ticker, set_name, set_start, set_end, 36 | set_log, set_diff, set_peak, set_nn, set_h, set_freq, set_lookback, do_plot, do_save) 37 | 38 | # Example #2: inflation forecasting, monthly frequency 39 | set_ticker = 'CPIAUCSL' 40 | set_name = 'Inflation' 41 | set_start = '1947-01-01' 42 | set_end = '2023-03-31' 43 | set_log = True 44 | set_diff = 12 45 | set_peak = 10 46 | set_nn = 4 47 | set_h = 10 48 | set_freq = 'monthly' 49 | set_lookback = [404, 4, 328, 38, 406] # overwrite the set_nn setting for the peak-based NNP 50 | do_plot = True 51 | do_save = False 52 | out2 = nn.fred_nn_forecaster(set_ticker, set_name, set_start, set_end, 53 | set_log, set_diff, set_peak, set_nn, set_h, set_freq, set_lookback, do_plot, do_save) 54 | 55 | # Example #3: unemployment forecasting, monthly frequency 56 | set_ticker = 'UNRATE' 57 | set_name = 'Unemployment' 58 | set_start = '1947-01-01' 59 | set_end = '2023-04-30' 60 | set_log = False 61 | set_diff = 0 62 | set_peak = 16 63 | set_nn = 5 64 | set_h = 9 65 | set_freq = 'monthly' 66 | set_lookback = None #[229, 208, 833, 612] #, 830] # overwrite the set_nn=5 setting for the peak-based NNP 67 | do_plot = True 68 | do_save = False 69 | out3 = nn.fred_nn_forecaster(set_ticker, set_name, set_start, set_end, 70 | set_log, set_diff, set_peak, set_nn, set_h, set_freq, set_lookback, do_plot, do_save) 71 | 72 | -------------------------------------------------------------------------------- /the lazy moments strategy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-lazy-moments-strategy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | 17 | # Select ticker and frequency of rebalancing 18 | ticker = 'DBC' 19 | freq = '1wk' 20 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 21 | rets = data.pct_change().dropna() 22 | # Crop data if desired 23 | start_date = '2018-01-01' 24 | if start_date is not None: 25 | rets = rets.loc[start_date:] 26 | 27 | # Select range of rolling windows and delay of signal 28 | roll_range = np.arange(3, 25, 1) 29 | 30 | # Roll over the delays 31 | for delay in np.arange(1, 7, 1): 32 | 33 | # Initialize storage 34 | store = pd.DataFrame(data=None, index=rets.index, columns=roll_range) 35 | 36 | # Compute strategy returns for all rolling windows 37 | for roll in roll_range: 38 | signal = rets.rolling(window=roll).apply(lambda x: x.mean()/x.median()).shift(periods=delay).apply(np.sign) 39 | strategy = rets*signal 40 | store[roll] = strategy 41 | 42 | # Crop returns, compute cumulative returns, find the best rolling window each period 43 | store = store.dropna() 44 | cumret = ((store+1).cumprod()-1)*100 45 | idx_col = cumret.apply(np.argmax, axis=1).to_numpy() 46 | # Initialize storage for wealth rotation 47 | rotation = pd.DataFrame(data=None, index=cumret.index, columns=['Rotation']) 48 | rotation.loc[cumret.index[0]] = store.iloc[0, idx_col[0]] 49 | 50 | # Compute the rotation's returns, careful to use store and not cumret here! 51 | for i in range(cumret.shape[0]-1): 52 | rotation.loc[cumret.index[i+1]] = store.iloc[i+1, idx_col[i]] 53 | 54 | # Add to all results 55 | cr_rotation = ((rotation+1).cumprod()-1)*100 56 | cumret = pd.concat([cumret, cr_rotation], axis=1) 57 | 58 | # Add the benchmark, plot and print 59 | cumret[ticker] = ((rets.loc[store.index]+1).cumprod()-1)*100 60 | tr = cumret.iloc[-1] 61 | to_plot = cumret[[roll_range[tr[roll_range].argmax()], 'Rotation', ticker]] 62 | to_plot.plot(title='The lazy moments strategy for '+ticker+', weekly rebalancing for delay d='+str(delay), xlabel='Date', ylabel='returns in percent', grid='both') 63 | plt.show() 64 | # 65 | print('Delay =', delay) 66 | print(to_plot.iloc[-1]) --------------------------------------------------------------------------------
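Note: the rolling mean-to-median signal above is computed with a rolling apply, which gets slow when scanning all the (roll, delay) pairs; an equivalent vectorized form (a sketch under the same definitions as the script above, for one window and one delay) is:

import numpy as np
# same signal as the lambda above: sign of the delayed ratio of rolling mean to rolling median
mu = rets.rolling(window=roll).mean()
md = rets.rolling(window=roll).median()
signal = (mu/md).shift(periods=delay).apply(np.sign)
strategy = rets*signal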
/the osborne bubble.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/gbm-and-the-osborne-bubble/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | ## Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | 17 | ## Define the trading function 18 | def osborne(x, kappa=-1): 19 | nu = len(x) 20 | s2 = x.var() 21 | br = (x+1).prod()-1 22 | er = br - 0.5*s2*nu 23 | if kappa == -2: 24 | threshold = np.sqrt(s2) 25 | elif kappa == -1: 26 | threshold = s2 27 | elif kappa >= 0: 28 | threshold = kappa 29 | # 30 | if er > threshold: 31 | return -1.0 32 | else: 33 | return +1.0 34 | 35 | ## Get some data 36 | ticker = 'GLD' 37 | start_from = '2022-01-01' 38 | freq = '1d' 39 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 40 | rets = data.pct_change().dropna() 41 | rets = rets.loc[start_from:] 42 | 43 | ## Set the parameters and initialize storage 44 | kappa = [-2, -1, 0.0, 0.05, 0.1, 0.2, 0.3, 0.5, 1, 2, 3] 45 | 46 | ## Do a loop over the values of kappa 47 | for set_kappa in kappa: 48 | min_tau = 3 49 | max_tau = 26 50 | tau = np.arange(min_tau, max_tau+1, 1) 51 | store = pd.DataFrame(data=None, index=rets.index, columns=tau) 52 | signals = pd.DataFrame(data=None, index=rets.index, columns=tau) 53 | avg_signal = pd.DataFrame(data=None, index=rets.index, columns=['Avg']) 54 | avg = 0 55 | 56 | # Get the signals and the average signal 57 | for i in tau: 58 | si = rets.rolling(window=i).apply(osborne, args=(set_kappa,)) 59 | signals[i] = si 60 | store[i] = si.shift(periods=1)*rets 61 | avg = avg + si 62 | avg_signal = avg 63 | 64 | # Compute the average signal correctly, get the benchmark 65 | avg_rets = (avg_signal/(max_tau-min_tau+1)).shift(periods=1)*rets 66 | both = pd.concat([rets, avg_rets], axis=1) 67 | both.columns = [ticker, 'Avg'] 68 | 69 | # Compute total returns 70 | ir = ((store + 1).cumprod()-1)*100 71 | tr = ((both + 1).cumprod()-1)*100 72 | 73 | # Compute descriptives on excess returns 74 | er = ir.iloc[-1] - tr.iloc[-1,0] 75 | ds = er.describe() 76 | pr_er = (ir.iloc[-1] > tr.iloc[-1,0]).mean()*100 77 | 78 | # Print results 79 | print('kappa = ', set_kappa) 80 | print('Range of tau = ', min_tau, max_tau) 81 | print(er) 82 | print(round(ds,2)) 83 | print(round(pr_er, 2)) 84 | 85 | -------------------------------------------------------------------------------- /run little trader, run.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/run-little-trader-run/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plot 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # Define a function to compute runs and reversals and associated probabilities 16 | def runs_to_reversals(x): 17 | n = len(x) 18 | runs = 1 19 | 20 | for i in np.arange(1, n, 1): 21 | if x.iloc[i] != x.iloc[i-1]: 22 | runs = runs + 1 23 | 24 | pruns = runs/n 25 | prevr = 1 - pruns 26 | 27 | return pruns/prevr, pruns, prevr 28 | 29 | # Select a ticker to analyze, starting and ending dates, frequency of rebalancing, the rolling window and the alpha for the runs-to-reversals ratio 30 | ticker = 'OIH' 31 | start_date = '2010-01-01' 32 | end_date = '2024-02-29' 33 | freq = '1d' 34 | # roll = 12 35 | # alpha = 0.75 36 | if freq == '1d': 37 | freq_name = 'daily' 38 | roll_set = [14, 21, 30, 63] 39 | elif freq == '1wk': 40 | freq_name = 'weekly' 41 | roll_set = [12, 26, 52] 42 | elif freq == '1mo': 43 | freq_name = 'monthly' 44 | roll_set = [12, 24, 36] 45 | 46 | # I made a loop to find the best values of the pair (roll, alpha) below and you can easily adapt the code to your liking! 47 | for roll in roll_set: 48 | for alpha in [0.25, 0.5, 0.75, 1.0, 1.05, 1.15, 1.25, 2]: 49 | # Download the data, compute returns and signs, compute the rolling signal variable 50 | data = yf.download(ticker, start=start_date, end=end_date, interval=freq)['Adj Close'].dropna() 51 | y = data.pct_change().dropna() 52 | s = np.sign(y) 53 | rr = s.rolling(window=roll).apply(lambda x: runs_to_reversals(x)[0]) 54 | signal = (rr > alpha).astype(float) - (rr <= alpha).astype(float) 55 | 56 | # Compute the strategy's returns, the cumulative returns, plot and print - that's it! 57 | r2r = y*(signal.shift(periods=1).apply(np.sign)) 58 | both = pd.concat([y, r2r], axis=1).iloc[(roll+1):] 59 | both.columns = [ticker, 'R2R strategy'] 60 | # 61 | cr = ((both+1).cumprod()-1)*100 62 | # Print only the meaningful cases 63 | if cr.iloc[-1,1] > cr.iloc[-1,0]: 64 | print('roll = ',roll,'alpha = ',alpha) 65 | print(cr.iloc[-1]) 66 | # and plot if desired! 67 | # if (roll == 63) and (alpha == 1.05): 68 | # cr.plot(title='The Runs-to-Reversals Strategy for '+ticker+' for '+freq_name+' data', xlabel='Date', ylabel='return in percent', grid='both') 69 | # plot.show() 70 | 71 | --------------------------------------------------------------------------------
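Note: the runs count above walks each window element by element inside a rolling apply; a vectorized count (a sketch with the same definitions: a run starts at the first observation and at every sign change) is much faster when scanning all the (roll, alpha) pairs:

import numpy as np

def runs_to_reversals_fast(x):
    x = np.asarray(x)
    # one run to start with, plus one new run at every sign change
    runs = 1 + (x[1:] != x[:-1]).sum()
    pruns = runs/len(x)
    prevr = 1 - pruns
    return pruns/prevr, pruns, prevr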
/speculative_rotation.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/08/31/the-speculative-rotation/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | 18 | # Define the weighted mean 19 | def weighted_mean(x, alpha=1.0): 20 | w = np.arange(1, x.shape[0]+1, 1) ** alpha 21 | m = (x * w).sum()/w.sum() 22 | return m 23 | 24 | # Download some data 25 | ticker1 = 'BTC-USD' 26 | ticker2 = 'SPY' 27 | tickers = [ticker1, ticker2] 28 | data = yf.download(tickers, period='max', interval='1d')['Adj Close'].dropna() 29 | # Get percent returns, select period of estimation/evaluation 30 | r = data.pct_change().dropna().loc['2022-08-01':] 31 | # Define the difference-in-returns 32 | y = r[ticker1]-r[ticker2] 33 | 34 | # Select the rolling window 35 | roll = 63 36 | # Set the exponent 37 | set_alpha = 7 38 | # Short? set to -1 for shorting the second asset 39 | set_short = 1 40 | # Compute the rolling mean and the weighted rolling means 41 | mu0 = y.rolling(window=roll).mean() 42 | mu1 = y.rolling(window=roll).apply(weighted_mean, args=(1.0,)) 43 | mu2 = y.rolling(window=roll).apply(weighted_mean, args=(set_alpha,)) 44 | 45 | # Compute the signs and trade 46 | s0 = mu0.apply(np.sign) 47 | s1 = mu1.apply(np.sign) 48 | s2 = mu2.apply(np.sign) 49 | # Get the benchmark right 50 | bench = r.iloc[roll:] 51 | # And do the rotation below 52 | if set_short == 1: 53 | r0 = (r[ticker1]*(s0.shift(periods=1) >= 0) + r[ticker2]*(s0.shift(periods=1) < 0)).dropna() 54 | r1 = (r[ticker1]*(s1.shift(periods=1) >= 0) + r[ticker2]*(s1.shift(periods=1) < 0)).dropna() 55 | r2 = (r[ticker1]*(s2.shift(periods=1) >= 0) + r[ticker2]*(s2.shift(periods=1) < 0)).dropna() 56 | elif set_short == -1: 57 | r0 = (y*(s0.shift(periods=1) >= 0) - y*(s0.shift(periods=1) < 0)).dropna() 58 | r1 = (y*(s1.shift(periods=1) >= 0) - y*(s1.shift(periods=1) < 0)).dropna() 59 | r2 = (y*(s2.shift(periods=1) >= 0) - y*(s2.shift(periods=1) < 0)).dropna() 60 | 61 | # Put together, cumulate and plot 62 | all = pd.concat([bench, r0, r1, r2], axis=1).dropna() 63 | all.columns = [ticker1, ticker2, 'Spec. Mean', 'Spec. WMean', 'Spec. Exponent'] 64 | # 65 | tr_all = ((all+1).cumprod()-1)*100 66 | # 67 | set_title = 'Total trading return of the rotating strategy between '+ticker1+' and '+ticker2 68 | set_assets = [ticker1, ticker2, 'Spec. Exponent'] 69 | tr_all[set_assets].plot(title=set_title, color=['orange', 'red', 'green'], ylabel='return in percent', xlabel='Date', fontsize=10) 70 | plt.grid(visible=True, which='both') 71 | plt.show() 72 | # and print the total trading return 73 | print(roll) 74 | print(tr_all.iloc[-1]) -------------------------------------------------------------------------------- /patents_and_growth.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/06/10/patents-and-economic-growth/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the necessary packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | 17 | # Download the data on total patents - convert to annual growth rates 18 | data_patents = pdr.fred.FredReader(['PATENTUSALLTOTAL', 'PATENT4NCNTOTAL', 'PATENT4NILTOTAL', 19 | 'PATENT4NJPTOTAL', 'PATENT4NDETOTAL', 'PATENT4NKRTOTAL', 'PATENT4NTWTOTAL', 'PATENT4NINTOTAL', 20 | 'PATENT4NGBTOTAL'], start='1992-01-01', end='2022-12-01').read().pct_change().dropna() 21 | data_patents.columns = ['Pat-US', 'Pat-CN', 'Pat-IL', 'Pat-JP', 'Pat-DE', 22 | 'Pat-KR', 'Pat-TW', 'Pat-IN', 'Pat-UK'] 23 | 24 | # Download the data on real GDP - convert to annual growth rates 25 | data_growth = pdr.fred.FredReader(['RGDPNAUSA666NRUG', 'RGDPNACNA666NRUG', 'RGDPNAILA666NRUG', 'RGDPNAJPA666NRUG', 'RGDPNADEA666NRUG', 'RGDPNAKRA666NRUG', 'RGDPNATWA666NRUG', 'RGDPNAINA666NRUG', 'RGDPNAGBA666NRUG'], start='1992-01-01', end='2022-12-01').read().pct_change().dropna() 26 | data_growth.columns = ['Grw-US', 'Grw-CN', 'Grw-IL', 'Grw-JP', 'Grw-DE', 27 | 'Grw-KR', 'Grw-TW', 'Grw-IN', 'Grw-UK'] 28 | 29 | # Quickly get and print the delays and cross-correlations between the variables for each country 30 | print("-----") 31 | 32 | # First from lagged patents to growth 33 | for i in range(6): 34 | set_lag = i 35 | data_both = pd.concat([data_patents.shift(periods=set_lag), data_growth], axis=1).dropna() 36 | corr = data_both.corr().loc[data_growth.columns, data_patents.columns] 37 | grw_patents = pd.DataFrame(np.diag(corr), index=corr.columns, columns=[set_lag]) 38 | print(grw_patents) 39 | 40 | print("-----") 41 | 42 | # Then from lagged growth to patents 43 | for i in range(6): 44 | set_lag = i 45 | data_both = pd.concat([data_patents, data_growth.shift(periods=set_lag)], axis=1).dropna() 46 | corr = data_both.corr().loc[data_growth.columns, data_patents.columns] 47 | grw_patents = pd.DataFrame(np.diag(corr), index=corr.columns, columns=[set_lag]) 48 | print(grw_patents) 49 | 50 | # I looked at the data from their printouts and collected them into a new dataframe 51 | countries = ['US', 'CN', 'IL', 'JP', 'DE', 'KR', 'TW', 'IN', 'UK'] 52 | # Patents-to-growth 53 | ptg = np.array([[4, 0.2683], [4, 0.3768], [4, 0.1176], [4, 0.4384], [4, 0.2324], 54 | [1, 0.3832], [4, 0.5923], [1, 0.2163], [4, 0.2116]]) 55 | # to dataframe... 56 | ptg = pd.DataFrame(data=ptg, index=countries, columns=['Lag', 'Cross-Correlation']) 57 | # Growth-to-patents 58 | gtp = np.array([[0, 0.1245], [0, 0.0882], [3, 0.3966], [2, 0.1653], [2, 0.2908], [2, 0.3964], 59 | [2, 0.3332], [3, 0.3711], [0, 0.0770]]) 60 | # to dataframe... 61 | gtp = pd.DataFrame(data=gtp, index=countries, columns=['Lag', 'Cross-Correlation']) 62 | 63 | # The figures in the post were then made with Libre Office... 64 | -------------------------------------------------------------------------------- /the fractional trader.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code adding for the post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-fractional-trader-supercharged/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | ## Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | from scipy.fft import fft, fftfreq 17 | from scipy.optimize import minimize 18 | 19 | ## Define the Whittle log-likelihood for the estimation of the fractional order 20 | def loglf_Whittle(d, x): 21 | n = len(x) 22 | m = n//2 23 | y = fft(x) 24 | f = fftfreq(n)[:m] 25 | P = (2.0/n) * np.abs(y[:m]) 26 | gd = 4.0*(np.sin(np.pi*f) ** 2) 27 | sd = np.mean((gd ** d)*P) 28 | loglf = m*np.log(sd) + d*np.sum(np.log(gd[1:])) + m 29 | return loglf 30 | 31 | ## Estimate the fractional order 32 | def fractional_order(x): 33 | cr = (x+1).cumprod().to_numpy() 34 | out = minimize(loglf_Whittle, 0.5, method='SLSQP', bounds=((0, 1),), args=(cr, )) 35 | d = out.x[0] 36 | return d 37 | 38 | ## Get some data 39 | ticker = 'TNA' 40 | start_from = '2000-01-01' 41 | freq = '1mo' 42 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 43 | rets = data.pct_change().dropna() 44 | rets = rets.loc[start_from:'2024-12-02'] 45 | 46 | ## Set the parameters 47 | set_delay = 2 48 | set_min_roll = 8 49 | set_max_roll = 26 50 | roll_range = np.arange(set_min_roll, set_max_roll+1, 1) 51 | kappa_range = [0.5, 0.6, 0.7] 52 | total_cases = len(kappa_range)*len(roll_range) 53 | 54 | # Supercharge? Works for TNA, DBC, OIH! 55 | supercharge = True 56 | 57 | ## Initialize storage 58 | store_er = pd.DataFrame(data=None, index=range(total_cases), columns=['Excess Return', 'd-order']) 59 | counter = 0 60 | 61 | ## Now a double loop for evaluation 62 | for set_roll in roll_range: 63 | for set_kappa in kappa_range: 64 | 65 | ## Get the signals 66 | dhat = rets.rolling(window=set_roll).apply(fractional_order) 67 | signals = (dhat < set_kappa).astype(float) - (dhat >= set_kappa).astype(float) 68 | if supercharge: 69 | threshold = dhat.expanding().quantile(0.95) 70 | signals = (dhat < threshold).astype(float) - (dhat >= threshold).astype(float) 71 | strategy = rets*signals.shift(periods=set_delay) 72 | 73 | ## Merge, process and store 74 | both = pd.concat([rets, strategy], axis=1).iloc[set_roll:] 75 | dhat = dhat.iloc[set_roll:] 76 | # 77 | tr = ((both + 1).cumprod()-1)*100 78 | tr.columns = [ticker, 'Fractional trader'] 79 | all = pd.concat([tr, dhat], axis=1) 80 | all.columns = [ticker, 'Strategy', 'd-order'] 81 | # 82 | # all.plot(secondary_y='d-order') 83 | # plt.show() 84 | er = tr.iloc[-1,1] - tr.iloc[-1,0] 85 | store_er.iloc[counter] = np.hstack([er, dhat.mean()]) 86 | counter = counter + 1 87 | 88 | ## Print the summary 89 | print(store_er.astype(float).describe()) 90 | print((store_er['Excess Return'] > 0).mean()) --------------------------------------------------------------------------------
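Note: the Whittle estimator above is easy to sanity-check on synthetic data: with i.i.d. returns the cumulated series is close to a random walk, so the estimated order should sit near the top of the (0, 1) search interval, while negatively autocorrelated returns should pull it down. A minimal sketch, assuming the fractional_order function defined above:

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
iid = pd.Series(rng.normal(0.0, 0.01, 250))   # i.i.d. returns, so prices are close to a random walk
print(fractional_order(iid))                  # expect an estimate near the upper bound
neg = iid - 0.7*iid.shift(1).fillna(0.0)      # anti-persistent returns
print(fractional_order(neg))                  # expect a visibly lower estimate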
/the speculative switch.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-speculative-switch/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | 17 | # Select ticker and frequency of rebalancing 18 | ticker = 'ETH-USD' 19 | freq = '1d' 20 | # Get the data and crop appropriately 21 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 22 | rets = data.pct_change().dropna() 23 | rets = rets.loc['2022-01-01':] 24 | 25 | # Initialize parameters and storage 26 | nobs = rets.shape[0] 27 | switch = 1 28 | counter = 0 29 | store = None 30 | ch = 0 31 | 32 | # A triple loop over rolling windows, delay values and the evaluation over observations 33 | for roll in np.arange(3, 23, 1): 34 | for delay in np.arange(0, 4, 1): 35 | 36 | # Initialize storage 37 | strategy = pd.DataFrame(data=None, index=rets.index, columns=[ticker, 'S']) 38 | 39 | for i in np.arange(roll, nobs, 1): 40 | # Get the trading sign 41 | mu = rets.iloc[(i-roll-delay):(i-delay)].mean() 42 | if switch > 0: 43 | if np.sign(mu) > 0: 44 | trade = 1 45 | else: 46 | trade = -1 47 | else: 48 | if np.sign(mu) > 0: 49 | trade = -1 50 | else: 51 | trade = 1 52 | # Store performances 53 | strategy.iloc[i, :] = np.hstack([rets.iloc[i], rets.iloc[i]*trade]) 54 | # Apply the switching rule, note the use of the last 2 observations!! 55 | if i > 0: 56 | tr = (strategy.iloc[(i-2):i]+1).prod() 57 | if ((tr[ticker] > tr['S']) and (trade > 0)) or ((tr[ticker] < tr['S']) and (trade < 0)): 58 | switch = -switch 59 | 60 | ch = ch + 1 61 | # Compute performance and save accordingly 62 | tr_all = (strategy+1).prod() 63 | if (tr_all[ticker] < tr_all['S']) and (tr_all['S'] > 1): 64 | excess_return = tr_all['S'] - tr_all[ticker] 65 | new = pd.DataFrame(data=np.hstack([roll, delay, excess_return]).reshape(1, 3), index=[counter], columns=['roll', 'delay', 'excess return']) 66 | store = pd.concat([store, new], axis=0) 67 | # print('roll=',roll,'delay=',delay) 68 | # print(tr_all) 69 | # Update the counter 70 | counter = counter + 1 71 | 72 | # Compute the average excess return per delay value 73 | m0 = store.loc[store['delay']==0].mean() 74 | m1 = store.loc[store['delay']==1].mean() 75 | m2 = store.loc[store['delay']==2].mean() 76 | m3 = store.loc[store['delay']==3].mean() 77 | # 78 | m_all = np.hstack([m0.iloc[2], m1.iloc[2], m2.iloc[2], m3.iloc[2]]) 79 | m_all = pd.DataFrame(data=m_all, index=np.arange(0, 4, 1), columns=['Average Excess Return']) 80 | # Print 81 | print(store) 82 | # 83 | print(store.shape[0]/ch) 84 | # 85 | print(m_all) 86 | # 87 | print(store.loc[store['excess return']==store['excess return'].max()]) 88 | print(store.loc[store['excess return']==store['excess return'].min()]) -------------------------------------------------------------------------------- /the efficient speculator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-efficient-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # Select a dependent and an explanatory asset 16 | # 17 | # For the post I used the following pairs: (QQQ, AAPL), (QQQ, MSFT), (SPY, EEM), (OIH, DBA), (OIH, GLD) 18 | yname = 'GLD' 19 | xname = 'OIH' 20 | tickers = [xname, yname] 21 | 22 | # Download the data, you can change the dates and the frequency 23 | data = yf.download(tickers, start='2022-01-01', end='2024-01-31', interval='1d')['Adj Close'].dropna() 24 | rets = data.pct_change().dropna() 25 | nobs = rets.shape[0] 26 | 27 | # Set rolling window and maximum cross-correlation lag 28 | roll = 7 29 | max_lag = 3 30 | set_lags = np.arange(1, max_lag+1, 1) 31 | # Prepare storage 32 | store = pd.DataFrame(data=None, index=rets.index, columns=[yname, 'Corr(min). sign', 'd(min)-1', 'd(min)', 'd(min)+1', 'Corr(max). sign', 'd(max)-1', 'd(max)', 'd(max)+1']) 33 | 34 | # Loop over the rolling windows, do the computations, trade and save results 35 | for i in np.arange(0, nobs-roll, 1): 36 | ri = rets.iloc[i:(roll+i)] 37 | cor = pd.Series(data=None, index=[set_lags], name='IFE', dtype='float64') 38 | for j in set_lags: 39 | crj = pd.concat([ri[xname], ri[yname].shift(periods=j)], axis=1).dropna().corr().iloc[1, 0] 40 | cor.loc[j] = crj 41 | ife = 1 - np.sqrt(1 - cor**2) 42 | min_ife = ife.idxmin()[0] 43 | max_ife = ife.idxmax()[0] 44 | bench = rets[yname].iloc[roll+i] 45 | str1_max = bench*np.sign(cor.loc[max_ife]) 46 | str2_max = bench*np.sign(rets[xname].iloc[roll+i-max_ife-1]) 47 | str3_max = bench*np.sign(rets[xname].iloc[roll+i-max_ife]) 48 | str1_min = bench*np.sign(cor.loc[min_ife]) 49 | str2_min = bench*np.sign(rets[xname].iloc[roll+i-min_ife-1]) 50 | str3_min = bench*np.sign(rets[xname].iloc[roll+i-min_ife]) 51 | if min_ife != 1: 52 | str4_min = bench*np.sign(rets[xname].iloc[roll+i-min_ife+1]) 53 | else: 54 | str4_min = bench 55 | if max_ife != 1: 56 | str4_max = bench*np.sign(rets[xname].iloc[roll+i-max_ife+1]) 57 | else: 58 | str4_max = bench 59 | store.iloc[roll+i] = np.hstack([bench, str1_min, str2_min, str3_min, str4_min, str1_max, str2_max, str3_max, str4_max]) 60 | 61 | # Compute the combination strategy 62 | store = store.dropna() 63 | cr0 = (store+1).cumprod() 64 | cr1 = cr0.shift(periods=1).fillna(value=0).apply(lambda x: np.argmax(x), axis=1) 65 | cr2 = pd.Series(data=None, index=cr0.index, name='Combo', dtype='float64') 66 | for i in range(cr2.shape[0]): 67 | cr2.iloc[i] = (store+1).iloc[i, cr1.iloc[i]] 68 | cr2 = cr2.cumprod() 69 | cr0['Wealth Rotation'] = cr2 70 | 71 | # Do a nice plot for the top performers 72 | str_idx = cr0.columns[(cr0.iloc[-1].rank() >= 10)].to_list() 73 | str_idx = np.unique(np.hstack([str_idx, yname])) 74 | tot_ret = (cr0[str_idx]-1)*100 75 | tot_ret.plot(title='The efficient speculator strategy for '+yname+', daily data', ylabel='total return in percent', xlabel='Date', grid='both') 76 | plt.show() 77 | 78 | # and print to discuss 79 | print(tot_ret.iloc[-1]) --------------------------------------------------------------------------------
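Note: the quantity 1 - sqrt(1 - cor**2) used above can be read as the forecast-efficiency gain implied by a correlation of cor: the best linear forecast leaves residual variance (1 - cor^2) times the benchmark variance, so it cuts root-mean-squared error by exactly that fraction. A quick standalone check of how slowly the gain grows:

import numpy as np
for cor in [0.1, 0.5, 0.9]:
    print(cor, 1 - np.sqrt(1 - cor**2))
# 0.1 -> 0.005, 0.5 -> 0.134, 0.9 -> 0.564: modest correlations buy very little efficiency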
/the fractional osborne bubble.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code adding material for the results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/gbm-and-the-osborne-bubble/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | ## Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | from scipy.fft import fft, fftfreq 17 | from scipy.optimize import minimize 18 | 19 | ## Define the Whittle log-likelihood for the estimation of the fractional order 20 | def loglf_Whittle(d, x): 21 | n = len(x) 22 | m = n//2 23 | y = fft(x) 24 | f = fftfreq(n)[:m] 25 | P = (2.0/n) * np.abs(y[:m]) 26 | gd = 4.0*(np.sin(np.pi*f) ** 2) 27 | sd = np.mean((gd ** d)*P) 28 | loglf = m*np.log(sd) + d*np.sum(np.log(gd[1:])) + m 29 | return loglf 30 | 31 | ## Define the trading function 32 | def fractional_trader(x, kappa=0.5): 33 | cr = (x+1).cumprod().to_numpy() 34 | out = minimize(loglf_Whittle, 0.5, method='SLSQP', bounds=((0, 1),), args=(cr, )) 35 | d = out.x[0] 36 | # 37 | if (d > kappa): 38 | return -1.0 39 | else: 40 | return +1.0 41 | 42 | ## Get some data 43 | ticker = 'OIH' 44 | start_from = '2000-01-01' 45 | freq = '1mo' 46 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 47 | rets = data.pct_change().dropna() 48 | rets = rets.loc[start_from:'2024-12-02'] 49 | 50 | ## Set the parameters and initialize storage 51 | kappa = [0.3] # [0.5, 0.6, 0.7, 0.8] 52 | 53 | ## Do a loop over the values of kappa 54 | for set_kappa in kappa: 55 | min_tau = 4 56 | max_tau = 36 57 | tau = np.arange(min_tau, max_tau+1, 4) 58 | store = pd.DataFrame(data=None, index=rets.index, columns=tau) 59 | signals = pd.DataFrame(data=None, index=rets.index, columns=tau) 60 | avg_signal = pd.DataFrame(data=None, index=rets.index, columns=['Avg']) 61 | avg = 0 62 | 63 | # Get the signals and the average signal 64 | for i in tau: 65 | si = rets.rolling(window=i).apply(fractional_trader, args=(set_kappa,)) 66 | signals[i] = si 67 | store[i] = si.shift(periods=1)*rets 68 | avg = avg + si 69 | avg_signal = avg 70 | 71 | # Compute the average signal correctly (tau moves in steps of 4, so divide by the number of windows), get the benchmark 72 | avg_rets = (avg_signal/len(tau)).shift(periods=1)*rets 73 | both = pd.concat([rets, avg_rets], axis=1) 74 | both.columns = [ticker, 'Avg'] 75 | 76 | # Compute total returns 77 | ir = ((store + 1).cumprod()-1)*100 78 | tr = ((both + 1).cumprod()-1)*100 79 | 80 | # Compute descriptives on excess returns 81 | er = ir.iloc[-1] - tr.iloc[-1,0] 82 | ds = er.describe() 83 | pr_er = (ir.iloc[-1] > tr.iloc[-1,0]).mean()*100 84 | 85 | # Print results 86 | print('kappa = ', set_kappa) 87 | print('Range of tau = ', min_tau, max_tau) 88 | print(er) 89 | print(round(ds,2)) 90 | print(round(pr_er, 2)) 91 | 92 | # A plot for the post in LinkedIn 93 | etr = pd.concat([ir[24], tr.iloc[:,0]], axis=1).dropna() 94 | etr.plot(title='The Fractional Trader for '+ticker+' and for threshold d=0.3 \n for monthly returns and a 24 month rolling window', xlabel='Date', ylabel='Total return in %', grid='both') 95 | plt.show() 96 | 97 | -------------------------------------------------------------------------------- /wheat_agrichem_energy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/09/04/trading-wheat-inflation/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | 17 | # Download the data from FRED 18 | data = pdr.fred.FredReader(['DNRGRC1M027SBEA', 'PCU32533253', 'WPU0121'], start='1990-01-01').read()# 19 | data.columns = ['Energy', 'AgriChem', 'Wheat'] 20 | 21 | # Select whether to use returns (True) or levels (False) 22 | do_pct_change = True 23 | 24 | # Prepare the data and the parametrization accordingly - NOTE: changing the rolling window, max lag and starting date 25 | # will produce different results; you have to experiment with all of them!!! 26 | if do_pct_change: 27 | data['AgriChem-Energy'] = (data['AgriChem']/data['Energy']).pct_change() 28 | roll = 24 29 | max_lag = 12 30 | data = data.loc['2005-01-01':] 31 | else: 32 | data['AgriChem-Energy'] = (data['AgriChem']/data['Energy']) 33 | roll = 12 34 | max_lag = 6 35 | data = data.loc['2007-01-01':] 36 | # Get sample size and the benchmark wheat inflation 37 | nobs = data.shape[0] 38 | data['Wheat-Returns'] = data['Wheat'].pct_change() 39 | 40 | # Initialize storage 41 | rets = pd.DataFrame(data=None, index=data.index, columns = ['Benchmark', 'AgriChem-Energy']) 42 | 43 | # Do the rolling window loop for trading 44 | for i in np.arange(0, nobs-roll, 1): 45 | # Crop the data 46 | use_data = data[['Wheat', 'AgriChem-Energy', 'Wheat-Returns']].iloc[i:i+roll] 47 | 48 | # Compute the cross-correlations 49 | cross_corr = pd.DataFrame(data=None, index=np.arange(0, max_lag+1, 1), columns=['Wheat-AgriChem-Energy']) 50 | for s in np.arange(0, max_lag+1, 1): 51 | if do_pct_change: 52 | cross_corr.iloc[s, 0] = use_data['Wheat-Returns'].corr(use_data['AgriChem-Energy'].shift(periods=s)) 53 | else: 54 | cross_corr.iloc[s, 0] = use_data['Wheat'].corr(use_data['AgriChem-Energy'].shift(periods=s)) 55 | 56 | # Find the maximum absolute cross-correlation 57 | imax = cross_corr.apply(np.abs).apply(np.argmax, axis=0).iloc[0] 58 | smax = cross_corr.iloc[imax, 0] 59 | 60 | # Trade with the sign of the cross-correlation times the sign of the data at the optimal lag 61 | # (note that for levels the data are always positive so you trade the sign of smax essentially) 62 | tmax = np.sign(use_data['AgriChem-Energy'].iloc[-imax+1]*smax) 63 | 64 | # Find the next value and evaluate your trade 65 | nextr = data['Wheat-Returns'].iloc[roll+i] 66 | rets.iloc[roll+i] = np.c_[nextr, nextr*tmax] 67 | 68 | # Drop the missing values from the dataframe of the strategy 69 | rets = rets.dropna() 70 | crets = ((rets+1).cumprod()-1)*100 71 | 72 | # and plot... 
73 | if do_pct_change: 74 | crets.plot(title='Total Return from Trading Wheat Inflation with AgriChem & Energy \n (trading signals are from growth rates)', ylabel='return in percent', xlabel='Time', color=['red', 'green'], grid='both', fontsize=12) 75 | else: 76 | crets.plot(title='Total Return from Trading Wheat Inflation with AgriChem & Energy \n (trading signals are from levels)', ylabel='return in percent', xlabel='Time', color=['red', 'green'], grid='both', fontsize=12) 77 | # 78 | plt.show() 79 | -------------------------------------------------------------------------------- /the random standardized speculator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2024/02/12/the-random-standardized-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | from statsmodels.tsa.stattools import acf 15 | 16 | # Define the standardizing and acf-minimizing function 17 | def minimize_rho(y, B=100, upper_bound=0.1, rho=1, set_seed=-1): 18 | nobs = y.shape[0] 19 | store_acf = pd.DataFrame(data=None, index=range(B), columns=[y.name, 'Z', 'mZ']) 20 | if set_seed > 0: 21 | np.random.seed(set_seed) 22 | for i in range(B): 23 | e = pd.Series(np.random.uniform(size=nobs, low=0, high=upper_bound), index=y.index, name=y.name) 24 | z = y.div(e, axis='index') 25 | mz = z.mean() 26 | a1 = acf(y)[1:(rho+1)].sum() 27 | a2 = acf(z)[1:(rho+1)].sum() 28 | store_acf.iloc[i] = np.hstack([a1, a2, mz]) 29 | idxmin = store_acf['Z'].abs().to_numpy().argmin() 30 | bm = (upper_bound/2.0) 31 | z_min = store_acf['mZ'].iloc[idxmin] 32 | return store_acf, bm*z_min 33 | 34 | # Get some data 35 | tickers = ['BTC-USD', 'SPY', 'QQQ', 'IWF', 'DBA', 'DBC', 'OIH', 'GLD', 'EEM', 'TLT', 'TNA'] 36 | 37 | # Loop over the tickers and values of the upper bound and rolling window 38 | for ticker in tickers: 39 | data = yf.download(ticker, start='2022-01-01', end='2024-01-31', interval='1wk')['Adj Close'].dropna() 40 | y = data.pct_change().dropna() 41 | nobs = y.shape[0] 42 | y.name = ticker 43 | print('Now doing ticker=',ticker) 44 | 45 | # Set the upper bound here 46 | bounds = np.array([0.1, 0.25, 0.5, 0.75, 1, 2]) 47 | # and the rolling window here 48 | rolls = np.array([4, 5, 6, 8, 12, 14]) 49 | # Choose a seed for the random number generator 50 | seed = 123 51 | # and the number of replications 52 | repls = 200 53 | 54 | # Loop over the bounds and rolling windows 55 | for bound in bounds: 56 | for roll in rolls: 57 | # 58 | store_frc = pd.DataFrame(data=None, index=y.index, columns=['Bench', 'Idea']) 59 | store_err = pd.DataFrame(data=None, index=y.index, columns=['Bench', 'Idea']) 60 | store_ret = pd.DataFrame(data=None, index=y.index, columns=['B&H', 'Bench', 'Idea']) 61 | # Do the computations and store 62 | for i in np.arange(0, nobs-roll-1, 1): 63 | yi = y.iloc[i:(i+roll)] 64 | ya = y.iloc[i+roll] 65 | f0 = yi.mean() 66 | store, f1 = minimize_rho(yi, B=repls, upper_bound=bound, set_seed=seed) 67 | ff = np.hstack([f0, f1]) 68 | store_frc.iloc[i+roll] = ff 69 | store_err.iloc[i+roll] = np.sign(ya) - np.sign(ff) 70 | store_ret.iloc[i+roll] = np.hstack([ya, ya*np.sign(ff)]) 71 | # Crop... 
72 | store_frc = store_frc.dropna() 73 | store_err = store_err.dropna() 74 | store_ret = store_ret.dropna() 75 | 76 | # Compute the total return and print only if better than the benchmark 77 | tot_ret = (store_ret+1).prod()-1 78 | if (tot_ret['Idea'] > tot_ret['B&H']): 79 | print('Bound=',bound) 80 | print('Roll=',roll) 81 | print((store_ret+1).prod()-1) -------------------------------------------------------------------------------- /the-speculative-bootstrap.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/12/16/the-speculative-bootstrap/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # Define the trading function, a simple mean defines the forecast 16 | def trade_the_mean(y, rwind): 17 | frc = y.rolling(window=rwind).mean().apply(np.sign) 18 | ret = (frc.shift(periods=1))*y 19 | ben = y.loc[ret.index] 20 | xrt = ret 21 | return xrt 22 | 23 | # Define the bootstrapped trades 24 | def boot_the_mean(y, rwind, B=1000, n=30): 25 | nobs = y.shape[0] 26 | store = pd.DataFrame(data=None, index=range(B), columns=['Bootstraps']) 27 | xrt0 = trade_the_mean(y, rwind).dropna() 28 | tst0 = (xrt0+1).prod() 29 | for i in range(B): 30 | t0 = np.random.randint(low=0, high=nobs-1, size=1)[0] 31 | t1 = t0 + n 32 | if t1 > nobs-1: 33 | t1 = nobs 34 | yi = y.iloc[t0:t1] 35 | xrti = trade_the_mean(yi, rwind) 36 | tsti = (xrti+1).prod() 37 | store.iloc[i] = tsti 38 | return xrt0, tst0, store 39 | 40 | # Define the distribution of bootstrapped trades over rolling windows 41 | def boot_the_window(y, nrwind, boot=1000, size=30): 42 | rseq = np.arange(nrwind[0], nrwind[-1]+1, 1) 43 | tstar = pd.DataFrame(data=None, index=rseq, columns=['tstar']) 44 | for i in rseq: 45 | u = boot_the_mean(y, i, B=boot, n=size) 46 | tstar.loc[i] = np.mean(u[2] < (u[1] ** (1/size)), axis=0)[0] 47 | return tstar 48 | 49 | # Get some data, daily from 2023-01-01 50 | ticker = 'UNG' 51 | data = yf.download(ticker, period='max', interval='1d')['Adj Close'].dropna().loc['2023-01-01':] 52 | rets = data.pct_change().dropna() 53 | nobs = rets.shape[0] 54 | 55 | # Define the training and evaluation split dates 56 | xdates = ['2023-03-31', '2023-04-30', '2023-05-31', '2023-06-30', '2023-07-31', '2023-08-31', '2023-09-30', '2023-10-30'] 57 | ydates = ['2023-04-01', '2023-05-01', '2023-06-01', '2023-07-01', '2023-08-01', '2023-09-01', '2023-10-01', '2023-11-01'] 58 | 59 | # A simple outer loop over these days 60 | for d in range(len(xdates)): 61 | # Get the right dates 62 | xd = xdates[d] 63 | yd = ydates[d] 64 | print('----------------------------------------') 65 | print('Now doing asset', ticker) 66 | print('Evaluation starts', yd) 67 | print('----------------------------------------') 68 | # Split into training and testing sample 69 | x = rets.loc[:xd] 70 | y = rets.loc[yd:] 71 | 72 | # Define sequence of rolling windows 73 | set_nroll = [2, 14] 74 | 75 | # Define sequence of bootstraps 76 | set_boot = [25, 50, 150] 77 | 78 | # Define sequence of trading days to bootstrap 79 | set_n_size = [7, 10, 14] 80 | 81 | # A simple loop evaluates the results 82 | for b in set_boot: 83 | for n in set_n_size: 84 | train_rwind = boot_the_window(x, set_nroll, boot=b, size=n) 85 | rstar = 
train_rwind.index[np.where(train_rwind.max() == train_rwind)[0]][0] 86 | sb = trade_the_mean(y, rstar) 87 | all = (pd.concat([y, sb], axis=1).dropna()+1).cumprod() 88 | all.columns = [ticker, 'Speculative Bootstrap'] 89 | print('Now doing combination of b =',b,' and n =',n) 90 | print(round(all.iloc[-1, 1], 5), round(all.iloc[-1, 0], 5)) -------------------------------------------------------------------------------- /the-speculative-system.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/11/11/the-speculative-system/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | import statsmodels.api as sm 17 | from statsmodels.tsa.api import VAR 18 | from statsmodels.tools.sm_exceptions import ValueWarning 19 | import warnings 20 | warnings.simplefilter('ignore', ValueWarning) 21 | 22 | # Download some data, for the post I used 23 | # 24 | # GLD (1, True, 12) and set_fcst_type = 0 25 | # DBA (18, True, 6) and set_fcst_type = 2 26 | # WEAT (12, True, 12) and set_fcst_type = 0 27 | # SH (1, False, 1) and set_fcst_type = 2 28 | # SPY (3, False, 36) and set_fcst_type = 0 29 | # FXE (2, True, 24) and set_fcst_type = 2 30 | ticker = 'FXE' 31 | data = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 32 | r = data.pct_change().dropna()#.loc['2020-01-01':] # you can change this of course 33 | dr = r.diff() 34 | # Get the threshold variables 35 | z1p = ((dr > 0) & (r > 0)).astype(float) 36 | z2p = ((dr <= 0) & (r > 0)).astype(float) 37 | z1n = ((dr <= 0) & (r <= 0)).astype(float) 38 | z2n = ((dr > 0) & (r <= 0)).astype(float) 39 | # Convert the zeroes to -ones, this is important!! 40 | z1p.loc[z1p == 0] = -1.0 41 | z2p.loc[z2p == 0] = -1.0 42 | z1n.loc[z1n == 0] = -1.0 43 | z2n.loc[z2n == 0] = -1.0 44 | # Put together 45 | z = pd.concat([r, dr, z1p, z2p, z1n, z2n], axis=1).dropna() 46 | z.columns = [ticker, 'D-'+ticker, 'Z1+', 'Z2+', 'Z1-', 'Z2-'] 47 | 48 | # Number of observations and initial window 49 | nobs = z.shape[0] 50 | ini_wind = 60 51 | set_lag = 2 52 | use_exog = True 53 | set_fcst_type = 2 54 | train_every = 24 55 | 56 | # and initialize storage 57 | store = pd.DataFrame(data=None, index=z.index, columns=[ticker, 'Speculative System']) 58 | 59 | # Run a simple loop to get the signals and the strategy returns 60 | for i in np.arange(0, nobs-ini_wind, 1): 61 | z_i = z.iloc[:(i+ini_wind),:] # this is recursive estimation, change to i:(i+ini_wind) for rolling!! 62 | # Use the threshold variables? 63 | if use_exog: 64 | y_i = z_i.iloc[1:,[0,1]] 65 | x_i = z_i.iloc[:-1,2:].apply(np.sign) 66 | x_i.index = y_i.index 67 | if (i%train_every) == 0: 68 | model = VAR(endog=y_i, exog=x_i).fit(set_lag) 69 | x_f = z_i.iloc[-1,2:].values.reshape(1, 4) 70 | fcst = model.forecast(y=y_i.values[-set_lag:], steps=1, exog_future=x_f)[0] 71 | # Or not?
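# (with use_exog=True the VAR in (r, Δr) is augmented with the four lagged sign indicators as exogenous regressors, and the most recent indicators are passed as exog_future for the one-step forecast)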
72 | else: 73 | y_i = z_i.iloc[:,[0,1]] 74 | if (i%train_every) == 0: 75 | model = VAR(endog=y_i).fit(set_lag) 76 | fcst = model.forecast(y=y_i.values[-set_lag:], steps=1)[0] 77 | # Compute the forecast 78 | if set_fcst_type == 0: 79 | if (fcst[0] > 0) | (fcst[1] > 0): 80 | z_f = 1.0 81 | elif (fcst[0] < 0) | (fcst[1] < 0): 82 | z_f = -1.0 83 | elif set_fcst_type == 1: 84 | z_f = np.sign(np.mean(fcst)) 85 | elif set_fcst_type == 2: 86 | z_f = np.sign(fcst[0]) 87 | # 88 | bnh = z.iloc[i+ini_wind, 0] 89 | stg = bnh*z_f 90 | store.iloc[i+ini_wind, :] = np.hstack([bnh, stg]) 91 | 92 | # Compute the cumulative return and plot 93 | cret = ((store + 1).cumprod() - 1)*100 94 | cret.plot(grid='both', title='The speculative system strategy for '+ticker+' using monthly returns', xlabel='Date', ylabel='return in percent') 95 | plt.savefig(ticker+'.png') 96 | plt.show() 97 | # 98 | print(cret.iloc[-1]) -------------------------------------------------------------------------------- /summer speculations.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/summer-speculations/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | 17 | # Select ticker and frequency of trading 18 | ticker = 'GLD' 19 | freq = '1d' 20 | 21 | # Get the data, section if needed 22 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'] 23 | rets = data.pct_change().dropna() 24 | start_from = '2022-01-01' 25 | if start_from is not None: 26 | rets = rets.loc[start_from:] 27 | 28 | # Select maximum for the parameters 29 | delay_max = 6 30 | ind_roll_max = 14 31 | est_roll_max = 21 32 | 33 | # Initialize a counter and storage of results 34 | counter = 0 35 | store = pd.DataFrame(data=np.zeros([1, 7]), index=[counter], columns=['Delay', 'Ind Roll', 'Est Roll', 'S00', 'S01', 'S02', 'S03']) 36 | 37 | # A triple search loop 38 | for delay in np.arange(2, delay_max+1, 1): 39 | for ind_roll in np.arange(-1, ind_roll_max+1, 1): 40 | for est_roll in np.arange(2, est_roll_max+1, 1): 41 | 42 | # Compute the indicator 43 | if ind_roll > 0: 44 | indicator = rets.rolling(window=ind_roll).sum().shift(periods=delay) 45 | else: 46 | indicator = rets.shift(periods=delay) 47 | 48 | # Compute the signals as per the blog 49 | signal_00 = 1 - 2*(rets.shift(periods=delay) < 0) 50 | signal_01 = 1 - 2*(indicator < 0) 51 | prob_top = (rets < 0).rolling(window=est_roll).mean().shift(periods=1) 52 | prob_bot = prob_top.shift(periods=delay) 53 | signal_02 = 1 - 2*(prob_top/prob_bot)*(rets.shift(periods=delay) < 0) 54 | prob_bot = (indicator < 0).rolling(window=est_roll).mean().shift(periods=delay) 55 | signal_03 = 1 - 2*(prob_top/prob_bot)*(indicator.shift(periods=delay) < 0) 56 | 57 | # Compute the returns of each strategy 58 | s00 = rets*(signal_00.apply(np.sign)) 59 | s01 = rets*(signal_01.apply(np.sign)) 60 | s02 = rets*(signal_02.apply(np.sign)) 61 | s03 = rets*(signal_03.apply(np.sign)) 62 | 63 | # Put everything together, store and continue 64 | all = pd.concat([rets, s00, s01, s02, s03], axis=1).dropna() 65 | 66 | # Compute the cumulative and the excess returns over the B&H 67 | cr = ((all + 1).cumprod()-1)*100 68 | er = cr.iloc[-1,1:] - cr.iloc[-1, 0] 69 | 70 | #
If any strategy has positive excess returns store it 71 | if any(er > 0): 72 | counter = counter + 1 73 | tt = np.hstack([delay, ind_roll, est_roll, er.to_numpy().flatten()]).reshape(1, 7) 74 | tt = pd.DataFrame(data=tt, index=[counter], columns=store.columns) 75 | store = pd.concat([store, tt], axis=0) 76 | 77 | # Find the best performers (each strategy indexed by its own argmax) 78 | max_id = store.apply(np.argmax, axis=0) 79 | er00 = store.iloc[max_id['S00'],[0, 1, 2, 3]].to_numpy().reshape(-1, 1) 80 | er01 = store.iloc[max_id['S01'],[0, 1, 2, 4]].to_numpy().reshape(-1, 1) 81 | er02 = store.iloc[max_id['S02'],[0, 1, 2, 5]].to_numpy().reshape(-1, 1) 82 | er03 = store.iloc[max_id['S03'],[0, 1, 2, 6]].to_numpy().reshape(-1, 1) 83 | 84 | # Put together and print 85 | er_all = pd.DataFrame(np.c_[er00, er01, er02, er03], index = ['d', 'M', 'R', 'ER'], columns = ['S1', 'S2', 'S3', 'S4']) 86 | # 87 | print(round(er_all,3)) -------------------------------------------------------------------------------- /us_inflation_and_russia_wars.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/06/04/inflation-and-war/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the necessary packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas_datareader as pdr 15 | import pandas as pd 16 | 17 | # Get the data 18 | data = pdr.fred.FredReader('CPIAUCSL', start='2006-01-01', end='2023-04-30').read() 19 | infl = data.apply(np.log).diff(periods=12).dropna()*100 20 | 21 | # Mark the 12 months prior to and the 12 months after the event, centered on the month after it began, 22 | # and scale by the post-event inflation... dates are from the associated Wikipedia pages 23 | georgia_war = infl.loc['2007-09-01':'2009-09-01']/infl.loc['2008-09-01'] 24 | georgia_war.index = np.arange(-12, 13, 1) 25 | # 26 | crimean_war = infl.loc['2013-04-01':'2015-04-01']/infl.loc['2014-04-01'] 27 | crimean_war.index = np.arange(-12, 13, 1) 28 | # 29 | ukraine_war = infl.loc['2021-03-01':'2023-03-01']/infl.loc['2022-03-01'] 30 | ukraine_war.index = np.arange(-12, 13, 1) 31 | 32 | # Put together, with new index 33 | all = pd.concat([georgia_war, crimean_war, ukraine_war], axis=1)*100 34 | all.columns = ['Georgia War', 'Crimean War', 'Ukraine War'] 35 | 36 | # Now for the plots...first the whole period - note the split of columns for doing the 37 | # legends and axes labels right! 38 | ax1 = all[['Georgia War', 'Crimean War']].plot(title='Pre- and post-event US inflation vs. Russian Wars', xlabel='Months', ylabel='Inflation Index', color=['black', 'blue']) 39 | ax1.xaxis.grid(True, which='major') 40 | ax1.yaxis.grid(True, which='major') 41 | ax1.legend(loc='upper left') 42 | ax2 = ax1.twinx() 43 | all[['Ukraine War']].plot(ax=ax2, color=['red'], ylabel='Inflation Index') 44 | ax2.legend(loc='upper right') 45 | plt.show() 46 | 47 | # and then at and after the event 48 | ax1 = all.loc[0:,['Georgia War', 'Crimean War']].plot(title='Post-event US inflation vs.
Russian Wars', xlabel='Months', ylabel='Inflation Index', color=['black', 'blue']) 49 | ax1.xaxis.grid(True, which='major') 50 | ax1.yaxis.grid(True, which='major') 51 | ax1.legend(loc='lower left') 52 | ax2 = ax1.twinx() 53 | all.loc[0:,['Ukraine War']].plot(ax=ax2,color=['red'], ylabel='Inflation Index') 54 | ax2.legend(loc='upper right') 55 | plt.show() 56 | 57 | # and let us repeat this for global food inflation, copy/paste and adapt from above 58 | data = pdr.fred.FredReader('PFOODINDEXM', start='2006-01-01', end='2023-04-30').read() 59 | infl = data.apply(np.log).diff(periods=12).dropna()*100 60 | # 61 | georgia_war = infl.loc['2007-09-01':'2009-09-01']/infl.loc['2008-09-01'] 62 | georgia_war.index = np.arange(-12, 13, 1) 63 | # 64 | crimean_war = infl.loc['2013-04-01':'2015-04-01']/infl.loc['2014-04-01'] 65 | crimean_war.index = np.arange(-12, 13, 1) 66 | # 67 | ukraine_war = infl.loc['2021-03-01':'2023-03-01']/infl.loc['2022-03-01'] 68 | ukraine_war.index = np.arange(-12, 13, 1) 69 | # Put together, with new index 70 | all = pd.concat([georgia_war, crimean_war, ukraine_war], axis=1)*100 71 | all.columns = ['Georgia War', 'Crimean War', 'Ukraine War'] 72 | 73 | # Now for the plots...use only Georgia and Ukraine wars...and post event only 74 | ax1 = all.loc[0:,['Georgia War']].plot(title='Post-event global food inflation vs. Russian Wars', xlabel='Months', ylabel='Food Inflation Index', color=['black']) 75 | ax1.xaxis.grid(True, which='major') 76 | ax1.yaxis.grid(True, which='major') 77 | ax1.legend(loc='lower left') 78 | ax2 = ax1.twinx() 79 | all.loc[0:,['Ukraine War']].plot(ax=ax2,color=['red'], ylabel='Food Inflation Index') 80 | ax2.legend(loc='upper right') 81 | plt.show() 82 | -------------------------------------------------------------------------------- /the explosive speculator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-explosive-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # Select ticker to analyze 16 | ticker = 'BTC-USD' 17 | 18 | # Set starting and ending dates and rebalancing frequency 19 | start_date = '2022-01-01' 20 | end_date = '2024-02-29' 21 | freq = '1d' 22 | # Set some default values for the initial window 23 | if freq == '1d': 24 | ini_wind = 21 25 | elif freq == '1wk': 26 | ini_wind = 26 27 | elif freq == '1mo': 28 | ini_wind = 24 29 | # Set the max number of cross-correlations to consider 30 | roll_max = 10 31 | 32 | # Import the data 33 | data = yf.download(ticker, start=start_date, end=end_date, interval=freq)['Adj Close'].dropna() 34 | # Compute the returns and absolute returns 35 | r = data.pct_change().dropna() 36 | v = r.abs() 37 | 38 | # Get the sample size and initialize storage 39 | nobs = r.shape[0] 40 | store = pd.DataFrame(data=None, index=r.index, columns=[ticker, 'ES-max', 'ES-min', 'ES-max+1', 'ES-min+1']) 41 | store_abs = pd.DataFrame(data=None, index=r.index, columns=[ticker, 'ES-max', 'ES-min', 'ES-max+1', 'ES-min+1']) 42 | 43 | # The evaluation loop 44 | for i in np.arange(0, nobs-ini_wind, 1): 45 | # Crop to get the training data 46 | ri = r.iloc[i:(i+ini_wind)] 47 | vi = v.iloc[i:(i+ini_wind)] 48 | # Set the rolling windows for the cross-correlation computation 49 | roll = np.arange(1, roll_max+1, 1) 50 | # and storage 51 | store_corr = pd.Series(data=None, index=roll, name='Corr', dtype='float64') 52 | 53 | # Next compute all the cross-correlations 54 | for j in roll: 55 | cr = pd.concat([vi.shift(periods=j), ri], axis=1).dropna().corr().iloc[1,0] 56 | store_corr.loc[j] = cr 57 | 58 | # Compute the minimum and maximum, also in absolute values, of the cross-correlations 59 | actual = r.iloc[ini_wind+i] 60 | max_corr = store_corr.argmax()+1 61 | min_corr = store_corr.argmin()+1 62 | max_abs_corr = store_corr.abs().argmax()+1 63 | min_abs_corr = store_corr.abs().argmin()+1 64 | 65 | # Compute the signals, based on the change in the lagged volatility, and trade them 66 | signal_max = np.sign(vi.diff().shift(periods=max_corr).iloc[-1]) 67 | signal_min = np.sign(vi.diff().shift(periods=min_corr).iloc[-1]) 68 | signal_max1 = np.sign(vi.diff().shift(periods=max_corr+1).iloc[-1]) 69 | signal_min1 = np.sign(vi.diff().shift(periods=min_corr+1).iloc[-1]) 70 | store.iloc[ini_wind+i] = np.hstack([actual, actual*signal_max, actual*signal_min, actual*signal_max1, actual*signal_min1]) 71 | # repeat for the absolute cross-correlations 72 | signal_abs_max = np.sign(vi.diff().shift(periods=max_abs_corr).iloc[-1]) 73 | signal_abs_min = np.sign(vi.diff().shift(periods=min_abs_corr).iloc[-1]) 74 | signal_abs_max1 = np.sign(vi.diff().shift(periods=max_abs_corr+1).iloc[-1]) 75 | signal_abs_min1 = np.sign(vi.diff().shift(periods=min_abs_corr+1).iloc[-1]) 76 | store_abs.iloc[ini_wind+i] = np.hstack([actual, actual*signal_abs_max, actual*signal_abs_min, actual*signal_abs_max1, actual*signal_abs_min1]) 77 | 78 | # Done, drop the NAs, compute the total return 79 | store = store.dropna() 80 | store_abs = store_abs.dropna() 81 | cr = ((store+1).cumprod()-1)*100 82 | cr_abs = ((store_abs+1).cumprod()-1)*100 83 | 84 | # Plot and print 85 | # cr[[ticker, 'ES-max+1']].plot(title='The explosive speculator strategy for '+ticker+', daily rebalancing', xlabel='Date', ylabel='return in percent', grid='both') 86 | # plt.show() 87 | # 88 | print(cr.iloc[-1]) 89 | print(cr_abs.iloc[-1]) 90 |
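# A minimal, self-contained sketch (an illustration, not part of the original script) of the
# lag-selection step used above: for each candidate lag j, correlate lagged absolute returns
# with returns and keep the lags with the largest and smallest cross-correlation. The simulated
# series, seed and lag range here are assumptions for demonstration only.
import numpy as np
import pandas as pd
rng = np.random.default_rng(42)
r = pd.Series(rng.normal(0.0, 0.02, 250))  # stand-in for a return series
v = r.abs()                                # its absolute returns ('volatility')
corrs = pd.Series({j: pd.concat([v.shift(j), r], axis=1).dropna().corr().iloc[1, 0] for j in range(1, 11)})
print('lag with max cross-correlation:', corrs.idxmax(), '| lag with min:', corrs.idxmin())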
-------------------------------------------------------------------------------- /globalization_and_economic_freedom.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/05/27/globalization-and-economic-freedom/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the necessary packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import openpyxl 16 | 17 | # Read all the data 18 | # 19 | # Economic freedom scores are all in one place 20 | ecofree = pd.read_csv('freedom-scores.csv', header=0) 21 | 22 | # Globalization scores are also all in one place 23 | glob = pd.read_excel('KOFGI_2022_public.xlsx', header=0) 24 | 25 | # OK, select the years for analysis 26 | ecofree_year = 2020 27 | glob_year = 2000 28 | 29 | # Select the data 30 | x = ecofree.loc[ecofree['Index Year']==ecofree_year].copy() 31 | x.index = x['Short Name'] 32 | # rename index so that it matches with the other dataframe 33 | x.index.name = 'country' 34 | x = x.drop(columns='Short Name') 35 | # 36 | y = glob.loc[glob['year']==glob_year].copy() 37 | y.index = y['country'] 38 | y = y.drop(columns='country') 39 | 40 | # Merge the data, keep only common countries 41 | merged_data = x.merge(y, how='inner', left_on='country', right_on='country') 42 | print(merged_data.corr()['KOFGI']) 43 | 44 | # Select variables to use, fix the axes presentation 45 | set_econname = 'Government Integrity' 46 | set_globname = 'KOFGI' 47 | if ecofree_year > glob_year: 48 | yname = set_econname 49 | xname = set_globname 50 | else: 51 | yname = set_globname 52 | xname = set_econname 53 | # Use only the required variables 54 | xy = merged_data[[xname, yname, 'ISO Code']].dropna() 55 | # Compute the cross-correlation 56 | corr = xy[[xname, yname]].corr().iloc[1, 0] 57 | # Fix the titles for the plot 58 | if ecofree_year > glob_year: 59 | set_main_title = yname+' in '+str(ecofree_year)+' vs. Globalization in '+str(glob_year)+', ρ = '+str(round(corr,2)) 60 | set_x_label = 'Globalization, index' 61 | set_y_label = yname+', index' 62 | else: 63 | set_main_title = 'Globalization in '+str(glob_year)+' vs. '+xname+' in '+str(ecofree_year)+', ρ = '+str(round(corr,2)) 64 | set_y_label = 'Globalization, index' 65 | set_x_label = xname+', index' 66 | 67 | # Done, now for the plot 68 | f, ax = plt.subplots(figsize=[8, 5]) 69 | xx = xy[xname] 70 | yy = xy[yname] 71 | fontsize = 10 72 | plot_handler = ax.scatter(x=xx, y=yy, s=xx*(1.62*5), alpha=1.0, color='green') 73 | ax.set_title(set_main_title, fontsize=fontsize, fontweight='bold') 74 | ax.set_xlabel(set_x_label, fontweight='bold', fontsize=fontsize) 75 | ax.set_ylabel(set_y_label, fontweight='bold', fontsize=fontsize) 76 | ax.set_ylim([yy.min()-10, yy.max()+10]) 77 | ax.grid(True, which='both', color='black', linestyle=':') 78 | lbl = xy['ISO Code'].tolist() 79 | for tick in ax.xaxis.get_major_ticks(): 80 | tick.label1.set_fontsize(fontsize) 81 | tick.label1.set_fontweight('bold') 82 | for tick in ax.yaxis.get_major_ticks(): 83 | tick.label1.set_fontsize(fontsize) 84 | tick.label1.set_fontweight('bold') 85 | ax.set_facecolor("lightblue") 86 | # Careful with the annotation!!!
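# (the warning above is because ax.annotate draws each label at a fixed point offset from its marker, so labels can overlap or spill outside the axes when countries cluster; ann_list keeps the handles in case you want to prune or reposition them)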
87 | l_i = 0 88 | ann_list = [] 89 | for z1, z2 in zip(xx, yy): 90 | label = lbl[l_i] 91 | ann = ax.annotate(label, (z1, z2), textcoords="offset points", 92 | xytext=(10, 20), ha='right', fontweight='bold', fontsize=10) 93 | ann_list.append(ann) 94 | l_i += 1 95 | f.tight_layout() 96 | plt.show() 97 | 98 | # You can print (optionally save, remove the comment) the data ranked by the x-variable 99 | sorted_xy = xy.sort_values(by=xname) 100 | print(round(sorted_xy, 2)) 101 | # sorted_xy.to_csv(xname+' and '+yname+'.csv') -------------------------------------------------------------------------------- /the random enforcer.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-random-enforcer/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | 17 | _W = None # global accumulator for the random weight matrices, used and updated by the random enforcer below 18 | def random_enforcer(x, W=None, B=10, interval=(-1, 1)): 19 | nobs = x.shape[0] 20 | if W is None: 21 | W = np.random.uniform(size=(B, nobs), low=interval[0], high=interval[1]) 22 | z = W@(x.to_numpy().reshape(nobs, 1)) 23 | mu = np.mean(z) 24 | md = np.median(z) 25 | global _W 26 | _W = W if _W is None else _W + W # accumulate the weight draws across rolling windows 27 | return np.sign(mu/md) 28 | 29 | # Train the random enforcer 30 | def train_random_enforcer(data, R, delay, to_discrete=True, threshold=1, repl=10, bounds=(-1, 1)): 31 | rets = data 32 | if to_discrete: 33 | y = rets.apply(np.sign) 34 | else: 35 | y = rets 36 | global _W; do = True 37 | while do: 38 | _W = None; signal = y.rolling(window=R).apply(random_enforcer, args=(None, repl, bounds, )).shift(periods=delay) # reset the accumulator at the start of each pass 39 | both = pd.concat([rets, rets*signal], axis=1).dropna() 40 | tr = (both+1).prod() 41 | if (tr.iloc[1] - tr.iloc[0]) > threshold: 42 | do = False 43 | break 44 | return _W, tr 45 | 46 | # Select ticker, period of computation and frequency 47 | ticker = 'OIH' 48 | start_date = '2019-01-01' 49 | freq = '1wk' 50 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 51 | if start_date is not None: 52 | data = data.loc[start_date:] 53 | # 54 | rets = data.pct_change().dropna() 55 | y = rets.apply(np.sign) 56 | nobs = y.shape[0] 57 | 58 | # Set the parameters of the evaluation 59 | set_repl = 3 60 | set_tt = [0.65, 0.70, 0.75, 0.80] 61 | set_roll = [2, 3, 4, 6, 12] 62 | 63 | # Initialize storage 64 | store_in = pd.DataFrame(data=None, index=set_tt, columns=set_roll) 65 | store_out = pd.DataFrame(data=None, index=set_tt, columns=set_roll) 66 | 67 | # Set the parameters of training 68 | set_discrete = True 69 | set_threshold = 0 70 | set_d = 2 71 | set_B = 5 72 | set_interval = (-1, 1) 73 | 74 | # Now, loop over the different periods of evaluation and produce results!
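# (each tt below is a training fraction: the first int(tt*nobs) observations are used to fit the weights and the remaining observations are held out, so tt=0.65 means a 65%/35% train/test split)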
75 | for tt in set_tt: 76 | y_in = y.iloc[:int(tt*nobs)] 77 | y_out = y.iloc[int(tt*nobs):] 78 | rets_in = rets.iloc[:int(tt*nobs)] 79 | rets_out = rets.iloc[int(tt*nobs):] 80 | 81 | for roll in set_roll: 82 | print('Now doing tt=',tt,'and roll=',roll) 83 | # 84 | W_all = None 85 | for repl in range(set_repl): 86 | W, tr_in = train_random_enforcer(rets_in, roll, set_d, set_discrete, set_threshold, set_B, set_interval) 87 | # Average the W matrix 88 | W = W/(nobs - roll+1) # (any positive rescaling of W leaves the sign(mu/md) signal unchanged) 89 | W_all = W if W_all is None else W_all + W # accumulate across replications 90 | # Average the average W matrix 91 | W_all = W_all/set_repl 92 | signal_out = rets_out.rolling(window=roll).apply(random_enforcer, args=(W_all, )).shift(periods=set_d) 93 | both_out = pd.concat([rets_out, rets_out*signal_out], axis=1).dropna() 94 | tr_out = (both_out+1).prod() 95 | # Store in-sample (training) and out-of-sample (evaluation) performance as excess return 96 | store_in.loc[tt, roll] = tr_in.iloc[1]-tr_in.iloc[0] 97 | store_out.loc[tt, roll] = tr_out.iloc[1]-tr_out.iloc[0] 98 | 99 | # Print results for examination 100 | print(store_in) 101 | print(round(store_out, 3)) 102 | ps = (store_out > 0).mean() 103 | print(ps) 104 | print(store_out.max()) 105 | print(store_out.mean()) 106 | -------------------------------------------------------------------------------- /the-μ-strategy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/10/07/get-rich-quick-or-the-μ-strategy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | 12 | # Import the required libraries 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | import pandas as pd 16 | import scipy as sp 17 | import yfinance as yf 18 | import math 19 | 20 | # A set of functions to obtain the iterated logarithm 21 | def _log(x, base=math.e): 22 | return int(np.log(x) / np.log(base)) 23 | 24 | def recursiveLogStar(n, b=math.e): 25 | if n > 1.0: 26 | return 1.0 + recursiveLogStar(_log(n, b), b); 27 | else: 28 | return 0 29 | 30 | def logstar(x): 31 | n = len(x) 32 | z = np.zeros([n, 1]) 33 | for i in range(n): 34 | z[i] = recursiveLogStar(x[i]) # + np.log(2.865064) the extra constant not needed 35 | return z 36 | 37 | # A function to get the complexity-weighted sample mean, with complexity measured by the sample size 38 | def complexity_weighted_mean(x, nroll, weight_type=0): 39 | z = pd.DataFrame(data=None, index=x.index, columns=nroll) 40 | if weight_type == 0: 41 | scale = len(nroll) 42 | for i in range(len(nroll)): 43 | z[nroll[i]] = (x.rolling(window=nroll[i]).mean()) 44 | elif weight_type == 1: 45 | scale = (2.0**(-nroll)).sum() 46 | for i in range(len(nroll)): 47 | z[nroll[i]] = (2.0**(-nroll[i]))*(x.rolling(window=nroll[i]).mean()) 48 | elif weight_type == 2: 49 | scale = (2.0**(-logstar(nroll))).sum() # 2^(-log*(n)): a universal-prior style discount of longer windows 50 | for i in range(len(nroll)): 51 | z[nroll[i]] = (2.0**(-recursiveLogStar(nroll[i])))*(x.rolling(window=nroll[i]).mean()) 52 | return z.sum(axis=1)/scale 53 | 54 | 55 | # Get some data, for the post I used monthly rebalancing for SPY, DBA, TNA and EEM 56 | tickers = ['SPY', 'DBA', 'TNA', 'EEM'] 57 | 58 | # Forecasts are based on signs 59 | use_signs = True 60 | 61 | # Set the rolling windows for the speculative complexity averaging 62 | set_R1 = 2 63 | set_RM = 4 64 | nroll = np.arange(set_R1, set_RM+1, 1) 65 | # also set the weighting type 66 | set_weight_type = 2 67 | 68 | # Then, a loop over
the tickers 69 | for ticker in tickers: 70 | data = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 71 | r = data.pct_change().dropna().loc['2021-01-01':] 72 | y = r 73 | if use_signs: 74 | y = np.sign(r) 75 | 76 | # Compute the complexity mean for the forecast 77 | frc = complexity_weighted_mean(y, nroll, set_weight_type).apply(np.sign) 78 | 79 | # and then trade according to the rules of the post 80 | standard = r*frc.shift(periods=1) 81 | predict_long = (frc.shift(periods=2) == r.shift(periods=1).apply(np.sign)) 82 | predict_long_short = 2*predict_long - 1 83 | mu_long = r*predict_long 84 | mu_long_short = r*predict_long_short 85 | 86 | # Collect the results, remove impact of maximum rolling window 87 | rr = pd.concat([r, standard, mu_long, mu_long_short], axis=1).iloc[nroll[-1]:] 88 | rr.columns = [ticker, 'complexity', 'μ-long', 'μ-long/short'] 89 | 90 | # Do the plot 91 | tr = ((rr+1).cumprod()-1)*100 92 | tr.plot(title='Total trading returns of the μ-strategy in '+ticker, color=['red', 'black', 'blue', 'green'], ylabel='return in percent') 93 | plt.grid(visible=True, which='both') 94 | plt.show() 95 | 96 | # and print the total trading return, along with the window end-points 97 | print("End-points of windows are: ", nroll[0], nroll[-1]) 98 | print(tr.iloc[-1]) 99 | 100 | tr[[ticker, 'μ-long/short']].plot(title='Total trading returns of the μ-strategy in '+ticker, color=['red', 'green'], ylabel='return in percent') 101 | plt.grid(visible=True, which='both') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /sp500_food_energy_output.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/04/29/the-market-as-predictor-of-relative-economic-activity/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import yfinance as yahoo 17 | import datetime as dt 18 | from dateutil.relativedelta import relativedelta 19 | 20 | # Tickers, start date, end date and data extraction 21 | tickers = ['INDPRO', 'MCUMFN', 'CPIUFDSL', 'CPIENGSL'] 22 | start_date = '2016-03-01' 23 | end_date = '2023-03-31' 24 | # Do the download 25 | econdata = pdr.fred.FredReader(tickers, start=start_date, end=end_date).read() 26 | # Give nice column names 27 | econdata.columns = ['Output', 'Capacity', 'Food', 'Energy'] 28 | 29 | # Get the two ratios, output/capacity and food/energy 30 | econdata = econdata.assign(Index1 = econdata['Food']/econdata['Energy'], 31 | Index2 = econdata['Output']/econdata['Capacity']) 32 | # Make nice column names again 33 | econdata.columns = ['Output', 'Capacity', 'Food', 'Energy', 'Food/Energy', 'Output/Capacity'] 34 | 35 | # Specify just the two ratios in a new dataframe, take annual log-growth 36 | indices = econdata[['Output/Capacity', 'Food/Energy']].apply(np.log).diff(periods=12).dropna()*100 37 | 38 | # Next get the SP500, compute log-growth too! 
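# (annual log-growth here is log(x_t) - log(x_{t-12}), scaled by 100; for small changes this approximates the 12-month percent growth rate)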
39 | sp500 = yahoo.download('^GSPC', period='max', interval='1mo')['Adj Close'].dropna() 40 | sp500 = sp500.apply(np.log).diff(periods=12).dropna()*100 41 | sp500.name = 'S&P500' 42 | 43 | # Merge the data, drop the NA 44 | data = pd.concat([indices, sp500], axis=1).dropna() 45 | 46 | # Put together the two series for comparison; first the relative real output, the CARRO 47 | y = data['Output/Capacity']-data['Food/Energy'] 48 | # then the S&P500 with the desired lead time - in a loop, find the lead time with max cross-correlation 49 | lead_time = np.arange(0, 13, 1) 50 | store_corr = pd.DataFrame(data=None, index=lead_time, columns=['CCorr']) 51 | for i in lead_time: 52 | x = data['S&P500'].shift(periods=i) 53 | new_data = pd.concat([y, x], axis=1).dropna() 54 | new_data.columns = ['Relative Real Output', 'S&P500'] 55 | store_corr.iloc[i] = new_data.corr().iloc[1,0] 56 | 57 | # Nice plot for the CARRO and S&P500 58 | plot_data = pd.concat([y, data['S&P500']], axis=1) 59 | plot_data.columns = ['CARRO', 'S&P500'] 60 | plot_data.plot(title='Capacity-adjusted relative real output - CARRO and the S&P500', xlabel='Year', ylabel='annual log-growth, percent', grid='both', color=['blue', 'green'], 61 | figsize=[13, 8]) 62 | plt.show() 63 | 64 | # Plot the lead time vs. the cross-correlation 65 | (store_corr*100).plot(title='Cross-correlation vs. lead time - S&P500 leading CARRO', 66 | xlabel='Lead Time, months', ylabel='Cross-correlation, percent', grid='both', figsize=[13, 8]) 67 | plt.show() 68 | print(store_corr) 69 | 70 | # and then the series with the optimal lead time 71 | istar = store_corr.sort_values(by='CCorr', ascending=False).index[0] 72 | # Fix the dates for the effective sample 73 | eff_start = dt.datetime.strptime(start_date, '%Y-%m-%d').date()+relativedelta(months=12+istar) 74 | sample = str(eff_start)+' to '+end_date 75 | # and do the rest of the computations 76 | x = data['S&P500'].shift(periods=istar) 77 | new_data = pd.concat([y, x], axis=1).dropna() 78 | new_data.columns = ['CARRO(+'+str(istar)+')', 'S&P500'] 79 | new_data.plot(title='S&P500 leading CARRO for a lead time of '+str(istar)+' months for '+sample, 80 | xlabel='Year', ylabel='annual log-growth, percent', grid='both', color=['blue', 'green'], figsize=[13, 8]) 81 | plt.show() 82 | # and a scatterplot to show the relationship from a different perspective 83 | new_data.plot(kind='scatter', x='S&P500', y='CARRO(+'+str(istar)+')', 84 | title='S&P500 leading CARRO for a lead time of '+str(istar)+' months for '+sample, 85 | xlabel='S&P500'', percent', ylabel='CARRO(+'+str(istar)+'), percent', grid='both', 86 | c='green', s=50, figsize=[13, 8]) 87 | plt.show() -------------------------------------------------------------------------------- /the random speculator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/11/04/the-lazy-random-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # A function to compute the maximum drawdown, input is a dataframe of cumulative returns 16 | def max_dd(crets): 17 | maxcret = (crets+1).cummax(axis=0) 18 | drawdowns = ((crets + 1) / maxcret) - 1 19 | return drawdowns.min(axis=0) 20 | 21 | 22 | # Another function to collect performance measures, input is a dataframe of returns 23 | def performance_measures(rets, f_factor, target_r=0): 24 | mu = rets.mean() * f_factor 25 | sd = rets.std() * np.sqrt(f_factor) 26 | sr = mu / sd 27 | er = target_r - rets 28 | er = er.clip(lower=0) 29 | l2 = (er ** 2).mean(axis=0) 30 | st = mu/np.sqrt(l2) 31 | cr = (rets+1).cumprod(axis=0) - 1 32 | md = max_dd(cr) 33 | stats = pd.DataFrame([mu, sd, sr, st, cr.iloc[-1], md]) 34 | stats.index = ['Mean', 'Std. Dev.', 'Sharpe', 'Sortino', 'TR', 'MaxDD'] 35 | return stats.transpose(), cr 36 | 37 | # Select an ETF ticker, for the post I used SPY, TNA and LQD 38 | ticker = 'LQD' 39 | 40 | # Select frequency of trading 41 | freq = '1d' 42 | 43 | # Get some data after 2022 44 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'] 45 | r = data.pct_change().dropna().loc['2022-01-01':].dropna() 46 | nobs = r.shape[0] 47 | 48 | # Select replications and minimum leverage level 49 | repl = 100 50 | minlev = 1 51 | 52 | # Select threshold and step 53 | tt = -0.1 54 | step = 0.01 55 | 56 | # Initialize storage across replications 57 | store_repl = np.zeros([nobs, repl]) 58 | 59 | # Loop across replications 60 | for j in range(repl): 61 | print('Now running simulation = ', j) 62 | 63 | # Re-initialize capital 64 | K0 = 100 65 | B0 = K0 66 | 67 | # Initialize storage and tracking 68 | x0 = np.random.uniform(low=-minlev, high=+minlev, size=1) 69 | store = pd.DataFrame(data=None, index=r.index, columns=['x', ticker, 'TLP']) 70 | track = 0 71 | 72 | # Loop over the observations 73 | for i in range(nobs): 74 | 75 | ri = r.iloc[i] 76 | K1 = K0 + x0*K0*ri 77 | B1 = B0*(1 + ri) 78 | 79 | if np.sign(ri) != np.sign(x0): 80 | if x0 > 0: 81 | x1 = np.random.uniform(low=x0, high=minlev+x0, size=1) 82 | else: 83 | x1 = np.random.uniform(low=-minlev-x0, high=x0, size=1) 84 | else: 85 | if x0 < 0: 86 | x1 = np.random.uniform(low=x0, high=minlev+x0, size=1) 87 | else: 88 | x1 = np.random.uniform(low=-minlev-x0, high=x0, size=1) 89 | 90 | store.iloc[i] = np.hstack([x1, B1, K1]) 91 | 92 | if (K1 - K0)/K0 > tt: 93 | if i > 1: 94 | x0 = store['x'].iloc[(i-1):(i+1)].mean() 95 | else: 96 | x0 = x1 97 | tt = tt + step 98 | else: 99 | tt = tt - step 100 | K0 = K1 101 | B0 = B1 102 | 103 | # Store and continue 104 | store_repl[:,j] = store['TLP'].values 105 | 106 | # Compute the average across replications 107 | store_avg = np.mean(store_repl, axis=1) 108 | 109 | # Add to dataframe 110 | store['TLP-avg'] = store_avg 111 | 112 | # Set the frequency factor for performance evaluation 113 | if freq == '1d': 114 | f_factor = 262 115 | elif freq == '1wk': 116 | f_factor = 52 117 | elif freq == '1mo': 118 | f_factor = 12 119 | 120 | # Get the statistics, print and plot! 
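# (f_factor is the number of trading periods per year used for annualization in performance_measures; note that in the function above the Sortino denominator is left at the per-period scale, so Sortino and Sharpe are not on identical units)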
121 | stats, cret = performance_measures(store[[ticker, 'TLP-avg']].pct_change().dropna(), f_factor) 122 | print(round(stats, 3)) 123 | (cret *100).plot(grid='both', title='The (lazy) random speculator strategy for '+ticker+', daily trading', ylabel='return in percent', xlabel='Date') 124 | plt.show() -------------------------------------------------------------------------------- /trust.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/05/22/in-government-we-trust/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the necessary packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | 17 | # Read the OECD data files 18 | trust = pd.read_csv('DP_LIVE_22052023220521471.csv', header=0) 19 | ineq = pd.read_csv('DP_LIVE_22052023220651049.csv', header=0) 20 | dinc = pd.read_csv('DP_LIVE_22052023220348330.csv', header=0) 21 | 22 | # Get the latest available data 23 | trust_last = trust.groupby(by='LOCATION').apply(lambda x: x['Value'].iloc[-1]) 24 | ineq_last = ineq.groupby(by='LOCATION').apply(lambda x: x['Value'].iloc[-1]) 25 | dinc_last = dinc.groupby(by='LOCATION').apply(lambda x: x['Value'].iloc[-1]/1e+3) 26 | 27 | # Put together, give nice names 28 | all = pd.concat([trust_last, ineq_last, dinc_last], axis=1).dropna() 29 | all.columns = ['Trust', 'Inequality', 'Disp. Income'] 30 | 31 | # Remove outliers? 32 | remove_outliers = True 33 | 34 | # Let's create a scatterplot, for inequality first 35 | if remove_outliers: 36 | use = all.drop(labels=['TUR', 'MEX', 'CHL', 'CRI', 'SVK', 'SVN', 'CZE']) 37 | else: 38 | use = all 39 | corr = use.corr().loc['Inequality', 'Trust'] 40 | f, ax = plt.subplots(figsize=[20, 12.3]) 41 | xx = use['Inequality'] 42 | yy = use['Trust'] 43 | fontsize = 12 44 | plot_handler = ax.scatter(x=xx, y=yy, s=yy*20, alpha=1.0, color='red') 45 | ax.set_title('Trust in Government vs. Income Inequality, ρ = '+str(round(corr,2)), fontsize=fontsize, fontweight='bold') 46 | ax.set_xlabel('Income Inequality, Gini', fontweight='bold', fontsize=fontsize) 47 | ax.set_ylabel('Trust in Government, %', fontweight='bold', fontsize=fontsize) 48 | ax.set_ylim([20, 90]) 49 | ax.grid(True, which='both', color='black', linestyle=':') 50 | lbl = use.index.tolist() 51 | for tick in ax.xaxis.get_major_ticks(): 52 | tick.label1.set_fontsize(fontsize) 53 | tick.label1.set_fontweight('bold') 54 | for tick in ax.yaxis.get_major_ticks(): 55 | tick.label1.set_fontsize(fontsize) 56 | tick.label1.set_fontweight('bold') 57 | ax.set_facecolor("lightblue") 58 | # Careful with the annotation!!! 59 | l_i = 0 60 | ann_list = [] 61 | for z1, z2 in zip(xx, yy): 62 | label = lbl[l_i] 63 | ann = ax.annotate(label, (z1, z2), textcoords="offset points", 64 | xytext=(12, 22), ha='right', fontweight='bold', fontsize=12) 65 | ann_list.append(ann) 66 | l_i += 1 67 | f.tight_layout() 68 | plt.show() 69 | 70 | # and then for disposable income 71 | if remove_outliers: 72 | use = all.drop(labels=['MEX', 'CRI', 'USA']) 73 | else: 74 | use = all 75 | corr = use.corr().loc['Disp. Income', 'Trust'] 76 | f, ax = plt.subplots(figsize=[20, 12.3]) 77 | xx = use['Disp.
Income'] 78 | yy = use['Trust'] 79 | fontsize = 12 80 | plot_handler = ax.scatter(x=xx, y=yy, s=yy*20, alpha=1.0, color='red') 81 | ax.set_title('Trust in Government vs. Disposable Income, ρ = '+str(round(corr,2)), fontsize=fontsize, fontweight='bold') 82 | ax.set_xlabel('Disposable Income, current USD', fontweight='bold', fontsize=fontsize) 83 | ax.set_ylabel('Trust in Government, %', fontweight='bold', fontsize=fontsize) 84 | ax.set_ylim([20, 90]) 85 | ax.grid(True, which='both', color='black', linestyle=':') 86 | lbl = use.index.tolist() 87 | for tick in ax.xaxis.get_major_ticks(): 88 | tick.label1.set_fontsize(fontsize) 89 | tick.label1.set_fontweight('bold') 90 | for tick in ax.yaxis.get_major_ticks(): 91 | tick.label1.set_fontsize(fontsize) 92 | tick.label1.set_fontweight('bold') 93 | ax.set_facecolor("lightblue") 94 | # Careful with the annotation!!! 95 | l_i = 0 96 | ann_list = [] 97 | for z1, z2 in zip(xx, yy): 98 | label = lbl[l_i] 99 | ann = ax.annotate(label, (z1, z2), textcoords="offset points", 100 | xytext=(12, 22), ha='right', fontweight='bold', fontsize=12) 101 | ann_list.append(ann) 102 | l_i += 1 103 | f.tight_layout() 104 | plt.show() -------------------------------------------------------------------------------- /mean_and_probability.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/07/09/the-probable-speculative-constant/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | 18 | # A function to compute the trajectory matrix for the ecdf calculation 19 | def get_trajectory(x, k): 20 | """ 21 | Compute the trajectory matrix of a data array given a memory parameter 22 | 23 | :param x: array of data 24 | :param k: scalar, memory order 25 | :return: the trajectory matrix 26 | """ 27 | if len(x.shape) == 1: 28 | x = x.reshape(-1, 1) 29 | if k == 1: 30 | return x 31 | elif k > 1: 32 | y = np.zeros([x.shape[0] - k + 1, k * x.shape[1]]) 33 | for i in range(x.shape[0] - k + 1): 34 | y[i, :] = np.hstack(x[i:(i + k), :]) 35 | return y 36 | else: 37 | raise ValueError('In function get_trajectory the memory order must be >= 1') 38 | 39 | # Download some data 40 | ticker = 'BTC-USD' 41 | data = yf.download(ticker, period='max', interval='1d')['Adj Close'].dropna() 42 | # Get percent returns, select period of estimation/evaluation 43 | y = data.pct_change().dropna().loc['2022-01-01':] 44 | 45 | # Select the rolling window - note that this will be used twice 46 | roll = 4 47 | # Compute the rolling mean and the predictive errors 48 | mu = y.rolling(window=roll).mean() 49 | e = (y - mu.shift(periods=1)).dropna() 50 | 51 | # Use the same rolling window to compute the trajectory matrix of the predictive errors 52 | zmat = get_trajectory(e.to_numpy(), roll) 53 | # Carefully align the rolling mean with the values of the trajectory matrix 54 | mmat = mu.iloc[(2*roll-2):-1].to_numpy().reshape(-1, 1) 55 | # Compute the ecdf easily 56 | pmat = np.apply_along_axis(np.mean, 1, (zmat <= -mmat)) 57 | 58 | # Align the evaluation returns 59 | ymat = y.iloc[2*roll:-1].to_numpy().reshape(-1, 1) 60 | # and, again, carefully align the rolling mean and ecdf for the signal (note the renaming) 61 |
mmat = mu.iloc[(2*roll-1):-2].to_numpy().reshape(-1, 1) 62 | pmat = pmat[:-2].reshape(-1, 1) 63 | 64 | # Put everything together, index correctly 65 | together = pd.DataFrame(data=np.c_[ymat, mmat, pmat], index=y.iloc[2*roll:-1].index, columns=['Actual', 'Mean', 'Prob']) 66 | 67 | # Initialize the dataframe for the strategy returns 68 | rr = pd.DataFrame(data=None, index=together.index, columns=['Buy & Hold', 'Probable Speculative Constant AND', 'Probable Speculative Constant OR', 'Speculative Constant']) 69 | 70 | # Set the probability for the ecdf 71 | prob = 0.5 72 | 73 | # Get the strategy via a loop 74 | for i in range(together.shape[0]): 75 | xi = together.iloc[i, :] 76 | mi = xi['Mean'] 77 | pi = xi['Prob'] 78 | # The probable speculative constant - and 79 | if (mi > 0) and (pi <= prob): 80 | rr.iloc[i, 1] = xi['Actual'] 81 | elif (mi < 0) and (pi > prob): 82 | rr.iloc[i, 1] = -xi['Actual'] 83 | else: 84 | rr.iloc[i, 1] = 0 85 | # The probable speculative constant - or 86 | if (mi > 0) or (pi <= prob): 87 | rr.iloc[i, 2] = xi['Actual'] 88 | elif (mi < 0) or (pi > prob): 89 | rr.iloc[i, 2] = -xi['Actual'] 90 | else: 91 | rr.iloc[i, 2] = 0 92 | # The speculative constant alone 93 | if (mi > 0): 94 | rr.iloc[i, 3] = xi['Actual'] 95 | elif (mi < 0): 96 | rr.iloc[i, 3] = -xi['Actual'] 97 | # The benchmark 98 | rr.iloc[i, 0] = xi['Actual'] 99 | 100 | # Do a nice plot 101 | tr = ((rr+1).cumprod()-1)*100 102 | tr.plot(title='Total trading returns of the speculative constant strategies in '+ticker, color=['red', 'blue', 'green', 'orange'], ylabel='return in percent') 103 | plt.grid(visible=True, which='both') 104 | plt.show() 105 | # and print the total trading return 106 | print(roll) 107 | print(tr.iloc[-1]) -------------------------------------------------------------------------------- /the speculative sectors.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-speculative-sectors/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | 10 | # Import the packages 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | import pandas as pd 14 | import scipy as sp 15 | import statsmodels.api as sm 16 | import yfinance as yf 17 | 18 | # Linear model, least squares or robust, from statsmodels with sequential elimination based on p-values 19 | def sequential_elimination_lm(set_Y, set_X, set_alpha, robust=False): 20 | if robust: 21 | out = sm.RLM(endog=set_Y, exog=set_X).fit() 22 | else: 23 | out = sm.OLS(endog=set_Y, exog=set_X, hasconst=True).fit() 24 | pv_old = out.pvalues 25 | ip_old = pv_old[pv_old <= set_alpha[0]].index 26 | if len(ip_old) == 0: 27 | return out 28 | 29 | # and with a simple loop remove the rest in the proper way with diminishing p-values 30 | for aa in np.arange(1, len(set_alpha)): 31 | xa = set_X[ip_old] 32 | ya = set_Y 33 | if robust: 34 | out = sm.RLM(endog=set_Y, exog=xa).fit() 35 | else: 36 | out = sm.OLS(endog=ya, exog=xa, hasconst=True).fit() 37 | pv_new = out.pvalues 38 | ip_new = pv_new[pv_new <= set_alpha[aa]].index 39 | if len(ip_new) > 0: 40 | pv_old = pv_new 41 | ip_old = ip_new 42 | 43 | # and this is the final model 44 | xa = set_X[ip_old] 45 | ya = set_Y 46 | out = sm.RLM(endog=ya, exog=xa).fit() if robust else sm.OLS(endog=ya, exog=xa, hasconst=True).fit() # keep the robust choice in the final fit as well 47 | 48 | # Done!
49 | return out 50 | 51 | # A simple regression predictor based on the above function and the data structure of the post 52 | def srp(data, alpha, lag, robust): 53 | y = data.iloc[:, 0] 54 | x = sm.add_constant(data.iloc[:, 1:]) 55 | model = sequential_elimination_lm(y.iloc[lag:], x.shift(periods=lag).iloc[lag:], alpha, robust) 56 | beta = model.params 57 | xfor = x[model.model.exog_names].iloc[-1] 58 | fcst = np.sign((beta.mul(xfor)).sum()) 59 | return fcst 60 | 61 | # Load the data 62 | tickers = ['SPY', 'XLC', 'XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU', 'XLRE'] 63 | data_close = yf.download(tickers, period='max', interval='1d')['Adj Close'].dropna() 64 | # Select a starting date 65 | start_from = '2023-01-01' 66 | if start_from is not None: 67 | data_close = data_close.loc[start_from:] 68 | 69 | # Sector names 70 | sector_names = ['XLC', 'XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU', 'XLRE'] 71 | # Index names 72 | index_names = ['SPY'] 73 | # Sector closing names 74 | sector_close_names = [x+str('_C') for x in sector_names] 75 | # Index closing names 76 | index_close_names = [x+str('_C') for x in index_names] 77 | 78 | # Get the daily returns 79 | sector_close = data_close[sector_names].pct_change().dropna() 80 | index_close = data_close[index_names].pct_change().dropna() 81 | 82 | # The trading exercise is straightforward, with the signals coming from a multiple regression (see the blog post for theoretical details) 83 | set_factor_X = sector_close.apply(np.sign) 84 | set_index_Y = index_close.apply(np.sign) 85 | factor_data = pd.concat([set_index_Y, set_factor_X], axis=1) 86 | roll = 14 87 | store = pd.DataFrame(data=None, index=index_close['SPY'].index, columns=['SPY', 'Factors']) 88 | nobs = index_close['SPY'].shape[0] 89 | # Set the parameters for model reduction and estimation 90 | set_alpha = [0.9, 0.8] 91 | set_lag = 1 92 | use_robust = True 93 | 94 | # Do the trading evaluation in a loop 95 | for i in np.arange(0, nobs-roll, 1): 96 | i_data = factor_data.iloc[i:i+roll] 97 | fcst = srp(i_data, set_alpha, set_lag, use_robust) 98 | bench = index_close['SPY'].iloc[i+roll] 99 | store.iloc[i+roll] = np.hstack([bench, bench*fcst]) # align with the date on which the return is realized 100 | 101 | # Compute the cumulative returns, print and plot 102 | store = store.dropna() 103 | store_cr = ((store+1).cumprod()-1)*100 104 | print(store_cr.iloc[-1]) 105 | # 106 | store_cr.plot(grid='both', title='The Speculative Sectors Strategy for SPY, daily data', xlabel='Date', ylabel='total return in percent') 107 | plt.show() 108 | -------------------------------------------------------------------------------- /the-adaptive-mean.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code added for the post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-adaptive-mean/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | ## Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import yfinance as yf 17 | 18 | ## Get some data 19 | variable = ['CPIAUCSL'] 20 | raw_data = pdr.fred.FredReader(variable, start='2000-01-01', end='2024-12-01').read() 21 | rets = raw_data.pct_change(periods=1).dropna() # diff(periods=1).dropna() # for TB3MS and FEDFUNDS 22 | nobs = rets.shape[0] 23 | 24 | ## Initialize parameters and storage 25 | roll = 36 26 | burn_in = roll + 2 27 | 28 | for gamma in [0.1, 0.25, 0.5, 0.7, 0.8, 0.9, 0.95, 0.99]: 29 | # 30 | store_fc = pd.DataFrame(data=None, index=rets.index, columns=['FCB-roll-'+str(roll), 'FC-roll-'+str(roll), 'FCB-rec', 'FC-rec']) 31 | store_ac = pd.DataFrame(data=None, index=rets.index, columns=['Actual']) 32 | store_fe = pd.DataFrame(data=None, index=rets.index, columns=['FEB-roll-'+str(roll), 'FE-roll-'+str(roll), 'FEB-rec', 'FE-rec']) 33 | store_sgn = pd.DataFrame(data=None, index=rets.index, columns=['FEB-roll-'+str(roll), 'FE-roll-'+str(roll), 'FEB-rec', 'FE-rec']) 34 | 35 | 36 | ## The computations are very direct 37 | for i in np.arange(roll, nobs, 1): 38 | 39 | # Section the data 40 | xrec = rets.iloc[:i] 41 | xrol = xrec[-roll:] 42 | actual = rets.iloc[i].values[0] 43 | 44 | # Compute the benchmark forecasts 45 | mu_rol = xrol.mean().values[0] 46 | mu_rec = xrec.mean().values[0] 47 | feb_rol = actual - mu_rol 48 | feb_rec = actual - mu_rec 49 | 50 | if i > burn_in: 51 | Ip_rol = (store_fe.iloc[:i,1] > 0).astype(float).iloc[-roll:] 52 | Pp_rol = (store_fe.iloc[:i,1] > 0).iloc[-roll:].mean() 53 | x_rol = (Ip_rol - Pp_rol).values.reshape(-1,1) 54 | y_rol = (xrol - mu_rol).values 55 | cxy_rol = np.mean(y_rol*x_rol) 56 | vx_rol = np.mean(x_rol ** 2) 57 | b_rol = 2*np.sign(cxy_rol)*cxy_rol/vx_rol 58 | if cxy_rol > 0: 59 | factor = gamma ** np.log(i) 60 | else: # cxy_rol <= 0, so that factor is always defined 61 | factor = (1/gamma) ** np.log(i) 62 | xf_rol = mu_rol + factor*np.sign(b_rol)*b_rol*x_rol[-1] 63 | # 64 | Ip_rec = (store_fe.iloc[:i,3] > 0).astype(float) 65 | Pp_rec = (store_fe.iloc[:i,3] > 0).mean() 66 | x_rec = (Ip_rec - Pp_rec).values.reshape(-1, 1) 67 | y_rec = (xrec - mu_rec).values 68 | cxy_rec = np.mean(y_rec*x_rec) 69 | vx_rec = np.mean(x_rec ** 2) 70 | b_rec = 2*np.sign(cxy_rec)*cxy_rec/vx_rec 71 | if cxy_rec > 0: 72 | factor = gamma ** np.log(i) 73 | else: # cxy_rec <= 0, so that factor is always defined 74 | factor = (1/gamma) ** np.log(i) 75 | xf_rec = mu_rec + factor*np.sign(b_rec)*b_rec*x_rec[-1] 76 | # 77 | fe_rol = actual - xf_rol 78 | fe_rec = actual - xf_rec 79 | else: 80 | xf_rol = mu_rol 81 | xf_rec = mu_rec 82 | fe_rol = feb_rol 83 | fe_rec = feb_rec 84 | 85 | # Store the forecasts, forecast errors and signs 86 | store_fc.iloc[i] = np.hstack([mu_rol, xf_rol, mu_rec, xf_rec]) 87 | store_ac.iloc[i] = actual 88 | store_fe.iloc[i] = np.hstack([feb_rol, fe_rol, feb_rec, fe_rec]) 89 | store_sgn.iloc[i] = (np.sign(actual) == np.hstack([np.sign(mu_rol), np.sign(xf_rol), np.sign(mu_rec), np.sign(xf_rec)])) 90 | 91 | print('Scaling is = ',factor) 92 | mse = (store_fe.dropna() ** 2).mean() 93 | rmse = mse/mse.iloc[2] 94 | mae = (store_fe.dropna().abs()).mean() 95 | rmae = mae/mae.iloc[2] 96 | ssr = store_sgn.dropna().mean() 97 | all = pd.concat([rmse, rmae, ssr], axis=1) 98 | all.columns = ['relMSE', 'relMAE', 'SSR'] 99 | print(all) 100 | -------------------------------------------------------------------------------- /Cox_Trading_System_WFO.py:
-------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-inferential-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # ------------------------------------------------------------------------------------- 10 | 11 | from Cox_Trading_System import CoxProbabilisticTrader_V2 12 | from Cox_Trading_System import WalkForwardOptimizer 13 | import numpy as np 14 | from itertools import product 15 | import yfinance as yf 16 | from scipy.stats import norm 17 | from collections import defaultdict 18 | import matplotlib.pyplot as plt 19 | 20 | # Download data 21 | data = yf.download("SPY", start="2022-01-01", end="2025-06-01") 22 | prices = data['Close'].values 23 | 24 | # Define parameter grid for optimization 25 | param_grid = { 26 | 'lookback_window': [7, 14, 21], 27 | 'entropy_threshold': [0.1, 0.15, 0.2], 28 | 'prior_belief': [0.3, 0.5], 29 | 'gamma': [0.5, 0.75, 1.0], 30 | 'momentum_threshold': [-0.02, -0.01, 0.0], 31 | 'sharpe_threshold': [0.4, 0.5, 0.6], 32 | 'utility_threshold': [-0.1, 0.0, 0.1], 33 | 'trade_threshold': [0.4, 0.5, 0.6, 0.7] 34 | } 35 | 36 | # Initialize WFO 37 | wfo = WalkForwardOptimizer( 38 | trader_class=CoxProbabilisticTrader_V2, 39 | param_grid=param_grid, 40 | lookback_window=21, 41 | train_window=504, 42 | test_window=177 43 | ) 44 | 45 | # Run optimization 46 | results = wfo.optimize(prices) 47 | 48 | # Print summary 49 | for res in results: 50 | print(f"Train: {res['train_start']} to {res['train_end']}, Test: {res['test_start']} to {res['test_end']}") 51 | print(f"Best Params: {res['best_params']}") 52 | print(f"Test Cumulative Return: {res['test_performance']['cumulative_return']:.2%}") 53 | print(f"Test Sharpe Ratio: {res['test_performance']['sharpe_ratio']:.2f}") 54 | print("-" * 50) 55 | 56 | # from Cox_Trading_System_V1 import CoxProbabilisticTrader_V1, momentum_func, volatility_func, returns_func, trend_func 57 | # from WFO_Procedure import WalkForwardOptimizer 58 | # import yfinance as yf 59 | # 60 | # # Historical price data 61 | # data = yf.download("SPY", start="2018-01-01", end="2024-06-01") 62 | # prices = data['Close'].values 63 | # 64 | # # Static feature definitions (assumed constant across WFO) 65 | # proposition_system = { 66 | # 'momentum': (0.0, 0.01), 67 | # 'volatility': (0.015, 0.005), 68 | # 'returns': (0.0, 0.01), 69 | # 'trend': (0.0, 0.01) 70 | # } 71 | # 72 | # feature_functions = { 73 | # 'momentum': momentum_func, 74 | # 'volatility': volatility_func, 75 | # 'returns': returns_func, 76 | # 'trend': trend_func 77 | # } 78 | # 79 | # # Parameters to optimize (must match V1 signature) 80 | # param_grid = { 81 | # 'lookback_window': [63], 82 | # 'entropy_threshold': [0.1, 0.15], 83 | # 'prior_belief': [0.3], 84 | # 'state_k': [0.5, 0.65], 85 | # 'trade_threshold': [0.25, 0.35] 86 | # } 87 | # 88 | # # Custom wrapper for WFO that injects the static arguments 89 | # class V1Wrapper: 90 | # def __init__(self, **kwargs): 91 | # self.trader = CoxProbabilisticTrader_V1( 92 | # proposition_system=proposition_system, 93 | # feature_functions=feature_functions, 94 | # **kwargs 95 | # ) 96 | # 97 | # def backtest(self, prices): 98 | # return self.trader.backtest(prices) 99 | # 100 | # # Initialize WFO 101 | # wfo = WalkForwardOptimizer( 102 | # trader_class=V1Wrapper, 103 | # param_grid=param_grid, 104 | # 
lookback_window=63, 105 | # train_window=252, 106 | # test_window=63 107 | # ) 108 | # 109 | # # Run optimization 110 | # results = wfo.optimize(prices) 111 | # 112 | # # Print results 113 | # for res in results: 114 | # print(f"Train {res['train_start']} to {res['train_end']}, Test {res['test_start']} to {res['test_end']}") 115 | # print(f"Best Params: {res['best_params']}") 116 | # print(f"Test Cumulative Return: {res['test_performance']['cumulative_return']:.2%}") 117 | # print(f"Test Sharpe Ratio: {res['test_performance']['sharpe_ratio']:.2f}") 118 | # print("-" * 50) 119 | -------------------------------------------------------------------------------- /speculative_complexity.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/09/10/speculative-complexity/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | import math 18 | 19 | # A set of functions to obtain the iterated logarithm 20 | def _log(x, base=math.e): 21 | return int(np.log(x) / np.log(base)) 22 | 23 | def recursiveLogStar(n, b=math.e): 24 | if n > 1.0: 25 | return 1.0 + recursiveLogStar(_log(n, b), b); 26 | else: 27 | return 0 28 | 29 | def logstar(x): 30 | n = len(x) 31 | z = np.zeros([n, 1]) 32 | for i in range(n): 33 | z[i] = recursiveLogStar(x[i]) # + np.log(2.865064) the extra constant not needed 34 | return z 35 | 36 | # A function to get the complexity-weighted sample mean, with complexity measured by the sample size 37 | def complexity_weighted_mean(x, nroll, weight_type=0): 38 | z = pd.DataFrame(data=None, index=x.index, columns=nroll) 39 | if weight_type == 0: 40 | scale = len(nroll) 41 | for i in range(len(nroll)): 42 | z[nroll[i]] = (x.rolling(window=nroll[i]).mean()) 43 | elif weight_type == 1: 44 | scale = (2.0**(-nroll)).sum() 45 | for i in range(len(nroll)): 46 | z[nroll[i]] = (2.0**(-nroll[i]))*(x.rolling(window=nroll[i]).mean()) 47 | elif weight_type == 2: 48 | scale = (2.0**(-logstar(nroll))).sum() 49 | for i in range(len(nroll)): 50 | z[nroll[i]] = (2.0**(-recursiveLogStar(nroll[i])))*(x.rolling(window=nroll[i]).mean()) 51 | return z.sum(axis=1)/scale 52 | 53 | # Select whether the end-points for the rolling window are fixed or random 54 | do_random = False 55 | 56 | # Download some data - for the post used 'FXE' and 'DBA' 57 | tickers = ['FXE', 'DBA', 'DBC', 'DBB', 'USCI'] 58 | for ticker in tickers: 59 | data = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 60 | # Get percent returns, select period of estimation/evaluation 61 | y = data.pct_change().dropna()#.loc['2018-01-01':] # uncomment to start from another date 62 | 63 | # Compute the rolling mean and apply rolling window averaging 64 | # 65 | # First select the number of rolling windows, fixed or random! 66 | if do_random: 67 | # You can of course experiment with the values below 68 | R1 = np.random.randint(low=2, high=5, size=1) 69 | RM = np.random.randint(low=6, high=24, size=1) 70 | else: 71 | # As you can experiment below as well! 
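# (the pair (R1, RM) below spans RM - R1 + 1 rolling windows; np.arange needs the +1 on the upper end because its endpoint is exclusive)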
72 | R1 = 4 73 | RM = 18 74 | nroll = np.arange(R1, RM+1, 1) 75 | roll = nroll[0] 76 | # Compute all three weighted means 77 | mu0 = complexity_weighted_mean(y, nroll, 0) 78 | mu1 = complexity_weighted_mean(y, nroll, 1) 79 | mu2 = complexity_weighted_mean(y, nroll, 2) 80 | 81 | # Trade next, easy! 82 | str0 = y*(mu0.shift(periods=1).apply(np.sign)) 83 | str1 = y*(mu1.shift(periods=1).apply(np.sign)) 84 | str2 = y*(mu2.shift(periods=1).apply(np.sign)) 85 | 86 | # Collect the results, remove impact of maximum rolling window 87 | rr = pd.concat([y, str0, str1, str2], axis=1).iloc[nroll[-1]:] 88 | rr.columns = [ticker, 'equal weighting', 'complexity weighting', 'prior weighting'] 89 | 90 | # Evaluate from 2012 for comparability 91 | rr = rr.loc['2012-01-01':] 92 | 93 | # Do the plot 94 | tr = ((rr+1).cumprod()-1)*100 95 | tr.plot(title='Total trading returns of the speculative complexity strategy in '+ticker, color=['red', 'black', 'blue', 'green'], ylabel='return in percent') 96 | plt.grid(visible=True, which='both') 97 | plt.show() 98 | 99 | # and print the total trading return, along with the window end-points 100 | print("End-points of windows are: ", nroll[0], nroll[-1]) 101 | print(tr.iloc[-1]) 102 | 103 | # tr[[ticker, 'prior weighting']].plot(title='Total trading returns of the speculative complexity strategy in '+ticker, color=['red', 'blue'], ylabel='return in percent') 104 | # plt.grid(visible=True, which='both') 105 | # plt.show() -------------------------------------------------------------------------------- /inflation_drivers.inp: -------------------------------------------------------------------------------- 1 | # 2 | # Gretl code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/07/05/lead-me-not-into-inflation/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Open the data file 12 | open \ 13 | "P:/pCloud Sync/Papers/Own ideas/Blog/US other/Inflation plot/fredgraph.xls" \ 14 | --sheet=2 15 | 16 | # Full sample 17 | smpl full 18 | 19 | # Generate three plots 20 | gnuplot Inflation Freight --time-series --with-lines --output=display \ 21 | { set title "US Inflation and US PPI Inflation of Deep Sea Freight"; \ 22 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; } 23 | 24 | gnuplot Inflation Energy --time-series --with-lines --output=display \ 25 | { set title "US Inflation and Global Price of Energy"; \ 26 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; } 27 | 28 | gnuplot Inflation Energy Freight --time-series --with-lines --output=display \ 29 | { set title "US Inflation, Global Price of Energy and Deep Sea Freight Inflation"; \ 30 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; set linetype 3 lc rgb "blue"; } 31 | 32 | # Compute and plot rolling cross-correlations, experiment with the lags; 33 | # "optimal" looks at 2 months 34 | scalar roll = 15 35 | scalar lag = 2 36 | scalar tobs = $nobs 37 | series Inflation_Freight = NA 38 | series Inflation_Energy = NA 39 | series Inflation_FFRate = NA 40 | 41 | # The loop to compute the cross-correlations from the beginning of the sample; 42 | # no need to invoke dates here... 
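# Editorial aside (illustrative, not part of the original script): each pass of
# the loop below restricts the active sample to the roll = 15 observations that
# end at observation i and stores the within-window correlation of Inflation
# with the lag-2 value of each driver, scaled to percent. For a single window
# the computation would read (commented out so the sample state is untouched):
# smpl 1 roll
# scalar c_freight = corr(Inflation, Freight(-lag))*100
# smpl full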
43 | loop i=roll..tobs 44 | smpl i-roll+1 i 45 | Inflation_Freight[i] = corr(Inflation, Freight(-lag))*100 46 | Inflation_Energy[i] = corr(Inflation, Energy(-lag))*100 47 | Inflation_FFRate[i] = corr(Inflation, FFRate(-lag))*100 48 | endloop 49 | 50 | # Reset the sample 51 | smpl roll ; 52 | 53 | # Generate three additional plots 54 | gnuplot Inflation Inflation_Freight --time-series --with-lines --output=display \ 55 | { set title "US Inflation and Rolling Cross-Correlation PPI Inflation of Deep Sea Freight"; \ 56 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; } 57 | 58 | gnuplot Inflation Inflation_Energy --time-series --with-lines --output=display \ 59 | { set title "US Inflation and Rolling Cross-Correlation with Global Price of Energy"; \ 60 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; } 61 | 62 | gnuplot Inflation Inflation_FFRate --time-series --with-lines --output=display \ 63 | { set title "US Inflation and Rolling Cross-Correlation with Federal Funds Rate"; \ 64 | set linetype 1 lc rgb "black"; set linetype 2 lc rgb "red"; } 65 | 66 | # Reset the sample again, prepare for forecasting exercise 67 | smpl full 68 | 69 | # Now, we shall do a forecasting exercise based on these cross correlations 70 | scalar froll = 24 71 | series yf1 = NA 72 | series yf2 = NA 73 | series yf3 = NA 74 | series yf4 = NA 75 | 76 | # Select starting points correctly, must be at or after 2017:04 77 | id = 2019:12 78 | ed = 2023:05 79 | t_start = obsnum(id)+froll 80 | t_end = obsnum(ed)-1 81 | 82 | # The loop for the forecasting exercise 83 | loop i=t_start..t_end 84 | smpl i-froll+1 i 85 | ols Inflation const Inflation(-1) 86 | omit --auto=0.1 87 | smpl i+1 i+1 88 | fcast --static 89 | yf1[i] = $fcast 90 | # 91 | smpl i-froll+1 i 92 | ols Inflation const Inflation(-1) Energy(-12) Freight(-12) 93 | omit --auto=0.1 94 | smpl i+1 i+1 95 | fcast --static 96 | yf2[i] = $fcast 97 | # 98 | smpl i-froll+1 i 99 | ols Inflation const Inflation(-1) Inflation_Energy(-12) 100 | omit --auto=0.1 101 | smpl i+1 i+1 102 | fcast --static 103 | yf3[i] = $fcast 104 | # 105 | smpl i-froll+1 i 106 | ols Inflation const Inflation(-1) Inflation_Energy(-12) Inflation_Freight(-12) 107 | omit --auto=0.1 108 | smpl i+1 i+1 109 | fcast --static 110 | yf4[i] = $fcast 111 | endloop 112 | 113 | smpl t_start+1 t_end+1 114 | series ef1 = Inflation - yf1 115 | series ef2 = Inflation - yf2 116 | series ef3 = Inflation - yf3 117 | series ef4 = Inflation - yf4 118 | summary ef1 ef2 ef3 ef4 --simple 119 | 120 | scalar mse1 = mean(ef1^2) 121 | scalar mse2 = mean(ef2^2) 122 | scalar mse3 = mean(ef3^2) 123 | scalar mse4 = mean(ef4^2) 124 | 125 | scalar mae1 = mean(abs(ef1)) 126 | scalar mae2 = mean(abs(ef2)) 127 | scalar mae3 = mean(abs(ef3)) 128 | scalar mae4 = mean(abs(ef4)) 129 | -------------------------------------------------------------------------------- /speculative bias learning.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/speculative-bias-learning/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance 16 | 17 | # Select ticker and frequency of rebalancing 18 | ticker = 'BTC-USD' 19 | freq = '1d' 20 | # Select cut-off date 21 | start_from = '2022-01-01' 22 | # Get data and returns 23 | data = yfinance.download(ticker, period='max', interval=freq)['Adj Close'].dropna() 24 | data = data.loc[start_from:] 25 | rets = data.pct_change().dropna() 26 | y = rets.apply(np.sign) 27 | 28 | # Prepare storage 29 | e_store_reg = pd.DataFrame(data=None, index=rets.index, columns=['ex', 'ey']) 30 | e_store_bin = pd.DataFrame(data=None, index=rets.index, columns=['ex', 'ey']) 31 | rets_reg = pd.DataFrame(data=None, index=rets.index, columns=['B&H', 'Mean', 'Bias Learning']) 32 | rets_bin = pd.DataFrame(data=None, index=rets.index, columns=['B&H', 'Mean', 'Bias Learning']) 33 | 34 | # Number of observations and printing tracker 35 | nobs = rets.shape[0] 36 | try_max_reg = 1 37 | try_max_bin = 1 38 | 39 | # A double loop for searching 40 | for yroll in np.arange(3, 12, 1): 41 | for broll in np.arange(2, 6, 1): 42 | # Gamma parameter is fixed, you can experiment with it! 43 | gamma = -1.0 44 | 45 | # A loop over the observations 46 | for i in np.arange(0, nobs-yroll, 1): 47 | ri = rets.iloc[i:(i + yroll)] 48 | ra = rets.iloc[i + yroll] 49 | yi = y.iloc[i:(i + yroll)] 50 | ya = y.iloc[i + yroll] 51 | 52 | # Compute the input forecasts 53 | xf_reg = np.sign(ri.mean()) 54 | xf_bin = np.sign(yi.mean()) 55 | ex_reg = ra - xf_reg 56 | ex_bin = ya - xf_bin 57 | 58 | # and do the updating 59 | if i == 0: 60 | yf_reg = xf_reg 61 | yf_bin = xf_bin 62 | ey_reg = 0 63 | ey_bin = 0 64 | e_store_reg.iloc[i + yroll] = np.hstack([ex_reg, ey_reg]) 65 | e_store_bin.iloc[i + yroll] = np.hstack([ex_bin, ey_bin]) 66 | else: 67 | if (i <= broll): 68 | bias_reg = e_store_reg.iloc[:(i + yroll), 0].mean() 69 | bias_bin = e_store_bin.iloc[:(i + yroll), 0].mean() 70 | else: 71 | bias_reg = e_store_reg.iloc[(i + yroll - broll):(i + yroll), 1].mean() 72 | bias_bin = e_store_bin.iloc[(i + yroll - broll):(i + yroll), 1].mean() 73 | # 74 | yf_reg = np.sign(xf_reg + gamma*bias_reg) 75 | yf_bin = np.sign(xf_bin + gamma*bias_bin) 76 | ey_reg = ra - yf_reg 77 | ey_bin = ya - yf_bin 78 | # 79 | e_store_reg.iloc[i + yroll] = np.hstack([ex_reg, ey_reg]) 80 | e_store_bin.iloc[i + yroll] = np.hstack([ex_bin, ey_bin]) 81 | 82 | # Compute the per period trading returns 83 | rets_reg.iloc[i + yroll] = ra*np.hstack([1, xf_reg, yf_reg]) 84 | rets_bin.iloc[i + yroll] = ra*np.hstack([1, xf_bin, yf_bin]) 85 | 86 | # Compute total trading returns 87 | tr_reg = (rets_reg + 1).prod() 88 | tr_bin = (rets_bin + 1).prod() 89 | 90 | # Do selective printing and plotting 91 | if tr_reg['Bias Learning'] > try_max_reg: 92 | print('Regular returns with yroll =',yroll,'and broll =',broll) 93 | print(tr_reg) 94 | try_max_reg = tr_reg['Bias Learning'] 95 | (rets_reg + 1).cumprod().plot(title='NAV of $1 for the bias learning strategy for '+ticker+', continuous data', ylabel='NAV of $1', xlabel='Date', grid='both') 96 | plt.show() 97 | if tr_bin['Bias Learning'] > try_max_bin: 98 | print('Binary returns with yroll =',yroll,'and broll =',broll) 99 | print(tr_bin) 100 | try_max_bin = tr_bin['Bias Learning'] 101 | (rets_bin + 1).cumprod().plot(title='NAV of $1 for the bias learning strategy for '+ticker+', binary data', ylabel='NAV of $1', xlabel='Date', grid='both')
102 | plt.show() 103 | -------------------------------------------------------------------------------- /money demand and inflation forecasting.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/money-demand-and-inflation-forecasting/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import statsmodels.api as sm 17 | from statsmodels.tsa.ar_model import AutoReg 18 | 19 | # Get the data and starting date 20 | tickers = ['CPIAUCSL', 'CURRSL', 'DEMDEPSL', 'DPSACBM027SBOG'] 21 | raw_data = pdr.fred.FredReader(tickers, start='1992-01-01', end='2024-10-01').read() 22 | data = (raw_data/raw_data.iloc[0]) 23 | start_from = '2017-01-01' 24 | 25 | # Select the deposit measure 26 | deposits = 'DEMDEPSL' 27 | # 28 | if deposits == 'DEMDEPSL': 29 | deposit_type = 'Demand' 30 | else: 31 | deposit_type = 'Total' 32 | yy = data['CPIAUCSL'] 33 | xx = data['CURRSL']/data[deposits] 34 | y = yy.pct_change(periods=12).dropna()*100 35 | x = xx.pct_change(periods=12).dropna()*100 36 | xx = pd.concat([y, x], axis=1) 37 | xx.columns = ['Inflation', 'Currency-to-'+deposit_type+' Deposits'] 38 | xx = xx.dropna() 39 | # Crop the data 40 | xx = xx.loc[start_from:] 41 | 42 | # Prepare to find the cross-correlations 43 | store_ccor_inf = pd.DataFrame(data=None, index=np.arange(0, 25, 1), columns=['Cross-Correlation']) 44 | # 45 | for i in np.arange(0, 25, 1): 46 | store_ccor_inf.iloc[i, 0] = xx['Inflation'].corr(xx['Currency-to-'+deposit_type+' Deposits'].shift(periods=i))*100 47 | # plot the lagged-cross correlations 48 | store_ccor_inf.plot(title='Cross-correlations of lagged Currency-to-'+deposit_type+' Deposits and inflation', xlabel='Lag', ylabel='Correlation', grid='both') 49 | plt.show() 50 | 51 | # extract max cross-correlation 52 | max_ccor_inf = store_ccor_inf.apply(lambda x: np.argmax(np.abs(x)), axis=0) 53 | 54 | # Estimate a delay regression - make sure to use the xx dataframe 55 | yt = xx['Inflation'].iloc[max_ccor_inf.iloc[0]:] 56 | xt = sm.add_constant(xx['Currency-to-'+deposit_type+' Deposits'].shift(periods=max_ccor_inf.iloc[0])).dropna() 57 | mod = sm.OLS(endog=yt, exog=xt).fit() 58 | xf = sm.add_constant(xx['Currency-to-'+deposit_type+' Deposits'].iloc[-max_ccor_inf.iloc[0]:]) 59 | frc = mod.predict(exog=xf) 60 | # Print the estimated delay regression and corresponding forecasts 61 | print(mod.summary()) 62 | print(frc) 63 | 64 | # Expand the sample, required for the dynamic part of the forecast and the plotting 65 | extra = pd.date_range(start='2024-09-01', periods=max_ccor_inf.iloc[0], freq='MS') 66 | extra_sample = pd.date_range(start=xx.index[0], end=extra[-1], freq='MS') 67 | 68 | # You can optionally adjust the forecasts for the dynamic term - I did this 69 | # in the most simplistic of ways, by an AR(1) model on the residuals of the 70 | # delay regression above 71 | do_dynamic = False 72 | if do_dynamic: 73 | res = AutoReg(endog=mod.resid, lags=1).fit() 74 | res_frc = res.predict(start='2024-09-01', end=extra.to_flat_index()[-1]) 75 | if do_dynamic: 76 | frc.index = res_frc.index 77 | frc = frc + res_frc 78 | 79 | # Next plot the aligned series plus the corresponding forecasts 80 | # 81 | # Expand the 
sample 82 | extra = pd.date_range(start='2024-09-01', periods=max_ccor_inf.iloc[0], freq='MS') 83 | extra_sample = pd.date_range(start=xx.index[0], end=extra[-1], freq='MS') 84 | z = pd.DataFrame(data=None, index=extra_sample, columns=['Inflation', 'Currency-to-'+deposit_type+' Deposits']) 85 | z['Inflation'] = xx['Inflation'] 86 | z['Currency-to-'+deposit_type+' Deposits'] = xx['Currency-to-'+deposit_type+' Deposits'] 87 | z['Currency-to-'+deposit_type+' Deposits'] = z['Currency-to-'+deposit_type+' Deposits'].shift(periods=max_ccor_inf.iloc[0]) 88 | z = z.iloc[max_ccor_inf.iloc[0]:] 89 | z.loc['2024-09-01':,'Inflation'] = frc.to_numpy() 90 | # 91 | ax1 = z['Currency-to-'+deposit_type+' Deposits'].plot(title='Inflation and Currency-to-'+deposit_type+' Deposits lagged '+str(max_ccor_inf.iloc[0])+' months', xlabel='Date', ylabel='Currency-to-'+deposit_type+' Deposits', color=['green']) 92 | ax1.xaxis.grid(True, which='major') 93 | ax1.yaxis.grid(True, which='major') 94 | ax1.legend(loc='lower left') 95 | ax2 = ax1.twinx() 96 | z['Inflation'].plot(ax=ax2,color=['blue'], ylabel='Inflation') 97 | ax2.legend(loc='lower right') 98 | plt.show() 99 | 100 | -------------------------------------------------------------------------------- /supersized_volatility_and_volume.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/09/17/supersized-volatility-and-volume-as-signal-enhancers/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | import math 18 | 19 | # A set of functions to obtain the iterated logarithm 20 | def _log(x, base=math.e): 21 | return int(np.log(x) / np.log(base)) 22 | 23 | def recursiveLogStar(n, b=math.e): 24 | if n > 1.0: 25 | return 1.0 + recursiveLogStar(_log(n, b), b); 26 | else: 27 | return 0 28 | 29 | def logstar(x): 30 | n = len(x) 31 | z = np.zeros([n, 1]) 32 | for i in range(n): 33 | z[i] = recursiveLogStar(x[i]) # + np.log(2.865064) the extra constant not needed 34 | return z 35 | 36 | # A function to get the complexity-weighted sample mean, with complexity measured by the sample size 37 | def complexity_weighted_mean(x, nroll, weight_type=0): 38 | z = pd.DataFrame(data=None, index=x.index, columns=nroll) 39 | if weight_type == 0: 40 | scale = len(nroll) 41 | for i in range(len(nroll)): 42 | z[nroll[i]] = (x.rolling(window=nroll[i]).mean()) 43 | elif weight_type == 1: 44 | scale = (2.0**(-nroll)).sum() 45 | for i in range(len(nroll)): 46 | z[nroll[i]] = (2.0**(-nroll[i]))*(x.rolling(window=nroll[i]).mean()) 47 | elif weight_type == 2: 48 | scale = (2.0**(-logstar(nroll))).sum() 49 | for i in range(len(nroll)): 50 | z[nroll[i]] = (2.0**(-recursiveLogStar(nroll[i])))*(x.rolling(window=nroll[i]).mean()) 51 | return z.sum(axis=1)/scale 52 | 53 | # Select asset to work with and parametrization 54 | ticker = 'TNA' 55 | set_R1 = 2 56 | set_RM = 12 57 | # Select averaging type, 0 for equal, 1 for complexity, 2 for prior 58 | set_avg_type = 1 59 | # Select type of variable boosting, 'volatility', 'volume' or 'both' 60 | set_booster = 'both' 61 | # Set frequency of data, '1d', '1wk', '1mo' 62 | set_freq = '1wk' 63 | # Set starting date 64 | set_start = '2021-01-01' 65 | 66 | # 
Download the data, note the weekly, '1wk' frequency below 67 | data = yf.download(ticker, period='max', interval=set_freq)[['High', 'Low', 'Adj Close', 'Volume']].dropna() 68 | data = data.loc[set_start:] 69 | 70 | # Get the variables, pure returns first 71 | r = data['Adj Close'].pct_change().dropna() 72 | 73 | # Then the booster variables 74 | if set_booster == 'volatility': 75 | boost = (data['High']/data['Low'].shift(periods=1)-1) 76 | elif set_booster == 'volume': 77 | boost = data['Volume']/data['Volume'].shift(periods=1) 78 | elif set_booster == 'both': 79 | h = (data['High']/data['Low'].shift(periods=1)-1) 80 | v = data['Volume']/data['Volume'].shift(periods=1) 81 | boost = h*v 82 | # OK, now get the booster-augmented returns 83 | y = r*boost 84 | 85 | # Get the rolling windows 86 | nroll = np.arange(set_R1, set_RM+1, 1) 87 | roll = nroll[0] 88 | 89 | # Compute the rolling means and apply rolling window averaging, with and without the booster variable 90 | # 91 | # First, without the boosting 92 | mu0 = complexity_weighted_mean(r, nroll, set_avg_type) 93 | # and then with the boosting 94 | mu1 = complexity_weighted_mean(y, nroll, set_avg_type) 95 | 96 | # Trade next, easy! 97 | str0 = r*(mu0.shift(periods=1).apply(np.sign)) 98 | str1 = r*(mu1.shift(periods=1).apply(np.sign)) 99 | 100 | # Collect the results, remove impact of maximum rolling window 101 | rr = pd.concat([r, str0, str1], axis=1).iloc[nroll[-1]:] 102 | rr.columns = [ticker, 'spec.complexity', 'supersized'] 103 | 104 | # Do the plot 105 | tr = ((rr+1).cumprod()-1)*100 106 | tr.plot(title='Total trading returns of the speculative complexity strategy supersized for '+ticker, color=['red', 'black', 'blue', 'green'], ylabel='return in percent') 107 | plt.grid(visible=True, which='both') 108 | plt.show() 109 | 110 | # and print the total trading return, along with the window end-points 111 | print("End-points of windows are: ", nroll[0], nroll[-1]) 112 | print(tr.iloc[-1]) 113 | 114 | # tr[[ticker, 'supersized']].plot(title='Total trading returns of the speculative complexity strategy in '+ticker, color=['red', 'blue'], ylabel='return in percent') 115 | # plt.grid(visible=True, which='both') 116 | # plt.show() -------------------------------------------------------------------------------- /method in investment.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/11/17/method-in-investment/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import yfinance as yf 14 | 15 | # A function to compute the maximum drawdown 16 | def max_dd(wealth): 17 | maxwealth = wealth.cummax(axis=0) 18 | drawdowns = wealth/maxwealth - 1 19 | return drawdowns.min(axis=0) 20 | 21 | # Select a ticker to analyze, always weekly frequency 22 | ticker = 'QQQ' 23 | data = yf.download(ticker, period='max', interval='1wk')['Adj Close'].dropna() 24 | rets = data.pct_change().dropna().loc['2019-01-01':] 25 | 26 | # Get the 3- and 10-week rolling momentum 27 | m3 = rets.rolling(window=3).apply(lambda x: (x+1).prod()-1) 28 | m10= rets.rolling(window=10).apply(lambda x: (x+1).prod()-1) 29 | # Put data together 30 | all = pd.concat([rets, m3, m10], axis=1).dropna() 31 | all.columns = [ticker, ticker+'-M3', ticker+'+M10'] 32 | # Get number of observations 33 | nobs = all.shape[0] 34 | 35 | # Set parameters 36 | alpha = 0.02 37 | alpha2= alpha/2 38 | # 39 | iniK = 100 40 | K0 = iniK*(1+all.iloc[0,0]) 41 | 42 | # Initialize storage 43 | store = pd.DataFrame(data=None, index=all.index, columns=['The Method', ticker, 'AddFunds', 'TakeProfit']) 44 | store.iloc[0,:] = np.hstack([K0, K0, 0, 0]) 45 | add = 0 46 | 47 | # Loop over the observations 48 | for i in range(nobs-1): 49 | # First, check for taking profit at the 6% return over a month 50 | if i > 3: 51 | r_i = (store.iloc[i,0]/store.iloc[i-3,0])-1 52 | if r_i >= 0.06 and K0 > 0: 53 | # Take half-of-alpha for profit 54 | TakeProfit = alpha2*K0 55 | # Redefine available capital 56 | K0 = K0 - TakeProfit 57 | # Store profit taken 58 | store.iloc[i, 3] = TakeProfit 59 | else: 60 | TakeProfit = 0 61 | store.iloc[i, 3] = TakeProfit 62 | 63 | # Next, check for adding capital when momentum is negative 64 | x_i = all.iloc[i,:] 65 | # This happens as an OR 66 | if x_i.iloc[1] < -0.03 or x_i.iloc[2] < -0.1: 67 | # Compute total profit from inception 68 | TotalProfit = K0 - iniK 69 | # If the profit is positive then add alpha fraction of new funds 70 | if TotalProfit > 0: 71 | K1 = ((1+alpha)*K0 - alpha*iniK)*(1+all.iloc[i+1,0]) 72 | add = (K0 - iniK)*alpha 73 | else: 74 | K1 = K0*(1+all.iloc[i+1,0]) 75 | add = 0 76 | # and when momentum is positive, short the market 77 | elif x_i.iloc[1] > 0.03 and x_i.iloc[2] > 0.1: 78 | # Compute total profit from inception 79 | TotalProfit = K0 - iniK 80 | # If the profit is positive then short the market 81 | if TotalProfit > 0: 82 | K1 = K0*(1-alpha*all.iloc[i+1,0]) 83 | else: 84 | K1 = K0*(1+all.iloc[i+1,0]) 85 | add = 0 86 | else: 87 | K1 = K0*(1+all.iloc[i+1,0]) 88 | add = 0 89 | 90 | # Compute the benchmark return, store and move on 91 | benchK = store.iloc[i,1]*(1+all.iloc[i+1,0]) 92 | store.iloc[i+1,:3] = np.hstack([K1, benchK, add]) 93 | K0 = K1 94 | 95 | # Lets compute the maximum drawdown 96 | wealth = store[['The Method', ticker]] 97 | maxDD = wealth.apply(max_dd) 98 | 99 | # Do a performance plot 100 | wealth.plot(title='Method-in-Investment strategy for '+ticker, xlabel='Date', 101 | ylabel='Value of investment of '+str(iniK)+'$', grid='both') 102 | plt.show() 103 | 104 | # and another plot with the funds added and take profit taken 105 | in_and_out = store[['AddFunds', 'TakeProfit']] 106 | in_and_out.cumsum().plot(title='Method-in-Investment deposits & withdrawals for '+ticker, xlabel='Date', 107 | ylabel='Value in $', grid='both') 108 | plt.show() 109 | 110 | # Compute the annual deposits and withdrawals 111 | 
annual = in_and_out.groupby(by=in_and_out.index.year).sum() 112 | 113 | # Print everything 114 | print('Ticker:', ticker) 115 | print('Total wealth of strategy for initial investment of '+str(iniK)+'$ = ', wealth.iloc[-1, 0]) 116 | print('Total wealth of benchmark for initial investment of '+str(iniK)+'$ = ', wealth.iloc[-1, 1]) 117 | print('Maximum drawdown for strategy = ', maxDD.iloc[0]) 118 | print('Maximum drawdown for benchmark = ', maxDD.iloc[1]) 119 | # 120 | print(annual) 121 | print(annual.median()) 122 | print(in_and_out.cumsum()) 123 | -------------------------------------------------------------------------------- /complexity_weighted_forecasting.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/10/03/a-complex-neighbor-out-of-sample/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import pandas_datareader as pdr 17 | import math 18 | import datetime 19 | from dateutil.relativedelta import relativedelta 20 | from sklearn.neighbors import NearestNeighbors 21 | 22 | # A set of functions to obtain the iterated logarithm 23 | def _log(x, base=math.e): 24 | return int(np.log(x) / np.log(base)) 25 | 26 | def recursiveLogStar(n, b=math.e): 27 | if n > 1.0: 28 | return 1.0 + recursiveLogStar(_log(n, b), b); 29 | else: 30 | return 0 31 | 32 | def logstar(x): 33 | n = len(x) 34 | z = np.zeros([n, 1]) 35 | for i in range(n): 36 | z[i] = recursiveLogStar(x[i]) # + np.log(2.865064) the extra constant not needed 37 | return z 38 | 39 | # A function to apply complexity-based weighting for computing a weighted mean 40 | def cwm(x, h_ahead, weight_type=0): 41 | if weight_type == 0: 42 | z = x.mean() 43 | elif weight_type == 1: 44 | w = 2.0 ** (-np.arange(1, h_ahead+1, 1)) 45 | z = (w*x).sum()/w.sum() 46 | elif weight_type == 2: 47 | w = (2.0**(-logstar(np.arange(1, h_ahead+1, 1)))).flatten() 48 | z = (w*x).sum()/w.sum() 49 | return z 50 | 51 | # Download some data - for the post I used: 52 | # the US CPI-based inflation CPIAUCSL 53 | # the global price of wheat PWHEAMTUSDM 54 | # the global price of Brent oil POILBREUSDM 55 | # 56 | fred_tickers = ['POILBREUSDM', 'PWHEAMTUSDM', 'CPIAUCSL'] 57 | 58 | # Select if you want to forecast the growth rate or the level of the series but be careful: 59 | # strictly speaking, the nearest neighbors require a stationary series to work well...
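# Editorial aside (illustrative, not part of the original script): cwm() above
# combines, date by date, the continuation paths that follow each nearest
# neighbor of the last observation. With weight_type=1 the path of the j-th
# closest neighbor gets weight 2**(-j), so the best match carries roughly half
# of the total weight; with weight_type=2 the weights 2**(-logstar(j)) decay
# step-wise and treat the neighbors far more evenly. A quick check for the 24
# neighbors used below (commented out so the script's output is unchanged):
# w = 2.0**(-np.arange(1, 25)); print(w[0]/w.sum())   # ~0.5 on the closest match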
60 | do_growth = True 61 | # set periods for growth rate if true 62 | set_periods = 12 63 | # Set number of steps-ahead for your forecast 64 | h_ahead = 24 65 | # Set the type of weighting 66 | set_weighting = 2 67 | 68 | # and do a quick loop over the tickers to produce results 69 | for ticker in fred_tickers: 70 | data = pdr.fred.FredReader(ticker, start='1990-01-01').read() 71 | if do_growth: 72 | y = data.pct_change(periods=set_periods).dropna()*100 73 | else: 74 | y = data 75 | 76 | # Get the nearest neighbors of the whole series 77 | knn = NearestNeighbors(n_neighbors=h_ahead+1) 78 | z = knn.fit(y) 79 | distance_mat, neighbours_mat = knn.kneighbors(y) 80 | # Extract the nearest neighbors of the last observation 81 | last = neighbours_mat[-1, 1:] 82 | 83 | # Create a new dates index 84 | tf = pd.date_range(y.index[-1]+relativedelta(months=1), periods=h_ahead, freq=y.index.inferred_freq) 85 | # and a dataframe to hold the nearest neighbors 86 | zf = pd.DataFrame(data=None, index=tf, columns=np.arange(1, h_ahead+1, 1)) 87 | # Fill-in via a quick loop 88 | for i in np.arange(1, h_ahead+1, 1): 89 | seq = np.arange((last[i-1]+1),(last[i-1]+(h_ahead+1))) 90 | if any(seq >= y.shape[0]): 91 | set_len = np.where(seq < y.shape[0])[0] 92 | zf.iloc[set_len, i-1] = y.iloc[(last[i-1]+1):(last[i-1]+(h_ahead+1))].to_numpy() 93 | else: 94 | zf[i] = y.iloc[(last[i-1]+1):(last[i-1]+(h_ahead+1))].to_numpy() 95 | 96 | # And apply complexity-based weighting across the nearest neighbors 97 | yf = zf.apply(cwm, args=(h_ahead, set_weighting,), axis=1) 98 | yf = pd.DataFrame(data=yf, index=yf.index, columns=y.columns) 99 | 100 | # Print the forecast 101 | print(yf) 102 | 103 | # Merge the historical observations with the forecast, note the trick 104 | yt = pd.concat([y, yf], axis=1) 105 | yt.columns = np.hstack([y.columns.tolist(), y.columns+' forecast']) 106 | # Fill in the last historical value in the forecast 107 | yt.iloc[y.shape[0]-1, 1] = y.iloc[-1, 0] # assign a scalar so a single cell is set cleanly 108 | 109 | # Plot the last few values, including the forecast 110 | plot_last = 60 111 | if do_growth: 112 | set_title = 'Actual values and future forecast for '+str(set_periods)+'-period growth rate' 113 | set_ylabel = 'percent' 114 | else: 115 | set_title = 'Actual values and future forecast' 116 | set_ylabel = 'level' # define the label in both branches, otherwise the plot call below raises a NameError 117 | # 118 | yt.iloc[-plot_last:].plot(grid='both', title=set_title, xlabel='Date', ylabel=set_ylabel) 119 | plt.show() -------------------------------------------------------------------------------- /nn_tools.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/07/29/the-speculative-neighbor/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | import numpy as np 12 | import statsmodels.api as sm 13 | 14 | 15 | def get_trajectory(x, k): 16 | """ 17 | Compute the trajectory matrix of a matrix given a memory parameter 18 | 19 | :param x: array of data 20 | :param k: scalar, memory order 21 | :return: the trajectory matrix 22 | """ 23 | if len(x.shape) == 1: 24 | x = x.reshape(-1, 1) 25 | if k == 1: 26 | return x 27 | elif k > 1: 28 | y = np.zeros([x.shape[0] - k + 1, k * x.shape[1]]) 29 | for i in range(x.shape[0] - k + 1): 30 | y[i, :] = np.hstack(x[i:(i + k), :]) 31 | return y 32 | else: 33 | raise ValueError('In function get_trajectory the memory order must be >= 1') 34 | 35 | 36 | def get_nn(x, p=2, alpha=0.8, step=1): 37 | """ 38 | Compute the nearest neighbors to the last value 39 | 40 | :param x: array of data, vector or matrix 41 | :param p: scalar, distance exponent (-1 or 1 or 2) for the NN; -1 is plain matching 42 | :param alpha: scalar, between 0 and 1, % of NN to retain from sample 43 | :param step: scalar, steps to subsample NN 44 | :return: the indices in the input matrix with the nearest neighbors 45 | """ 46 | if len(x.shape) == 1: 47 | x = x.reshape(-1, 1) 48 | 49 | # Extract the last row from the trajectory matrix 50 | x_last = x[-1, :] 51 | # Compute all the distances, sort and extract the indices 52 | if p < 0: 53 | similarity_score = np.sum(np.logical_and(x[:-1, :], x_last), axis=1) 54 | winner_pos = np.argwhere(similarity_score >= -p).flatten() 55 | if len(winner_pos) == 0: 56 | return None 57 | _aux = np.argsort(similarity_score[winner_pos]) 58 | isd = winner_pos[_aux][::-1] 59 | else: 60 | if p == 0: 61 | dxt = np.sum(np.abs(x[:-1, :] - x_last), axis=1) 62 | else: 63 | dxt = np.sum((np.abs(x[:-1, :] - x_last) ** p), axis=1) ** (1 / p) 64 | isd = dxt.argsort() 65 | isd = isd[np.where(isd < len(x))] 66 | isd = isd[:int(len(isd) * alpha) + (1 if p < 0 else 0)] 67 | # Subsample? 68 | if step > 1: 69 | ss = np.arange(0, len(isd), step) 70 | isd = isd[ss] 71 | # Done - note that the index of the closest NN is the first in isd! 72 | return isd 73 | 74 | 75 | def get_nn_forecast(x, isd, beta=0.2, nn_type=None): 76 | """ 77 | Compute a modified NN by averaging around/forward/backward of each NN point 78 | 79 | :param x: vector of target variables 80 | :param isd: the indices of the nearest neighbors to account for 81 | :param beta: scalar, between 0 and 1, % of observations to retain around each NN point 82 | :param nn_type: string, the type of forecast to produce. 
If None, the mean of nn is returned 83 | :return: the average of the individual NN forecasts 84 | """ 85 | isd = isd[isd < x.shape[0]] 86 | if len(x.shape) == 1: 87 | x = x.reshape((-1, 1)) 88 | z = x[isd, :] 89 | 90 | if nn_type is None: 91 | return z.mean(axis=0) 92 | 93 | f = np.zeros(z.shape) # this is the modified h-step ahead data 94 | for i in range(isd.shape[0]): 95 | ix = isd[i] 96 | if nn_type == 'center': 97 | i_min = max(0, int(ix - len(x)*beta)) 98 | i_max = min(int(ix + x.shape[0] * beta), x.shape[0]) 99 | elif nn_type == 'forward': 100 | i_min = ix 101 | i_max = min(int(ix + x.shape[0] * beta), x.shape[0]) 102 | elif nn_type == 'backward': 103 | i_min = max(0, int(ix - len(x)*beta)) 104 | i_max = ix 105 | elif nn_type == 'regress': 106 | i_min = ix 107 | i_max = min(int(ix + x.shape[0] * beta), x.shape[0]) 108 | else: 109 | raise ValueError('Wrong value passed in parameter nn_type.') 110 | if nn_type != 'regress': 111 | f[i] = np.mean(x[i_min:i_max, :], axis=0) 112 | elif nn_type == 'regress': 113 | xi = x[i_min:i_max, :] 114 | if len(xi) > 3: 115 | w = sm.add_constant(xi[:-1, :]) 116 | y = xi[1:, -1] 117 | out = sm.OLS(y, w).fit() 118 | f[i] = out.params[0] + np.sum(out.params[1:] * x[-1, :]) 119 | else: 120 | f[i] = np.mean(xi, axis=0) 121 | return f.mean(axis=0) 122 | -------------------------------------------------------------------------------- /the-speculative-regression.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/10/24/the-speculative-regression/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the required libraries 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | import statsmodels.api as sm 17 | 18 | # Linear model, least squares or robust, from statsmodels with sequential elimination based on p-values 19 | def sequential_elimination_lm(set_Y, set_X, set_alpha, robust=False): 20 | if robust: 21 | out = sm.RLM(endog=set_Y, exog=set_X).fit() 22 | else: 23 | out = sm.OLS(endog=set_Y, exog=set_X, hasconst=True).fit() 24 | pv_old = out.pvalues 25 | ip_old = pv_old[pv_old <= set_alpha[0]].index 26 | 27 | # and with a simple loop remove the rest in the proper way with diminishing p-values 28 | for aa in np.arange(1, len(set_alpha)): 29 | xa = set_X[ip_old] 30 | ya = set_Y 31 | if robust: 32 | out = sm.RLM(endog=set_Y, exog=xa).fit() 33 | else: 34 | out = sm.OLS(endog=ya, exog=xa, hasconst=True).fit() 35 | pv_new = out.pvalues 36 | ip_new = pv_new[pv_new <= set_alpha[aa]].index 37 | if len(ip_new) > 0: 38 | pv_old = pv_new 39 | ip_old = ip_new 40 | 41 | # and this is the final model 42 | xa = set_X[ip_old] 43 | ya = set_Y 44 | out = sm.OLS(endog=ya, exog=xa, hasconst=True).fit() 45 | 46 | # Done!
47 | return out 48 | 49 | # A simple regression predictor based on the above function and the data structure of the post 50 | def srp(data, alpha, robust): 51 | y = data.iloc[:, 0] 52 | x = sm.add_constant(data.iloc[:, 1:]) 53 | model = sequential_elimination_lm(y.iloc[1:], x.shift(periods=1).iloc[1:], alpha, robust) 54 | beta = model.params 55 | xfor = x.iloc[-1] 56 | fcst = (beta.mul(xfor)).sum() 57 | return np.sign(fcst) 58 | 59 | # Download some data, for the post I used SSO, USO, DBB, LQD, WEAT, IYR - please see the note on the 60 | # setting of the diminishing p-values later in the code!! 61 | ticker = 'IYR' 62 | data = yf.download(ticker, period='max', interval='1d')['Adj Close'].dropna() 63 | r = data.pct_change().dropna().loc['2022-01-01':] # you can change this of course 64 | r.name = ticker 65 | 66 | # Compute the necessary variables 67 | dr = r.diff() 68 | z1p = ((dr > 0) & (r > 0)).astype(float) 69 | z2p = ((dr <= 0) & (r > 0)).astype(float) 70 | z1n = ((dr <= 0) & (r <= 0)).astype(float) 71 | z2n = ((dr > 0) & (r <= 0)).astype(float) 72 | # Convert the zeroes to -ones, this is important!! (plain boolean indexing here, since .iloc does not accept a boolean Series as a mask) 73 | z1p[z1p == 0] = -1.0 74 | z2p[z2p == 0] = -1.0 75 | z1n[z1n == 0] = -1.0 76 | z2n[z2n == 0] = -1.0 77 | # Put together 78 | z_all = pd.concat([r, dr, z1p, z2p, z1n, z2n], axis=1).dropna() 79 | z_all.columns = [ticker, 'D-'+ticker, 'Z1+', 'Z2+', 'Z1-', 'Z2-'] 80 | 81 | # Number of observations and initial window 82 | nobs = z_all.shape[0] 83 | ini_wind = 21 84 | 85 | # Select a sequence of p-values; this can be changed or fine-tuned 86 | set_alpha = [0.9, 0.7, 0.5, 0.25, 0.125, 0.0625] # used the full sequence for WEAT & IYR only, else used [0.9, 0.7] 87 | 88 | # Use robust estimation? 89 | use_robust = False 90 | 91 | # and initialize storage 92 | store = pd.DataFrame(data=None, index=z_all.index, columns=[ticker, 'Speculative Regression']) 93 | 94 | # Run a simple loop to get the signals and the strategy returns 95 | for i in np.arange(0, nobs-ini_wind, 1): 96 | z_i = z_all.iloc[:(i+ini_wind),:] # this is recursive estimation, change to i:(i+ini_wind) for rolling!! 97 | z_f = srp(z_i, set_alpha, use_robust) 98 | bnh = z_all.iloc[i+ini_wind, 0] 99 | stg = bnh*z_f 100 | store.iloc[i+ini_wind, :] = np.hstack([bnh, stg]) 101 | 102 | # Compute the cumulative return and plot 103 | cret = ((store + 1).cumprod() - 1)*100 104 | cret.plot(grid='both', title='The speculative regression strategy for '+ticker+' using daily returns', xlabel='Date', ylabel='return in percent') 105 | plt.savefig(ticker+'.png') 106 | plt.show() 107 | # 108 | print(cret.iloc[-1]) 109 | print(store.mean(axis=0)/store.std(axis=0)) 110 | 111 | #SSO -23.656021 112 | #Speculative Regression 122.367302 113 | #USO 26.183619 114 | #Speculative Regression 140.869522 115 | #DBB -21.432536 116 | #Speculative Regression 34.503916 117 | #LQD -17.359407 118 | #Speculative Regression 33.838079 119 | #WEAT -18.844565 120 | #Speculative Regression 49.009478 121 | #IYR -27.650459 122 | #Speculative Regression 17.98597 123 | -------------------------------------------------------------------------------- /rent or buy the profits run high.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/rent-or-buy-the-profits-run-high/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import scipy as sp 14 | import pandas_datareader as pdr 15 | import statsmodels.api as sm 16 | import yfinance as yf 17 | 18 | # A function to compute the maximum drawdown, input is a dataframe of cumulative returns 19 | def max_dd(crets): 20 | maxcret = (crets+1).cummax(axis=0) 21 | drawdowns = ((crets + 1) / maxcret) - 1 22 | return drawdowns.min(axis=0) 23 | 24 | # Another function to collect performance measures, input is a dataframe of returns 25 | def performance_measures(rets, f_factor, target_r=0): 26 | mu = rets.mean() * f_factor 27 | sd = rets.std() * np.sqrt(f_factor) 28 | sr = mu / sd 29 | er = target_r - rets 30 | cr = (rets+1).cumprod(axis=0) - 1 31 | md = max_dd(cr) 32 | stats = pd.DataFrame([mu, sd, sr, cr.iloc[-1], md]) 33 | stats.index = ['Mean', 'Std. Dev.', 'Sharpe', 'TR', 'MaxDD'] 34 | return stats.transpose(), cr 35 | 36 | # Read the economic data 37 | data1 = pdr.fred.FredReader(['CUSR0000SEHA', 'CSUSHPISA', 'CPIAUCSL'], start='2000-01-01', end='2024-01-31').read() 38 | 39 | # Read the financial data 40 | ticker = 'XLK' 41 | data2 = yf.download(ticker, start='2000-01-01', end='2024-01-31', interval='1mo').dropna()['Adj Close'] 42 | 43 | # Merge 44 | data = pd.concat([data1, data2], axis=1).dropna() 45 | data.columns = ['RentPI', 'HousePI', 'CPI', ticker] 46 | 47 | # Compute the explanatory variable 48 | data['RentPI/HousePI'] = data['RentPI']/data['HousePI'] 49 | 50 | # Compute growth rates 51 | use_data = data[[ticker, 'RentPI/HousePI']].pct_change().dropna()*100 52 | nobs = use_data.shape[0] 53 | 54 | # Select ols or robust estimation 55 | do_robust = False 56 | 57 | # Select a rolling window and the delay - I am using a direct search for the rolling window, you 58 | # can change this below 59 | set_roll = np.arange(6, 16, 1) 60 | delay = 2 61 | 62 | for roll in set_roll: 63 | 64 | # Initialize storage 65 | store = pd.DataFrame(data=None, index=use_data.index, columns=[ticker, 'Rent & Buy #1', 'Rent & Buy #2']) 66 | 67 | # Roll over 68 | for i in np.arange(0, nobs-roll-delay, 1): 69 | # Split the data 70 | di = use_data.iloc[i:(i+roll+delay):] 71 | 72 | # Compute the lags and align 73 | w = di['RentPI/HousePI'].shift(periods=delay).iloc[delay:] 74 | Iw = (w <= 0).astype(float) 75 | if all(Iw == 1): 76 | x = w 77 | else: 78 | x = pd.concat([w, Iw], axis=1) 79 | x = sm.add_constant(x) 80 | y = di[ticker].iloc[delay:] 81 | 82 | # Estimate and forecast with first model 83 | if do_robust: 84 | out = sm.RLM(endog=y, exog=sm.add_constant(w), hasconst=True, M=sm.robust.norms.TukeyBiweight()).fit() 85 | else: 86 | out = sm.OLS(endog=y, exog=sm.add_constant(w), hasconst=True).fit() 87 | bhat = out.params 88 | wf = di['RentPI/HousePI'].iloc[-delay] 89 | xhat = np.hstack([1, wf]) 90 | fhat1 = np.sum(bhat*xhat) 91 | 92 | # Estimate and forecast with second model 93 | if do_robust: 94 | out = sm.RLM(endog=y, exog=x, hasconst=True, M=sm.robust.norms.TukeyBiweight()).fit() 95 | else: 96 | out = sm.OLS(endog=y, exog=x, hasconst=True).fit() 97 | bhat = out.params 98 | wf = di['RentPI/HousePI'].iloc[-delay] 99 | Iwf = (wf <= 0).astype(float) 100 | if all(Iw == 1): 101 | xhat = np.hstack([1, wf]) 102 | else: 103 | xhat = np.hstack([1, wf, Iwf]) 104 | fhat2 = np.sum(bhat*xhat) 105 | 106 | # Trade, these trades are long-only, switch to signs for long-short 107 | bench = use_data[ticker].iloc[i+roll+delay]/100 108 | s1 = 
bench*(fhat1 > 0) # np.sign(fhat1) 109 | s2 = bench*(fhat2 > 0) # np.sign(fhat2) 110 | store.iloc[i+roll+delay] = np.hstack([bench, s1, s2]) 111 | 112 | store = store.dropna() 113 | stats, cr = performance_measures(store, 12, 0) 114 | print(roll) 115 | print(stats) 116 | if (ticker == 'SPY' and roll == 13) or (ticker == 'XLF' and roll == 11) or (ticker == 'XLE' and roll == 15) or (ticker == 'XLK' and roll == 11): 117 | cr.plot(title='The Rent & Buy Strategy vs the passive benchmark for '+ticker+', monthly data', xlabel='Date', ylabel='return in percent', grid='both') 118 | plt.show() 119 | -------------------------------------------------------------------------------- /a simple measure of economic activity and the z-strategy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/a-simple-measure-of-real-economic-activity-the-z-strategy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | from scipy import signal, stats 17 | from statsmodels.tsa.stattools import acf 18 | from statsmodels.sandbox.stats import runs 19 | from statsmodels.stats.descriptivestats import sign_test 20 | import yfinance as yf 21 | 22 | # Get the data 23 | tickers = ['CUMFN', 'MDSP', 'GDPC1'] 24 | raw_data = pdr.fred.FredReader(tickers, start='1980-01-01', end='2024-10-01').read() 25 | 26 | # Get the two variables, the quarterly change in the contrast between capacity utilization and 27 | # mortgage payments as % of disposable income 28 | y0 = (raw_data['CUMFN']-raw_data['MDSP']).diff(periods=1) 29 | # and the quarterly real GDP growth 30 | y1 = raw_data['GDPC1'].pct_change(periods=1)*100 31 | 32 | # Put together, give column names 33 | data = pd.concat([y0, y1], axis=1).dropna() 34 | data.columns = ['Capacity minus Mortgage Payments', 'Real GDP Growth'] 35 | 36 | # A full sample plot of the data 37 | data.plot(secondary_y='Capacity minus Mortgage Payments', title='', xlabel='Date', grid='both') 38 | plt.show() 39 | 40 | # Select dates for doing the comparisons 41 | dates = ['1981-01-01', '1985-01-01', '1990-01-01', '1995-01-01', '2000-01-01', '2005-01-01', '2010-01-01', '2015-01-01', '2019-01-01'] 42 | 43 | # Import the financial data 44 | sp500 = yf.download('^GSPC', period='max', interval='1mo')['Adj Close'].dropna() 45 | rets = sp500.pct_change().dropna() 46 | rets = rets.resample('QS-OCT').apply(lambda x: x.iloc[-1]) 47 | 48 | # Prepare dataframe to save the results 49 | results = pd.DataFrame(data=None, index=dates, columns=['Corr(y, x)', 'Corr(fy, fx)', 'Sbar', 'Corr(ry, rx)', 'Normality', 'Sign', 'ER']) 50 | 51 | # Loop over the dates and compute what you need 52 | for d in dates: 53 | # Section the data 54 | x = data.loc[d:] 55 | r = rets.loc[d:'2023-10-01'] 56 | 57 | # Save correlation of the data 58 | results.loc[d, 'Corr(y, x)'] = x.corr().iloc[1,0] 59 | 60 | # Scale the data for computing their spectral density and coherence 61 | sx = x/x.std(axis=0) 62 | f0, Pxx_den0 = signal.welch(sx.iloc[:,0].to_numpy(), fs=1, nperseg=36, scaling='spectrum') 63 | f1, Pxx_den1 = signal.welch(sx.iloc[:,1].to_numpy(), fs=1, nperseg=36, scaling='spectrum') 64 | ff = pd.DataFrame(data=np.hstack([Pxx_den0.reshape(-1, 1), Pxx_den1.reshape(-1,
1)]), index=f0, columns=data.columns) 65 | # ff.plot(title='Spectral densities from '+d, xlabel='Frequency', ylabel='Power', grid='both') 66 | # plt.show() 67 | 68 | # Save the correlation of the spectra 69 | results.loc[d, 'Corr(fy, fx)'] = ff.corr().iloc[1,0] 70 | 71 | # Compute and plot the coherence 72 | f, Cxy = signal.coherence(sx.iloc[:,0], sx.iloc[:, 1], nperseg=36) 73 | # plt.plot(f, Cxy) 74 | # plt.title('Squared coherence from '+d) 75 | # plt.grid(which='both') 76 | # plt.xlabel('Frequency') 77 | # plt.ylabel('Squared coherence') 78 | # plt.show() 79 | 80 | # Save the average coherence 81 | results.loc[d, 'Sbar'] = np.mean(Cxy) 82 | 83 | # Compute and plot the ACF 84 | a0 = acf(x.iloc[:,0]).reshape(-1, 1) 85 | a1 = acf(x.iloc[:,1]).reshape(-1, 1) 86 | aa = np.hstack([a0, a1]) 87 | # plt.plot(aa) 88 | # plt.title('Autocorrelations from '+d) 89 | # plt.grid(which='both') 90 | # plt.xlabel('Lag') 91 | # plt.ylabel('Autocorrelation') 92 | # plt.show() 93 | 94 | # Save the correlation of the ACFs 95 | results.loc[d, 'Corr(ry, rx)'] = np.corrcoef(aa.T)[1,0] 96 | 97 | # Compute the two tests and save their pvalues, for the difference of the two series 98 | z = x.iloc[:,1] - x.iloc[:,0] 99 | results.loc[d, 'Normality'] = stats.normaltest(z).pvalue 100 | results.loc[d, 'Sign'] = sign_test(z, mu0=z.mean())[1] 101 | 102 | # Trade the index 103 | u = r*(z.shift(periods=2).apply(np.sign)) 104 | both = pd.concat([r, u], axis=1).dropna() 105 | tr = (both+1).prod() 106 | results.loc[d, 'ER'] = tr.iloc[1] - tr.iloc[0] 107 | if d == dates[0]: 108 | cr = ((both+1).cumprod()-1)*100 109 | cr.plot(title='Cumulative return of the Z strategy from '+d+', d=2 quarters', xlabel='Date', ylabel='return in percent', grid='both') 110 | plt.show() 111 | 112 | # OK, just print the results now 113 | print(results) 114 | 115 | 116 | -------------------------------------------------------------------------------- /the-speculative-transform.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/12/10/the-speculative-transform/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import yfinance as yf 13 | import pandas as pd 14 | import pandas_datareader as pdr 15 | 16 | # A function to compute the maximum drawdown, input is a dataframe of cumulative returns 17 | def max_dd(crets): 18 | maxcret = (crets+1).cummax(axis=0) 19 | drawdowns = ((crets + 1) / maxcret) - 1 20 | return drawdowns.min(axis=0) 21 | 22 | # Another function to collect performance measures, input is a dataframe of returns 23 | def performance_measures(rets, f_factor, target_r=0): 24 | mu = rets.mean() * f_factor 25 | sd = rets.std() * np.sqrt(f_factor) 26 | sr = mu / sd 27 | er = target_r - rets 28 | er = er.clip(lower=0) 29 | l2 = (er ** 2).mean(axis=0) 30 | st = mu/np.sqrt(l2) 31 | cr = (rets+1).cumprod(axis=0) - 1 32 | md = max_dd(cr) 33 | stats = pd.DataFrame([mu, sd, sr, st, cr.iloc[-1], md]) 34 | stats.index = ['Mean', 'Std. 
Dev.', 'Sharpe', 'Sortino', 'TR', 'MaxDD'] 35 | return stats.transpose(), cr 36 | 37 | # Select ticker and frequency of data 38 | ticker = 'TNA' 39 | freq = '1d' 40 | # Don't forget to adjust the frequency factor 41 | ff = 260 42 | # Set starting date 43 | start_date = '2023-01-01' 44 | 45 | # Get the data 46 | data = yf.download(ticker, period='max', interval=freq).dropna().loc[start_date:] # use the frequency selected above 47 | price = data['Adj Close'] 48 | volume = data['Volume'] 49 | 50 | # Prepare the variables 51 | # 52 | # Smoothing volume 53 | smooth = 7 54 | V = volume.rolling(window=smooth).mean() 55 | U = (volume.apply(np.log) - V.apply(np.log)) 56 | # Scale expanding or rolling 57 | rho1 = U/U.abs().expanding().mean() 58 | rho2 = U/U.abs().rolling(window=smooth).mean() 59 | 60 | # Scale prices and returns 61 | piso = 0.25*(100 ** 0.75)*(price ** 0.25) 62 | diso = 0.031623*price.diff()/(price ** 0.75) 63 | rets = price.pct_change() 64 | 65 | # Put together, give names, prepare other parameters 66 | all = pd.concat([price, piso, volume, V, U, rho1, rho2, rets, diso], axis=1).dropna() 67 | all.columns = [ticker, ticker+'-iso', ticker+'-V', 'V', 'U', 'rho1', 'rho2', 'R', 'R-iso'] 68 | nobs = all.shape[0] 69 | roll = 4 70 | is_rolling = True 71 | 72 | # Initialize storage 73 | store1 = pd.DataFrame(data=None, index=all.index, columns=['vS1', 'vS2', 'vS3', 'vS4', ticker]) 74 | store2 = pd.DataFrame(data=None, index=all.index, columns=['vS1', 'vS2', 'vS3', 'vS4', ticker]) 75 | 76 | # The evaluation loop 77 | for i in np.arange(roll, nobs, 1): 78 | if is_rolling: 79 | all_i = all.iloc[(i-roll):i] 80 | else: 81 | all_i = all.iloc[:i] 82 | 83 | # First, with rho1 84 | riso_mean = all_i['R-iso'].mean() 85 | rho1_mean = all_i['rho1'].mean() 86 | riso_last = all_i['R-iso'].iloc[-1] 87 | rho1_last = all_i['rho1'].iloc[-1] 88 | ra = all['R'].iloc[i] 89 | if np.sign(riso_mean) == np.sign(rho1_last): 90 | store1['vS1'].iloc[i] = ra 91 | else: 92 | store1['vS1'].iloc[i] = -ra 93 | if np.sign(riso_last) == np.sign(rho1_mean): 94 | store1['vS2'].iloc[i] = -ra 95 | else: 96 | store1['vS2'].iloc[i] = ra 97 | r = all_i[['rho1', 'R-iso']].corr().iloc[1,0] 98 | r1 = (all_i['rho1'].shift(periods=1)).corr(all_i['R-iso']) 99 | store1['vS3'].iloc[i] = np.sign(r)*ra 100 | store1['vS4'].iloc[i] = np.sign(r1)*ra 101 | store1[ticker].iloc[i] = ra 102 | 103 | # Second, with rho2 104 | riso_mean = all_i['R-iso'].mean() 105 | rho1_mean = all_i['rho2'].mean() 106 | riso_last = all_i['R-iso'].iloc[-1] 107 | rho1_last = all_i['rho2'].iloc[-1] 108 | ra = all['R'].iloc[i] 109 | if np.sign(riso_mean) == np.sign(rho1_last): 110 | store2['vS1'].iloc[i] = ra 111 | else: 112 | store2['vS1'].iloc[i] = -ra 113 | if np.sign(riso_last) == np.sign(rho1_mean): 114 | store2['vS2'].iloc[i] = -ra 115 | else: 116 | store2['vS2'].iloc[i] = ra 117 | r = all_i[['rho2', 'R-iso']].corr().iloc[1,0] 118 | r1 = (all_i['rho2'].shift(periods=1)).corr(all_i['R-iso']) 119 | store2['vS3'].iloc[i] = np.sign(r)*ra 120 | store2['vS4'].iloc[i] = np.sign(r1)*ra 121 | store2[ticker].iloc[i] = ra 122 | 123 | # Done, print performance and plot 124 | out1 = performance_measures(store1.dropna(), ff, 0) 125 | out2 = performance_measures(store2.dropna(), ff, 0) 126 | # 127 | print(out1[0]) 128 | print(out2[0]) 129 | 130 | (out2[1]*100)[[ticker, 'vS4']].plot(title='The speculative transform strategy for '+ticker+', daily data', xlabel='Date', ylabel='return in percent', grid='both') 131 | plt.show() -------------------------------------------------------------------------------- /the 
speculative unemployment proxies.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/currency-and-money-market-funds-as-unemployment-proxies/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | from scipy import signal, stats 17 | from statsmodels.tsa.stattools import acf 18 | from statsmodels.sandbox.stats import runs 19 | from statsmodels.stats.descriptivestats import sign_test 20 | import yfinance as yf 21 | 22 | # Get the weekly data 23 | tickers = ['WCURRNS', 'WRMFNS'] 24 | raw_data = pdr.fred.FredReader(tickers, start='1980-01-01', end='2024-10-01').read() 25 | raw_data['WMR'] = np.log(raw_data['WCURRNS']/raw_data['WRMFNS'])*100 26 | 27 | # Transform the main series into monthly 28 | wmr_monthly = raw_data['WMR'].resample('MS').mean() 29 | 30 | # Get the unemployment data 31 | tickers = ['UNRATE'] 32 | raw_data = pdr.fred.FredReader(tickers, start='1980-01-01', end='2024-10-01').read() 33 | # Add the main series 34 | raw_data['AMR'] = wmr_monthly 35 | # plot the original data (commented out) 36 | # ax1 = raw_data['UNRATE'].plot(title='Unemployment and the WMR Indicator', xlabel='Date', ylabel='Unemployment Rate', color='black') 37 | # ax1.xaxis.grid(True, which='major') 38 | # ax1.yaxis.grid(True, which='major') 39 | # ax1.legend(loc='lower left') 40 | # ax2 = ax1.twinx() 41 | # raw_data['WMR'].plot(ax=ax2,color=['blue'], ylabel='Currency in M1/Retail Money Market Funds Growth') 42 | # ax2.legend(loc='upper left') 43 | # plt.show() 44 | 45 | # Compute the lagged-cross correlation between the main series and the unemployment rate 46 | # 47 | start_from = '2019-01-01' # Use '1990-01-01', '2000-01-01' and '2019-01-01' 48 | if start_from is not None: 49 | x = raw_data.loc[start_from:].dropna() 50 | else: 51 | start_from = str(raw_data.index.year[0])+'-01-01' 52 | x = raw_data.dropna() 53 | # 54 | store_ccor = pd.DataFrame(data=None, index=np.arange(0, 49, 1), columns=['WMR-UNRATE']) 55 | for i in np.arange(0, 49, 1): 56 | store_ccor.iloc[i, 0] = x['UNRATE'].corr(x['AMR'].shift(periods=i))*100 57 | # plot the lagged-cross correlations 58 | store_ccor.plot(title='Cross-correlations between lagged AMR and the Unemployment Rate \n from '+start_from, xlabel='Lag', ylabel='Correlation', grid='both') 59 | plt.show() 60 | # extract max cross-correlation 61 | max_ccor = store_ccor.apply(lambda x: np.argmax(np.abs(x)), axis=0)-1 62 | 63 | # Next plot the aligned main series with the unemployment rate 64 | # 65 | # Expand the sample 66 | extra = pd.date_range(start='2024-09-01', periods=max_ccor.iloc[0], freq='MS') 67 | extra_sample = pd.date_range(start=x.index[0], end=extra[-1], freq='MS') 68 | z = pd.DataFrame(data=None, index=extra_sample, columns=['UNRATE', 'WMR']) 69 | z['UNRATE'] = x['UNRATE'] 70 | z['AMR'] = x['AMR'] 71 | z['AMR'] = z['AMR'].shift(periods=max_ccor.iloc[0]) 72 | z = z.iloc[max_ccor.iloc[0]:] 73 | # 74 | ax1 = z['UNRATE'].plot(title='Unemployment and the AMR Indicator lagged '+str(max_ccor.iloc[0])+' months \n from '+start_from, xlabel='Date', ylabel='Unemployment Rate', color='black') 75 | ax1.xaxis.grid(True, which='major') 76 | ax1.yaxis.grid(True, which='major') 77 | 
ax1.legend(loc='lower left') 78 | ax2 = ax1.twinx() 79 | z['AMR'].plot(ax=ax2,color=['blue'], ylabel='Currency in M1/Retail Money Market Funds Growth') 80 | ax2.legend(loc='upper left') 81 | plt.show() 82 | 83 | # Now for the trading exercise, a very simple one 84 | ticker = 'TNA' 85 | if ticker == 'SPY': 86 | d_delay = 2 87 | elif ticker == 'QQQ': 88 | d_delay = 1 89 | elif ticker == 'TNA': 90 | d_delay = 2 91 | # 92 | fin_data = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 93 | rets = fin_data.pct_change().dropna() 94 | amr_rets = pd.concat([x, rets], axis=1).dropna() 95 | amr_rets.columns = ['UNRATE', 'AMR', ticker] 96 | for jj in np.arange(1, 13, 1): 97 | # the strategy is based just on the sign of the AMR indicator before 2019 98 | # and the negative of the sign of the difference of the AMR indicator after 2019 99 | if start_from < '2019-01-01': 100 | str = amr_rets[ticker]*(amr_rets['AMR'].shift(periods=jj).apply(np.sign)) 101 | else: 102 | str = -amr_rets[ticker]*(amr_rets['AMR'].diff(periods=d_delay).shift(periods=jj).apply(np.sign)) 103 | both = pd.concat([amr_rets[ticker], str], axis=1).dropna() 104 | both.columns = [ticker, 'AMR-'+ticker] 105 | tr = ((both + 1).cumprod()-1)*100 106 | # plot 107 | tr.plot(title='Total return of the AMR strategy for '+ticker+' from '+start_from, xlabel='Date', ylabel='return in percent', grid='both') 108 | plt.show() 109 | print(tr.iloc[-1]) -------------------------------------------------------------------------------- /speculative climate change.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/speculative-climate-change/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import statsmodels.api as sm 17 | import yfinance as yf 18 | 19 | # Read the static data and complete with a download 20 | temp = pd.read_csv('global temperature data.csv', header=0, index_col=0, date_format='%Y%m') 21 | sp500 = pd.read_csv('SP500-monthly-from-1928.csv', header=0, index_col=0, date_format='%m/%d/%Y') 22 | sp500.name = 'SP500' 23 | sp500_add = yf.download('^GSPC', start='2022-06-30', end='2023-12-31', interval='1mo')['Adj Close'] 24 | sp500_add.name = 'SP500' 25 | sp500 = pd.concat([sp500, sp500_add], axis=0) 26 | # Crop the temperature from 1928 27 | temp = temp.loc['1928-01-01':] 28 | temp.columns = ['Temp'] 29 | # Merge the indices 30 | temp.index = sp500.index 31 | # add industrial production 32 | indpro = pdr.fred.FredReader('INDPRO', start='1928-01-01', end='2023-12-01').read() 33 | indpro.index = sp500.index 34 | # Put together 35 | sp500_temp_indpro = pd.concat([sp500.apply(np.log), temp, indpro], axis=1) 36 | # and crop from 1950 37 | sp500_temp_indpro = sp500_temp_indpro.loc['1950-01-01':] 38 | sp500_temp_indpro.to_csv('sp500_temp_indpro.csv') 39 | 40 | # Rolling or expanding computation of the mean temperature? 
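# Editorial aside (illustrative, not part of the original script): the flag
# below chooses how the temperature series is smoothed before being turned into
# a trading signal in the loop that follows. The 12-month rolling mean tracks
# the recent local level, so its sign (and that of its difference) keeps
# switching; the expanding mean converges to the long-run average and yields
# far more persistent signals. The two smoothers used below are simply:
# temp.rolling(window=12).mean()   # local level, adapts and can change sign
# temp.expanding().mean()          # long-run level, nearly static late in the sample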
41 | do_rolling = True 42 | 43 | # Loop over all starting years 44 | for year in ['1950-01-01', '1960-01-01', '1970-01-01', '1975-01-01', '1980-01-01', '1985-01-01', '1990-01-01', '1995-01-01', '2000-01-01', '2005-01-01', '2010-01-01', '2015-01-01', '2020-01-01']: 45 | 46 | # Compute the returns and the signal variables 47 | if do_rolling: 48 | rets = pd.concat([sp500.pct_change(), temp, temp.rolling(window=12).mean(), temp.diff(), temp.diff().rolling(window=12).mean()], axis=1).dropna() 49 | else: 50 | rets = pd.concat([sp500.pct_change(), temp, temp.expanding().mean(), temp.diff(), temp.diff().expanding().mean()], axis=1).dropna() 51 | # Give nice column names 52 | rets.columns = ['SP500', 'Temp', 'Temp-Mean', 'Δ-Temp', 'Δ-Temp-Mean'] 53 | 54 | rets = rets.loc[year:] 55 | print('******************************************************') 56 | print(' ') 57 | print('Year is '+year) 58 | 59 | # Compute the signals for delays of 1 to 24 months 60 | for i in np.arange(1, 25, 1): 61 | signal_temp = rets['Temp'].shift(periods=i).apply(np.sign) 62 | signal_temp_mean = rets['Temp-Mean'].shift(periods=i).apply(np.sign) 63 | signal_dtemp = rets['Δ-Temp'].shift(periods=i).apply(np.sign) 64 | signal_dtemp_mean = rets['Δ-Temp-Mean'].shift(periods=i).apply(np.sign) 65 | 66 | # Compute the strategies 67 | benchmark = rets['SP500'] 68 | strategy1 = benchmark*signal_temp 69 | strategy2 = benchmark*signal_temp_mean 70 | strategy3 = benchmark*signal_dtemp 71 | strategy4 = benchmark*signal_dtemp_mean 72 | 73 | # Put together and compute the total returns 74 | both = pd.concat([benchmark, strategy1, strategy2, strategy3, strategy4], axis=1).dropna() 75 | tr = ((both+1).prod()-1)*100 76 | 77 | # If the delay is profitable then print the excess returns 78 | if (tr.iloc[1] > tr.iloc[0]): 79 | print('Evaluation starts in '+format(both.index[0], '%Y-%m')) 80 | print('----------------------------') 81 | print('Strategy #1, Temp with delay =',i,'and ER = {0:6.2f}'.format(tr.iloc[1]-tr.iloc[0])) 82 | # 83 | if (tr.iloc[2] > tr.iloc[0]): 84 | print(' ') 85 | print('Evaluation starts in '+format(both.index[0], '%Y-%m')) 86 | print('----------------------------') 87 | print('Strategy #2, Temp-Mean with delay =',i,'and ER = {0:6.2f}'.format(tr.iloc[2]-tr.iloc[0])) 88 | # 89 | if (tr.iloc[3] > tr.iloc[0]): 90 | print(' ') 91 | print('Evaluation starts in '+format(both.index[0], '%Y-%m')) 92 | print('----------------------------') 93 | print('Strategy #3, Δ-Temp with delay =',i,'and ER = {0:6.2f}'.format(tr.iloc[3]-tr.iloc[0])) 94 | # 95 | if (tr.iloc[4] > tr.iloc[0]): 96 | print(' ') 97 | print('Evaluation starts in '+format(both.index[0], '%Y-%m')) 98 | print('----------------------------') 99 | print('Strategy #4, Δ-Temp-Mean with delay =',i,'and ER = {0:6.2f}'.format(tr.iloc[4]-tr.iloc[0])) 100 | # 101 | print(' ') 102 | print('******************************************************') 103 | -------------------------------------------------------------------------------- /speculative sign smoothing and NN.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/speculative-sign-smoothing-and-nearest-neighbors/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | ## Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import scipy as sp 16 | import yfinance as yf 17 | 18 | ## Define some functions 19 | # 20 | def get_trajectory(x, k): 21 | if len(x.shape) == 1: 22 | x = x.reshape(-1, 1) 23 | if k == 1: 24 | return x 25 | elif k > 1: 26 | y = np.zeros([x.shape[0] - k + 1, k * x.shape[1]]) 27 | for i in range(x.shape[0] - k + 1): 28 | y[i, :] = np.hstack(x[i:(i + k), :]) 29 | return y 30 | else: 31 | raise ValueError('In function get_trajectory the memory order must be >= 1') 32 | 33 | ## 34 | def get_nn(x, p=2, alpha=0.8, step=1): 35 | if len(x.shape) == 1: 36 | x = x.reshape(-1, 1) 37 | # Extract the last row from the trajectory matrix 38 | x_last = x[-1, :] 39 | # Compute all the distances, sort and extract the indices 40 | if p < 0: 41 | similarity_score = np.sum(np.logical_and(x[:-1, :], x_last), axis=1) 42 | winner_pos = np.argwhere(similarity_score >= -p).flatten() 43 | if len(winner_pos) == 0: 44 | return None 45 | _aux = np.argsort(similarity_score[winner_pos]) 46 | isd = winner_pos[_aux][::-1] 47 | else: 48 | if p == 0: 49 | dxt = np.sum(np.abs(x[:-1, :] - x_last), axis=1) 50 | else: 51 | dxt = np.sum((np.abs(x[:-1, :] - x_last) ** p), axis=1) ** (1 / p) 52 | isd = dxt.argsort() 53 | isd = isd[np.where(isd < len(x))] 54 | isd = isd[:int(len(isd) * alpha) + (1 if p < 0 else 0)] 55 | # Subsample? 56 | if step > 1: 57 | ss = np.arange(0, len(isd), step) 58 | isd = isd[ss] 59 | # Done - note that the index of the closest NN is the first in isd! 60 | return isd 61 | 62 | ## 63 | def get_nn_forecast(x, isd): 64 | isd = isd[isd < x.shape[0]] 65 | if len(x.shape) == 1: 66 | x = x.reshape((-1, 1)) 67 | z = x[isd, :] 68 | # The forecast is about discrete values, use the mode 69 | return sp.stats.mode(z, axis=0) 70 | 71 | ## 72 | def get_trained_gamma(s): 73 | s2 = s[2:] 74 | s1 = s[1:-1] 75 | s0 = s[:-2] 76 | ds = s1 - s0 77 | gamma = (s2 - ds)/s0 78 | return gamma 79 | 80 | ## 81 | def get_combinator(r, set_k=10, set_prop=0.25): 82 | s = np.sign(r.values) 83 | s[s == 0.0] = 1.0 84 | gamma = get_trained_gamma(s) 85 | tmat = get_trajectory(gamma, k=set_k) 86 | target = gamma[(set_k-1):].reshape(-1, 1) 87 | nn = get_nn(tmat, p=-1, alpha=set_prop) 88 | # the NN-mode forecast of gamma is mapped into a sign forecast below 89 | gammaf = get_nn_forecast(target, nn)[0][0] 90 | fnn = np.sign((s[-1]-s[-2]) + gammaf*s[-2]) 91 | return np.sign(fnn) 92 | 93 | ## Now for the analysis 94 | 95 | # Get some data 96 | ticker = 'WEAT' 97 | freq = '1d' 98 | start_from = '2022-01-01' 99 | data = yf.download(ticker, period='max', interval=freq)['Adj Close'] 100 | rets = data.pct_change().dropna() 101 | rets = rets.loc[start_from:] 102 | y = np.sign(rets.values) 103 | # Set the parameters 104 | max_roll = 30 105 | set_nn = np.arange(3, (max_roll/2.0)+1, 1).astype(int) 106 | set_prop = np.arange(0.1, 0.55, 0.05) 107 | set_delay = 1 108 | # Counters and storage 109 | tot_count = 0 110 | pos_count = 0 111 | store_er = pd.DataFrame(data=np.zeros([1, 3]), index=[0], columns=['Embedding', '%NN', 'ER']) 112 | 113 | # The main double loop, over the embedding dimension and the % of NN used in forming the forecast 114 | for nn in set_nn: 115 | for prop in set_prop: 116 | tot_count = tot_count + 1 117 | signal = rets.rolling(window=max_roll).apply(get_combinator, args=(nn, prop, )) 118 | strategy = rets*signal.shift(periods=set_delay) 119 | both = pd.concat([rets, strategy],
axis=1).dropna() 120 | both.columns = [ticker, 'Smoothed NN'] 121 | tr = ((both + 1).cumprod()-1)*100 122 | # tr.plot() 123 | # plt.show() 124 | er = tr.iloc[-1,1] - tr.iloc[-1,0] 125 | # print(nn, prop, er) 126 | if er > 0: 127 | pos_count = pos_count + 1 128 | # print(nn, round(prop, 3), round(er, 3)) 129 | to_store = pd.DataFrame(data=np.array([nn, prop, er]).reshape(1, 3), index=[pos_count], columns=store_er.columns) 130 | store_er = pd.concat([store_er, to_store], axis=0) 131 | 132 | # Trim the storage dataframe 133 | store_er = store_er.iloc[1:] 134 | # Compute the successful proportion of trades across combinations 135 | pos_prop = pos_count/tot_count 136 | # Compute the descriptive statistics of the successful trades 137 | stats = store_er['ER'].describe() 138 | # Print and you are done 139 | print(pos_prop) 140 | print(stats) -------------------------------------------------------------------------------- /the speculative probabilities.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/speculative-probabilities/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import yfinance as yf 16 | import os 17 | 18 | import sys 19 | file = open('output.txt', 'w') 20 | sys.stdout = file 21 | 22 | import warnings 23 | warnings.filterwarnings("ignore") 24 | 25 | # Set tickers and frequency of trading 26 | set_tickers = ['DBB'] #['SPY', 'TNA', 'DBC', 'DBA', 'DBB', 'DBP', 'OIH', 'BTC-USD'] 27 | set_freq = '1mo' 28 | 29 | # Set optional starting date 30 | set_start = None #'2023-01-01' 31 | 32 | # Set short, 0 or -1 33 | set_short = -1.0 34 | 35 | # Set rolling window for benchmark 36 | set_roll = [13]# [3, 4, 5, 6, 9, 11, 12, 13] 37 | 38 | # and whether you want recursive estimation of historical probabilities 39 | set_recursive = False 40 | 41 | # Finally, set the threshold for trading 42 | set_thresh = [0.45] #np.arange(0.1, 0.55, 0.05) 43 | 44 | for roll in set_roll: 45 | for thresh in set_thresh: 46 | # Set storage 47 | set_store = pd.DataFrame(data=None, index=['Rets', 'Bench', 'SP-I', 'SP-II', 'SP-III'], columns=set_tickers) 48 | 49 | # Now, do a loop over the tickers and over the observations 50 | for ticker in set_tickers: 51 | # Prepare the data 52 | data = yf.download(ticker, period='max', interval=set_freq)['Adj Close'].dropna() 53 | rets = data.pct_change().dropna() 54 | if set_start is not None: 55 | rets = rets.loc[set_start:] 56 | x = (rets > 0).astype(float) 57 | nobs = x.shape[0] 58 | 59 | # Initialize storage per ticker 60 | store = pd.DataFrame(data=None, index=x.index, columns=['Rets', 'Actual', 'BF', 'XF0', 'XF1', 'BE', 'XE', 'ell-0', 'ell-1']) 61 | trade = pd.DataFrame(data=None, index=x.index, columns=['Rets', 'Bench', 'SP-I', 'SP-II', 'SP-III']) 62 | signs = pd.DataFrame(data=None, index=x.index, columns=['Sign Actual', 'Sign SP-I', 'Sign SP-II', 'Sign SP-III']) 63 | 64 | # Loop over all observations 65 | for i in np.arange(roll, nobs-1, 1): 66 | # Split data and initialize probabilities 67 | last = x.iloc[i-1] 68 | actual = x.iloc[i] 69 | if set_recursive: 70 | lagged = x.iloc[:i].mean() 71 | else: 72 | lagged = x.iloc[(i-roll):i].mean() 73 | if i == roll: 74 | pi_0 = lagged 75 | 76 | # Compute forecast errors and do
probability updating 77 | ei_b = actual - lagged 78 | ei_0 = actual - pi_0 79 | # Compute the mean lambda from its range 80 | ell_0 = np.mean([(pi_0+0.5)/(pi_0-1), (pi_0+0.5)/pi_0]) 81 | ell_1 = np.mean([(pi_0-0.5)/(pi_0-1), (pi_0-0.5)/pi_0]) 82 | #ell_11 = np.mean([(pi_0-0.5)/(pi_0-1), (pi_0-0.5)/pi_0]) 83 | #ell_12 = np.mean([(pi_0-0.5)/pi_0, (pi_0-0.5)/(pi_0-1)]) 84 | # Select appropriate lambda 85 | if last > 0: 86 | pi_1 = pi_0 + ell_1*(ei_0 - 0.5) 87 | # if pi_0 > 0.5: 88 | # pi_1 = pi_0 + ell_11*(ei_1 - 0.5) 89 | # else: 90 | # pi_1 = pi_0 + ell_12*(ei_1 - 0.5) 91 | else: 92 | pi_1 = pi_0 + ell_0*(ei_0 - 0.5) 93 | # In case you get off the bounds revert to benchmark 94 | if (pi_1 < 0) or (pi_1 > 1): 95 | pi_1 = lagged 96 | #print('Probability correction!') 97 | 98 | # Compute trading signals and trade accordingly 99 | bs = (lagged >= thresh).astype(float) + set_short*(lagged < thresh).astype(float) 100 | ps1 = (pi_1 >= thresh).astype(float) + set_short*(pi_1 < thresh).astype(float) 101 | ps2 = (pi_1 >= lagged).astype(float) + set_short*(pi_1 < lagged).astype(float) 102 | ps3 = (lagged >= thresh and pi_1 > thresh).astype(float) + set_short*(lagged <= thresh or pi_1 < thresh).astype(float) 103 | # Store forecasts and trading returns per ticker 104 | store.iloc[i+1] = np.hstack([rets.iloc[i], actual, lagged, pi_0, pi_1, ei_b, ei_0, ell_0, ell_1]) 105 | trade.iloc[i+1] = np.hstack([rets.iloc[i+1], rets.iloc[i+1]*bs, rets.iloc[i+1]*ps1, rets.iloc[i+1]*ps2, rets.iloc[i+1]*ps3]) 106 | signs.iloc[i+1] = np.hstack([np.sign(rets.iloc[i+1]), ps1, ps2, ps3]) 107 | # Update the current probability 108 | pi_0 = pi_1 109 | 110 | # Done, print and plot per ticker 111 | set_store[ticker] = (trade.dropna()+1).prod() 112 | #(trade+1).cumprod().plot() 113 | #plt.show() 114 | 115 | # Plot overall performance 116 | print('Roll=',roll) 117 | print('Threshold=',thresh) 118 | print(set_store.T) 119 | 120 | file.close() 121 | -------------------------------------------------------------------------------- /pre-election-Greece.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/05/21/economic-growth-and-relative-population-growth/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. 
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas_datareader as pdr 15 | import pandas as pd 16 | import statsmodels.api as sm 17 | 18 | # Get the tickers with their explanations 19 | t1 = 'POPTOTGRA647NWDB' # total population 20 | t2 = 'SPPOP65UPTOZSGRC' # percent of total population over 65 21 | t3 = 'SPPOP0014TOZSGRC' # percent of total population less than 14 22 | t4 = 'RGDPNAGRA666NRUG' # real GDP at constant national prices, in 2017 US dollars 23 | t5 = 'RKNANPGRA666NRUG' # capital stock at constant national prices, in 2017 US dollars 24 | 25 | # Get the data 26 | raw = pdr.fred.FredReader([t1, t2, t3, t4, t5], start='1960-01-10').read() 27 | raw.columns = ['POP', 'POP > 65', 'POP < 14', 'RGDP', 'RKS'] 28 | 29 | # Assign to new dataset 30 | data = raw 31 | 32 | # Compute the young-to-old index 33 | age_ratio = data['POP < 14']/data['POP > 65'] 34 | age_ratio.name = 'YOUNG/OLD' 35 | data = pd.concat([data, age_ratio], axis=1) 36 | 37 | # and plot it 38 | ax = age_ratio.plot(grid='both', title='Ratio of young (less than 14 years) to old (greater than 65 years) \n (as percents of total population)', xlabel='Year', ylabel='Ratio') 39 | ax.title.set_fontsize(10) 40 | plt.show() 41 | 42 | # Convert to log-growth rates 43 | use_variables = ['RGDP', 'RKS', 'YOUNG/OLD'] 44 | data[use_variables] = data[use_variables].apply(np.log).diff().dropna()*100 45 | 46 | # Let's plot the growth rates of the data one at a time - instructive to see how the plot details are put together... 47 | # fig, axes = plt.subplots(3,1) 48 | # for i in range(data[use_variables].shape[1]): 49 | # xi = (data[use_variables].iloc[:,i]) 50 | # if i <= 1: 51 | # set_xlabel = '' 52 | # else: 53 | # set_xlabel = 'Year' 54 | # set_title = 'Annual log-growth of '+xi.name 55 | # xi.plot(ax=axes.flatten()[i], title=set_title, xlabel=set_xlabel, ylabel='percent', grid='both') 56 | # axes.flatten()[i].title.set_size(10) 57 | # axes.flatten()[i].yaxis.label.set_size(10) 58 | # axes.flatten()[i].xaxis.label.set_size(10) 59 | # axes.flatten()[i].tick_params(labelsize=10) 60 | # plt.show() 61 | 62 | # Let's compute the cross-correlation (CC) between lagged economic growth and the growth of the young/old 63 | store_cc = pd.DataFrame(data=None, index=range(16), columns=['CC-RGDP', 'CC-RKS']) 64 | 65 | for i in range(16): 66 | df1 = pd.concat([data['YOUNG/OLD'], data['RGDP'].shift(periods=i)], axis=1).dropna() 67 | df2 = pd.concat([data['YOUNG/OLD'], data['RKS'].shift(periods=i)], axis=1).dropna() 68 | store_cc.iloc[i, 0] = df1.corr().iloc[1, 0]*100 69 | store_cc.iloc[i, 1] = df2.corr().iloc[1, 0]*100 70 | 71 | # Plot the CC for both series of growth 72 | store_cc.plot(grid='both', title='Cross-correlation between YOUNG/OLD and economic growth', xlabel='Lag', ylabel='percent') 73 | plt.show() 74 | 75 | # Now plot the series aligned based on their max CC 76 | # 77 | # Prepare two data frames 78 | max_lag = store_cc.apply(np.argmax) 79 | plot_data1 = pd.concat([data['YOUNG/OLD'], data['RGDP'].shift(periods=max_lag[0])], axis=1).dropna() 80 | plot_data1.columns = ['YOUNG/OLD', 'RGDP('+str(-max_lag[0])+')'] 81 | plot_data2 = pd.concat([data['YOUNG/OLD'], data['RKS'].shift(periods=max_lag[1])], axis=1).dropna() 82 | plot_data2.columns = ['YOUNG/OLD', 'RKS('+str(-max_lag[1])+')'] 83 | 84 | # Plot them, and note the special handling for moving the legend location around!!
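# (Side note, not in the original post: np.argmax above picks the lag of the
# largest positive cross-correlation; ranking by absolute value, e.g.
# max_lag_abs = store_cc.apply(lambda c: np.argmax(np.abs(c))),
# would also capture strong negative cross-correlations.)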
85 | # 86 | # First, for RGDP 87 | # 88 | ax1 = plot_data1.iloc[:,0].plot(label=plot_data1.columns[0]) 89 | ax2 = plot_data1.iloc[:,1].plot(label=plot_data1.columns[1], secondary_y=True) 90 | # 91 | h1, l1 = ax1.get_legend_handles_labels() 92 | h2, l2 = ax2.get_legend_handles_labels() 93 | # 94 | plt.legend(h1+h2, l1+l2, loc='lower right') 95 | plt.title('Young/Old growth vs. lagged real GDP growth') 96 | ax1.xaxis.grid(True, which='major') 97 | ax1.yaxis.grid(True, which='major') 98 | plt.show() 99 | 100 | # and then for RKS 101 | # 102 | ax1 = plot_data2.iloc[:,0].plot(label=plot_data2.columns[0]) 103 | ax2 = plot_data2.iloc[:,1].plot(label=plot_data2.columns[1], secondary_y=True) 104 | # 105 | h1, l1 = ax1.get_legend_handles_labels() 106 | h2, l2 = ax2.get_legend_handles_labels() 107 | # 108 | plt.legend(h1+h2, l1+l2, loc='lower left') 109 | plt.title('Young/Old growth vs. lagged real capital stock growth') 110 | ax1.xaxis.grid(True, which='major') 111 | ax1.yaxis.grid(True, which='major') 112 | plt.show() 113 | 114 | # Add a scatterplot for the association between lagged real GDP growth and relative population growth 115 | plot_data1.plot(kind='scatter', x=1, y=0, s=np.abs(plot_data1.iloc[:,1])*20, color='red', grid='both', xlabel='Lagged real GDP growth', ylabel='Relative population growth', title='Young/Old growth vs. lagged real GDP growth') 116 | plt.show() 117 | 118 | # and finally one for the capital stock etc... 119 | plot_data2.plot(kind='scatter', x=1, y=0, s=np.abs(plot_data2.iloc[:,1])*20, color='red', grid='both', xlabel='Lagged real capital stock growth', ylabel='Relative population growth', title='Young/Old growth vs. lagged real capital stock growth') 120 | plt.show() 121 | -------------------------------------------------------------------------------- /competitive_speculator.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/12/07/the-competitive-speculator/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import yfinance as yf 13 | import pandas as pd 14 | import pandas_datareader as pdr 15 | 16 | # Define the weighted mean 17 | def weighted_mean(x, alpha=1.0): 18 | w = np.arange(1, x.shape[0]+1, 1) ** alpha 19 | m = (x * w).sum()/w.sum() 20 | return m 21 | 22 | # A function to compute the maximum drawdown, input is a dataframe of cumulative returns 23 | def max_dd(crets): 24 | maxcret = (crets+1).cummax(axis=0) 25 | drawdowns = ((crets + 1) / maxcret) - 1 26 | return drawdowns.min(axis=0) 27 | 28 | # Another function to collect performance measures, input is a dataframe of returns 29 | def performance_measures(rets, f_factor, target_r=0): 30 | mu = rets.mean() * f_factor 31 | sd = rets.std() * np.sqrt(f_factor) 32 | sr = mu / sd 33 | er = target_r - rets 34 | er = er.clip(lower=0) 35 | l2 = (er ** 2).mean(axis=0) 36 | st = mu/np.sqrt(l2) 37 | cr = (rets+1).cumprod(axis=0) - 1 38 | md = max_dd(cr) 39 | stats = pd.DataFrame([mu, sd, sr, st, cr.iloc[-1], md]) 40 | stats.index = ['Mean', 'Std. 
Dev.', 'Sharpe', 'Sortino', 'TR', 'MaxDD'] 41 | return stats.transpose(), cr 42 | 43 | # Select a ticker to examine - note that the frequency is set to monthly and the interest rate is a monthly one 44 | ticker = 'DBC' 45 | asset = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 46 | set_rate = 'TB4WK' 47 | interest_rate = pdr.fred.FredReader(set_rate, start=asset.index[0], end=asset.index[-1]).read()/100 + 1 48 | interest_rate = ((interest_rate ** (1/12)) - 1) # need to de-annualize the interest rate 49 | d_interest_rate = interest_rate.diff().dropna() 50 | d_asset = asset.pct_change().dropna() 51 | all = pd.concat([asset, d_asset, interest_rate, d_interest_rate], axis=1).dropna() 52 | all.columns = [ticker, 'D-'+ticker, 'R', 'DR'] 53 | nobs = all.shape[0] 54 | 55 | # Set the rolling window on returns 56 | nroll = np.arange(4, 8, 1) 57 | 58 | # Set the scale factor from 0 to 2, in intervals of 0.2 59 | nkappa = np.arange(0, 2.2, 0.2) 60 | 61 | # Initialize storage to search 62 | store_all = pd.DataFrame(data=None, index=nkappa, columns=nroll) 63 | 64 | # Do the search loop 65 | for roll in nroll: 66 | for kappa in nkappa: 67 | # 68 | # print(roll, kappa) 69 | store = pd.DataFrame(data=None, index=all.index, columns=[ticker, 'Speculation']) 70 | store_beta = pd.DataFrame(data=None, index=all.index, columns=['Discount']) 71 | count_nd = 0 72 | # Do the evaluation loop 73 | for i in np.arange(roll, nobs, 1): 74 | all_i = all.iloc[(i-roll):i] 75 | drift = weighted_mean(all_i['D-'+ticker], 0) 76 | kstd = kappa*all_i['D-'+ticker].std() 77 | last_yi = all_i[ticker].iloc[-1] 78 | last_dri = all_i['DR'].iloc[-1] 79 | last_ri = all_i['R'].iloc[-1] # rate level, used for the discount factor 80 | if last_ri < 0: 81 | last_ri = 0 82 | count_nd += 1 83 | beta_i = 1/(1+last_ri) 84 | store_beta.iloc[i] = beta_i 85 | spec = beta_i*(last_yi + drift + kstd - last_dri) - last_yi 86 | benc = all['D-'+ticker].iloc[i] 87 | strg = np.sign(spec)*benc 88 | store.iloc[i,:] = np.hstack([benc, strg]) 89 | # 90 | # print(count_nd/(nobs-roll)) 91 | # Compute the excess returns and store 92 | crets = ((store+1).dropna().prod()-1)*100 93 | drets = crets['Speculation'] - crets[ticker] 94 | store_all.loc[kappa, roll] = drets 95 | 96 | # Find the best performing parameter combination and redo the analysis 97 | max_ret = store_all.max().max() 98 | max_dim = np.where(store_all == max_ret) 99 | roll_max = nroll[max_dim[1]][0] 100 | kappa_max = nkappa[max_dim[0]][0] 101 | 102 | store = pd.DataFrame(data=None, index=all.index, columns=[ticker, 'Speculation']) 103 | store_beta = pd.DataFrame(data=None, index=all.index, columns=['Discount']) 104 | count_nd = 0 105 | # 106 | for i in np.arange(roll_max, nobs, 1): 107 | all_i = all.iloc[(i-roll_max):i] 108 | drift = weighted_mean(all_i['D-'+ticker], 0) 109 | kstd = kappa_max*all_i['D-'+ticker].std() 110 | last_yi = all_i[ticker].iloc[-1] 111 | last_dri = all_i['DR'].iloc[-1] 112 | last_ri = all_i['R'].iloc[-1] # rate level, used for the discount factor 113 | if last_ri < 0: 114 | last_ri = 0 115 | count_nd += 1 116 | beta_i = 1/(1+last_ri) 117 | store_beta.iloc[i] = beta_i 118 | spec = beta_i*(last_yi + drift + kstd - last_dri) - last_yi 119 | benc = all['D-'+ticker].iloc[i] 120 | strg = np.sign(spec)*benc 121 | # strg = (spec >= 0)*benc 122 | store.iloc[i,:] = np.hstack([benc, strg]) 123 | # 124 | # print(count_nd/(nobs-roll)) 125 | crets = ((store+1).dropna().cumprod()-1)*100 126 | 127 | # Done, plot and print 128 | crets.plot(title='The competitive speculator strategy for '+ticker+', monthly rebalancing', xlabel='Date',
ylabel='return in percent', grid='both') 129 | plt.show() 130 | # 131 | stats = performance_measures(store, 12, 0) 132 | print(roll_max, kappa_max) 133 | print(stats[0]) -------------------------------------------------------------------------------- /the DSCP strategy.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/the-predictive-nature-of-deposits-sales-and-prices-the-dscp-strategy/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import the packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | import scipy as sp 17 | import statsmodels.api as sm 18 | import yfinance as yf 19 | 20 | # Parametrization 21 | weekly_ticker = ['DPSACBW027SBOG'] 22 | monthly_tickers = ['PPIACO', 'CPIAUCSL', 'MRTSSM44000USS'] 23 | asset_ticker = 'TNA' # SPY QQQ IWF TNA DBC DBA XLF XLE 24 | start_from = '2000-01-01' # 2000-01-01 or earliest time and 2018-01-01 25 | max_delay = 12 26 | 27 | # Import monthly economic data 28 | monthly_data = pdr.fred.FredReader(monthly_tickers, start=start_from, end='2024-03-01').read() 29 | monthly_data.columns = ['PPI', 'CPI', 'Sales'] 30 | monthly_data = monthly_data.dropna() 31 | 32 | # Import weekly deposits and convert to monthly 33 | weekly_data = pdr.fred.FredReader(weekly_ticker, start=start_from, end='2024-03-01').read() 34 | weekly_to_monthly = weekly_data.resample('M').apply(lambda x: x.iloc[-1]) 35 | weekly_to_monthly = weekly_to_monthly.loc[start_from:'2024-02-01'] 36 | weekly_to_monthly.index = monthly_data.index 37 | monthly_data['Deposits'] = weekly_to_monthly 38 | 39 | # Import the financial returns 40 | asset = yf.download(asset_ticker, start=start_from, end='2024-02-01', interval='1mo')['Adj Close'].dropna() 41 | monthly_data[asset_ticker] = asset 42 | 43 | # Create the new variables 44 | monthly_data['Sales/Deposits'] = monthly_data['Sales']/monthly_data['Deposits'] 45 | monthly_data['CPI/PPI'] = monthly_data['CPI']/monthly_data['PPI'] 46 | monthly_data['Deposits/PPI'] = monthly_data['Deposits']/monthly_data['PPI'] 47 | monthly_data['Sales/CPI'] = monthly_data['Sales']/monthly_data['CPI'] 48 | 49 | # Take monthly growth rates 50 | data = monthly_data.pct_change().dropna() 51 | 52 | # Set search delay sequence and storage 53 | seq_delay = np.arange(1, max_delay+1, 1) 54 | store_cr = pd.DataFrame(data=None, index=seq_delay, columns=['Bench', 'S1', 'S2', 'S3', 'S1ns', 'S2ns', 'S3ns']) 55 | 56 | # Do the loop over all delay values 57 | for delay in seq_delay: 58 | 59 | # Compute the strategies directly 60 | bench = data[asset_ticker].iloc[delay:] 61 | s1 = data[asset_ticker]*np.sign((data['Deposits/PPI']-data['Sales/CPI']).shift(periods=delay)) 62 | s2 = data[asset_ticker]*np.sign((data['Deposits']-data['CPI']).shift(periods=delay)) 63 | s3 = data[asset_ticker]*np.sign((data['Deposits']).shift(periods=delay)) 64 | s1ns = data[asset_ticker]*((data['Deposits/PPI']-data['Sales/CPI']).shift(periods=delay) >= 0) 65 | s2ns = data[asset_ticker]*((data['Deposits']-data['CPI']).shift(periods=delay) >= 0) 66 | s3ns = data[asset_ticker]*((data['Deposits']).shift(periods=delay) >= 0) 67 | cr_bench = ((bench+1).cumprod()-1)*100 68 | cr_s1 = ((s1+1).cumprod()-1)*100 69 | cr_s2 = ((s2+1).cumprod()-1)*100 70 | cr_s3 = ((s3+1).cumprod()-1)*100 71 | 
cr_s1ns = ((s1ns+1).cumprod()-1)*100 72 | cr_s2ns = ((s2ns+1).cumprod()-1)*100 73 | cr_s3ns = ((s3ns+1).cumprod()-1)*100 74 | # 75 | store_cr.loc[delay] = np.hstack([cr_bench.iloc[-1], cr_s1.iloc[-1], cr_s2.iloc[-1], cr_s3.iloc[-1], cr_s1ns.iloc[-1], cr_s2ns.iloc[-1], cr_s3ns.iloc[-1]]) 76 | 77 | # Find the max over all delay periods, print and then adjust lines 82 and 83 manually to produce the plots! 78 | cr_max = store_cr.max(axis=0) 79 | print(cr_max) 80 | print(store_cr.idxmax(axis=0)) 81 | # 82 | best_delay = 10 83 | best_str = 'S1ns' 84 | 85 | # Plotting 86 | if best_str == 'S1': 87 | bench = data[asset_ticker].iloc[best_delay:] 88 | best = data[asset_ticker]*np.sign((data['Deposits/PPI']-data['Sales/CPI']).shift(periods=best_delay)) 89 | cr_bench = ((bench+1).cumprod()-1)*100 90 | cr_best = ((best+1).cumprod()-1)*100 91 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 92 | merge.columns = [asset_ticker, 'S1'] 93 | elif best_str == 'S2': 94 | bench = data[asset_ticker].iloc[best_delay:] 95 | best = data[asset_ticker]*np.sign((data['Deposits']-data['CPI']).shift(periods=best_delay)) 96 | cr_bench = ((bench+1).cumprod()-1)*100 97 | cr_best = ((best+1).cumprod()-1)*100 98 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 99 | merge.columns = [asset_ticker, 'S2'] 100 | elif best_str == 'S1ns': 101 | bench = data[asset_ticker].iloc[best_delay:] 102 | best = data[asset_ticker]*((data['Deposits/PPI']-data['Sales/CPI']).shift(periods=best_delay) >= 0) 103 | cr_bench = ((bench+1).cumprod()-1)*100 104 | cr_best = ((best+1).cumprod()-1)*100 105 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 106 | merge.columns = [asset_ticker, 'S1ns'] 107 | elif best_str == 'S2ns': 108 | bench = data[asset_ticker].iloc[best_delay:] 109 | best = data[asset_ticker]*((data['Deposits']-data['CPI']).shift(periods=best_delay) >= 0) 110 | cr_bench = ((bench+1).cumprod()-1)*100 111 | cr_best = ((best+1).cumprod()-1)*100 112 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 113 | merge.columns = [asset_ticker, 'S2ns'] 114 | elif best_str == 'S3': 115 | bench = data[asset_ticker].iloc[best_delay:] 116 | best = data[asset_ticker]*np.sign((data['Deposits']).shift(periods=best_delay)) 117 | cr_bench = ((bench+1).cumprod()-1)*100 118 | cr_best = ((best+1).cumprod()-1)*100 119 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 120 | merge.columns = [asset_ticker, 'S3'] 121 | elif best_str == 'S3ns': 122 | bench = data[asset_ticker].iloc[best_delay:] 123 | best = data[asset_ticker]*((data['Deposits']).shift(periods=best_delay) >= 0) 124 | cr_bench = ((bench+1).cumprod()-1)*100 125 | cr_best = ((best+1).cumprod()-1)*100 126 | merge = pd.concat([cr_bench, cr_best], axis=1).dropna() 127 | merge.columns = [asset_ticker, 'S3ns'] 128 | 129 | # Done! 130 | merge.plot(title='The DSCP strategy for '+asset_ticker, xlabel='Date', ylabel='return in percent', grid='both') 131 | plt.show() 132 | -------------------------------------------------------------------------------- /macroeconomic uncertainty and currency trading.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.cce.uoa.gr/thomakos/macroeconomic-uncertainty-currency-and-commodities-trading/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D.
Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import pandas_datareader as pdr 14 | import scipy as sp 15 | import yfinance as yf 16 | 17 | # Input the uncertainty indices 18 | enames = ['USEPUINDXM', 'RUSEPUINDXM', 'EUEPUINDXM', 'INDEPUINDXM', 'UKEPUINDXM'] 19 | data = pdr.fred.FredReader(enames, start='1990-01-01', end='2024-01-31').read() 20 | data.columns = ['UI-US', 'UI-RS', 'UI-EU', 'UI-IN', 'UI-UK'] 21 | # Now input the currency and commodity ETFs 22 | fxnames = ['UUP', 'FXE', 'FXY', 'FXB', 'FXF', 'FXA', 'DBA', 'DBC', 'OIH', 'WEAT'] 23 | fx_data = yf.download(fxnames, start='2002-01-01', end='2024-01-31', interval='1mo')['Adj Close'].dropna() 24 | # and merge them 25 | all_data = pd.concat([data, fx_data], axis=1) 26 | 27 | # Create the index and add it to the dataset 28 | idx_data = all_data[['UI-US', 'UI-RS', 'UI-EU', 'UI-IN', 'UI-UK']] 29 | all_data['GUI'] = (idx_data/idx_data.iloc[0]).mean(axis=1) 30 | 31 | # All variables of interest together 32 | all_names = np.hstack([fxnames, 'GUI']).tolist() 33 | 34 | # Select the starting years for computing the cross-correlations 35 | years = pd.date_range(start='2007-03-01', end='2023-01-01', freq='AS-JAN') 36 | store_rho = pd.DataFrame(data=None, index=years, columns=fxnames) 37 | # Sequence of lags for the cross-correlations 38 | seq_lags = np.arange(1, 13, 1) 39 | # It makes sense to consider monthly growth rates, but you can change this 40 | set_diff = 1 41 | 42 | # Loop over the different starting years 43 | for yy in years: 44 | # Compute the growth rates, start from each particular year 45 | new_d_data = all_data.pct_change(periods=set_diff).dropna().loc[yy:] 46 | store_cz = pd.DataFrame(data=None, index=seq_lags, columns=fxnames) 47 | 48 | # For each of the lags compute the cross-correlation 49 | for i in seq_lags: 50 | z = pd.concat([new_d_data[fxnames],new_d_data['GUI'].shift(periods=i)], axis=1).dropna() 51 | cz = z.corr() 52 | store_cz.loc[i] = (cz[fxnames].loc['GUI']).to_numpy() 53 | 54 | # For each year and for all lags compute and store the lag with max abs cross-correlation 55 | store_rho.loc[yy] = store_cz.apply(lambda x: np.argmax(np.abs(x)), axis=0)+1 56 | 57 | # Done, print the optimal lags 58 | print(store_rho) 59 | store_rho.to_csv(str(set_diff)+'-rho.csv') 60 | 61 | # Now for the forecasting stage - note that here we start from each particular year and 62 | # then we make the out-of-sample forecast 63 | set_year = '2019-01-01' 64 | d_data = all_data[all_names].pct_change(periods=set_diff).dropna() 65 | d_data = d_data.loc[set_year:] 66 | yname = 'WEAT' 67 | # Use the in-sample mean lag 68 | set_lag = int(np.floor(store_rho[yname].mean())) 69 | # IMPORTANT: CHANGE TO 2024-01-01 FOR WHEAT, OIL, FOOD AND TO 2024-02-01 FOR THE FINANCIALS!!! 70 | f_data = pd.DataFrame(data=None, index=pd.date_range(start='2024-02-01', periods=set_lag, freq='MS'), columns=[yname, 'GUI']) 71 | # Compute the delay regression 72 | py = pd.concat([d_data[yname], d_data['GUI'].shift(periods=set_lag)], axis=1).dropna() 73 | pyc = py.corr().iloc[1,0] 74 | sy = py.std() 75 | my = py.mean() 76 | beta = pyc*(sy[yname]/sy['GUI']) 77 | alpha = my[yname] - beta*my['GUI']
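# (Quick check, not in the original post: the moment-based alpha and beta
# above coincide with an OLS fit of the delay regression; assuming py as
# built above,)
# import statsmodels.api as sm
# ols = sm.OLS(py[yname], sm.add_constant(py['GUI'])).fit()
# print(ols.params)  # should reproduce (alpha, beta)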
78 | f_data['GUI'] = (d_data['GUI'].iloc[-set_lag:]).to_numpy() 79 | # Compute the out-of-sample forecast 80 | f_data[yname] = alpha + (d_data['GUI'].iloc[-set_lag:]).to_numpy()*beta 81 | both_data = pd.concat([py, f_data], axis=0) 82 | # print and plot! 83 | recent_both = both_data.loc['2023-10-01':] 84 | recent_both.plot(grid='both') 85 | plt.show() 86 | print(recent_both) 87 | 88 | # Finally, for the trading experiment using the same dependent variable as above 89 | y = all_data[yname].pct_change() # this must be "pure" monthly returns 90 | x = all_data['GUI'].pct_change(periods=set_diff) # but this can be anything in terms of periods 91 | y = y.loc[x.index] 92 | x = x.loc[x.index] 93 | 94 | # and here we trade for each different lag of the index and compare with the optimal ones found before 95 | store_xy = pd.DataFrame(data=None, index=seq_lags, columns=['Strategy', 'Benchmark']) 96 | # 97 | for i in seq_lags: 98 | xy = (y*(x.shift(periods=i).apply(np.sign))).dropna() 99 | rxy = pd.concat([xy, y.loc[xy.index]], axis=1) 100 | rxy.columns = ['GUI'+' on '+yname, yname] 101 | t = ((rxy+1).prod()-1)*100 102 | #t_by_year = rxy.groupby(by=rxy.index.year).apply(lambda x: (x+1).prod()) 103 | #print(i) 104 | #print(t_by_year) 105 | store_xy.loc[i] = t.to_numpy() 106 | # Plot the best performers 107 | z_uup = (i == 4) and (yname == 'UUP') 108 | z_fxe = (i == 12) and (yname == 'FXE') 109 | z_fxy = (i == 2) and (yname == 'FXY') 110 | z_fxb = (i == 10) and (yname == 'FXB') 111 | z_fxf = (i == 2) and (yname == 'FXF') 112 | z_fxa = (i == 3) and (yname == 'FXA') 113 | z_dba = (i == 12) and (yname == 'DBA') 114 | z_dbc = (i == 11) and (yname == 'DBC') 115 | z_oih = (i == 10) and (yname == 'OIH') 116 | z_weat = (i == 3) and (yname == 'WEAT') 117 | cxy = (((rxy+1).cumprod()-1)*100) 118 | set_title = 'Global Uncertainty Index (GUI) strategy for '+yname 119 | if z_uup or z_fxe or z_fxy or z_fxb or z_fxf or z_fxa or z_weat or z_dba or z_dbc or z_oih: 120 | cxy.plot(title=set_title, xlabel='Date', ylabel='return in percent', grid='both') 121 | plt.show() 122 | 123 | # Print the trading results 124 | print(store_xy) -------------------------------------------------------------------------------- /automatic_window_selection.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/10/15/tcb-with-automatic-window-length/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | 12 | # Import the required libraries 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | import pandas as pd 16 | import scipy as sp 17 | import yfinance as yf 18 | 19 | # Define the function that finds the best window according to wtype and nroll 20 | def automatic_window_length(x, nroll, wtype): 21 | y = x.rank() 22 | z = y == y.max() 23 | out = nroll[z] 24 | if len(out) == 0: 25 | return np.nan 26 | else: 27 | if wtype == 'min': 28 | return int(np.min(out)) 29 | elif wtype == 'median': 30 | return int(np.median(out)) 31 | elif wtype == 'mean': 32 | return int(np.mean(out)) 33 | elif wtype == 'max': 34 | return int(np.max(out)) 35 | 36 | # Get some data, for the post I used the tickers below 37 | tickers = ['EEM', 'DBA', 'DBC', 'USO', 'FXE'] 38 | 39 | # Forecasts are based on discrete or continuous data?
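# (Sketch, not in the original post: with signs, the rolling-mean forecast
# used below is a majority vote over recent return signs, e.g.
# np.sign(np.mean(np.sign([0.02, -0.01, 0.03]))) is +1: two ups, one down.)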
40 | use_signs = True 41 | 42 | # Set the rolling windows 43 | set_R1 = 2 44 | set_RM = 18 45 | nroll = np.arange(set_R1, set_RM+1, 1) 46 | 47 | # Set the rolling period for the computation of cumulative returns, or to -1 to use recursive computation 48 | set_roll_wealth = -1 49 | 50 | # Then, a loop over the tickers 51 | for ticker in tickers: 52 | # Download the data, using monthly frequency below and starting from 2013 53 | data = yf.download(ticker, period='max', interval='1mo')['Adj Close'].dropna() 54 | r = data.pct_change().dropna().loc['2013-01-01':] # you can change this of course 55 | r.name = ticker 56 | y = r 57 | if use_signs: 58 | y = np.sign(r) 59 | 60 | # Nobs 61 | nobs = y.shape[0] 62 | 63 | # Initialize storage 64 | forecasts = pd.DataFrame(data=None, index=y.index, columns=nroll) 65 | rolreturn = pd.DataFrame(data=None, index=y.index, columns=nroll) 66 | 67 | # Loop over the rolling windows, the forecast is the plain sample mean 68 | for i in nroll: 69 | frc = y.rolling(window=i).mean().apply(np.sign) 70 | ret = r*(frc.shift(periods=1)) 71 | if set_roll_wealth > 0: 72 | rrt = (ret + 1).rolling(window=set_roll_wealth).apply(np.prod) - 1 73 | else: 74 | rrt = (ret + 1).cumprod() - 1 75 | forecasts[i] = frc 76 | rolreturn[i] = rrt 77 | 78 | # Compute the combinations 79 | c_min = rolreturn.apply(automatic_window_length, args=(nroll, 'min'), axis=1) 80 | c_mdn = rolreturn.apply(automatic_window_length, args=(nroll, 'median'), axis=1) 81 | c_mnn = rolreturn.apply(automatic_window_length, args=(nroll, 'mean'), axis=1) 82 | c_max = rolreturn.apply(automatic_window_length, args=(nroll, 'max'), axis=1) 83 | 84 | # Based on the combination select the appropriate forecast each time 85 | new_forecasts = pd.DataFrame(data=None, index=y.index, 86 | columns=['AWL-min', 'AWL-median', 'AWL-mean', 'AWL-max']) 87 | for i in range(nobs): 88 | c_min_i = c_min.iloc[i] 89 | c_mdn_i = c_mdn.iloc[i] 90 | c_mnn_i = c_mnn.iloc[i] 91 | c_max_i = c_max.iloc[i] 92 | # 93 | c_min_i_check = np.isnan(c_min_i) 94 | c_mdn_i_check = np.isnan(c_mdn_i) 95 | c_mnn_i_check = np.isnan(c_mnn_i) 96 | c_max_i_check = np.isnan(c_max_i) 97 | # 98 | if not c_min_i_check: 99 | new_forecasts.iloc[i, 0] = forecasts.iloc[i, int(c_min_i-set_R1)] 100 | if not c_mdn_i_check: 101 | new_forecasts.iloc[i, 1] = forecasts.iloc[i, int(c_mdn_i-set_R1)] 102 | if not c_mnn_i_check: 103 | new_forecasts.iloc[i, 2] = forecasts.iloc[i, int(c_mnn_i-set_R1)] 104 | if not c_max_i_check: 105 | new_forecasts.iloc[i, 3] = forecasts.iloc[i, int(c_max_i-set_R1)] 106 | 107 | # and compute the new returns 108 | new_r = pd.DataFrame(data=np.repeat(r.to_numpy(), 4).reshape(-1, 4), index=r.index, 109 | columns=['AWL-min', 'AWL-median', 'AWL-mean', 'AWL-max']) 110 | new_ret = new_r*(new_forecasts.shift(periods=1)) 111 | if set_roll_wealth > 0: 112 | new_rolreturn = (new_ret + 1).rolling(window=set_roll_wealth).apply(np.prod) - 1 113 | else: 114 | new_rolreturn = (new_ret + 1).cumprod() - 1 115 | 116 | # and the benchmark 117 | if set_roll_wealth > 0: 118 | bench = (r + 1).rolling(window=set_roll_wealth).apply(np.prod) - 1 119 | else: 120 | bench = (r + 1).cumprod() - 1 121 | 122 | # Collect the results, remove impact of maximum rolling window 123 | all = pd.concat([bench, new_rolreturn], axis=1).dropna()*100 124 | 125 | # Do the plot 126 | all[[ticker, 'AWL-min']].plot(title='Total trading returns of the AWL-min strategy for '+ticker, ylabel='return in percent') 127 | plt.grid(visible=True, which='both') 128 | plt.show() 129 | 130 | # and a plot of 
the AWL windows selected 131 | cmb_all = pd.concat([c_min, c_mdn, c_mnn, c_max], axis=1) 132 | cmb_all.columns = ['AWL-min', 'AWL-median', 'AWL-mean', 'AWL-max'] 133 | cmb_all['AWL-min'].plot(title='AWL-min selections for '+ticker, ylabel='window length') 134 | plt.grid(visible=True, which='both') 135 | plt.show() 136 | 137 | # and print some statistics for the cumulative or average cumulative return 138 | if set_roll_wealth > 0: 139 | print(all.mean(axis=0)) 140 | else: 141 | print(all.iloc[-1]) 142 | 143 | # and cumulative return by year 144 | r_all = pd.concat([r, new_ret], axis=1) 145 | by_year = r_all.groupby(by=new_ret.index.year).apply(lambda x: (x+1).prod()-1)*100 146 | print(by_year) -------------------------------------------------------------------------------- /realgdp.py: -------------------------------------------------------------------------------- 1 | # 2 | # Python code replicating results on this post: 3 | # 4 | # https://prognostikon.wordpress.com/2023/05/06/peaks-and-troughs-forecasting-us-real-gdp-growth/ 5 | # 6 | # at my blog Prognostikon 7 | # 8 | # (c) Dimitrios D. Thomakos, dimitrios.thomakos@gmail.com, https://github.com/dthomakos 9 | # 10 | 11 | # Import packages 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import pandas as pd 15 | import pandas_datareader as pdr 16 | from dateutil.relativedelta import relativedelta 17 | 18 | # Get the real GDP 19 | realgdp = pdr.fred.FredReader('GDPC1', start='1947-01-01', end='2023-07-01').read() 20 | # Compute the annual growth rate 21 | growthrate = realgdp.apply(np.log).diff(periods=4).dropna()*100 22 | # Give nice column names 23 | growthrate.columns = ['Real GDP Growth Rate'] 24 | 25 | # Given the most recent highest peak find the nearest neighbors (NN) 26 | set_peak = 8 27 | # Fix target and training sets 28 | set_last = growthrate.iloc[-set_peak:] 29 | set_rest = growthrate.iloc[:-set_peak] 30 | 31 | # Compute the magnitude and positions of the NN 32 | mod = set_rest.shape[0]%set_peak 33 | set_rest = set_rest.iloc[mod:] 34 | set_M = set_rest.shape[0]-set_peak+1 35 | store_dates = pd.DataFrame(data=np.zeros([set_peak, set_M])) 36 | store_values = pd.DataFrame(data=np.zeros([set_peak, set_M])) 37 | store_distances = pd.DataFrame(data=np.zeros([set_M, 1]), columns=['Distances']) 38 | 39 | # Careful in the NN - the first position must be a peak!!
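# (Sketch, not in the original post: the test below keeps a candidate window
# only if its first value exceeds all later ones, e.g. for
# w = pd.Series([3.0, 1.0, 2.0]) the check (w.iloc[0] > w.iloc[1:]).all() is True.)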
40 | for i in range(set_rest.shape[0]-set_peak+1): 41 | xi = set_rest.iloc[i:(i+set_peak)] 42 | store_dates.iloc[:,i] = xi.index 43 | store_values.iloc[:, i] = xi.to_numpy() 44 | test = (xi.iloc[0] > xi.iloc[1:]).all().to_numpy()[0] 45 | if test: 46 | dist = np.sqrt(((xi.to_numpy()[0] - set_last.to_numpy()[0]) ** 2).sum()) 47 | else: 48 | dist = 999 49 | store_distances.iloc[i,:] = dist 50 | 51 | # Now, get the dates and values for the ordered distances 52 | idx = store_distances.sort_values(by='Distances').index 53 | store_dates = store_dates.loc[:,idx] 54 | store_values = store_values.loc[:, idx] 55 | 56 | # By inspection now, select the first 3 NN by magnitude of their first value 57 | # and by non-overlapping periods: [6, 40, 139, 67, 95] and [6, 40, 95] 58 | look_back = [6, 40, 139, 67, 95] 59 | look_back = [6, 40, 95] 60 | frc1 = pd.DataFrame(data=np.zeros([2*set_peak, len(look_back)]), columns=look_back) 61 | 62 | # For each period we must also find the next set_peak values for the forecast 63 | for j in look_back: 64 | idx = store_dates.loc[:,j] 65 | frc1.loc[:,j] = growthrate.loc[idx[0]:(idx[set_peak-1]+relativedelta(months=3*set_peak))].to_numpy() 66 | 67 | # Compute the mean over the NN 68 | mean1 = frc1.mean(axis=1) 69 | # and the standard deviation of this mean 70 | std = frc1.std(axis=1)/np.sqrt(len(look_back)) 71 | # bounds... 72 | mean1_lb = mean1 - 2*std 73 | mean1_ub = mean1 + 2*std 74 | mean1_all = pd.concat([mean1_lb, mean1, mean1_ub], axis=1) 75 | 76 | # Let us add a standard NN forecast for comparison based on the last observation 77 | last_obs = growthrate.iloc[-1] 78 | dist = np.sqrt(((growthrate - growthrate.iloc[-1])**2)).sort_values(by='Real GDP Growth Rate').iloc[1:] 79 | # select number of NN to use 80 | set_NN = 2 81 | frc2 = pd.DataFrame(data=np.zeros([set_peak, set_NN]), columns=range(set_NN)) 82 | for j in range(set_NN): 83 | frc2.iloc[:,j] = growthrate.loc[dist.index[j]+relativedelta(months=3):(dist.index[j]+relativedelta(months=3*set_peak+1))].to_numpy() 84 | # get the forecast and bounds 85 | mean2 = frc2.mean(axis=1) 86 | std = frc2.std(axis=1)/np.sqrt(set_NN) 87 | mean2_lb = mean2 - 2*std 88 | mean2_ub = mean2 + 2*std 89 | mean2_all = pd.concat([mean2_lb, mean2, mean2_ub], axis=1) 90 | 91 | # Done, fix nicely for a plot 92 | actual = pd.DataFrame(data=np.vstack([set_last.to_numpy(), np.repeat(np.nan, set_peak).reshape(-1, 1)]), 93 | index=pd.date_range(start='2021-04-01', periods=2*set_peak, freq='Q-DEC')) 94 | mean1_all.iloc[:set_peak] = np.nan 95 | mean2_all = pd.concat([pd.DataFrame(np.repeat(np.nan, 3*set_peak).reshape(set_peak, 3), columns=range(3)), mean2_all], axis=0) 96 | mean1_all.index = actual.index 97 | mean2_all.index = actual.index 98 | # 99 | actual_frc1 = pd.concat([actual, mean1_all], axis=1) 100 | actual_frc1.columns = ['Real GDP growth rate', 'NNP-forecast lower bound', 101 | 'NNP-forecast', 'NNP-forecast upper bound'] 102 | actual_frc2 = pd.concat([actual, mean2_all], axis=1) 103 | actual_frc2.columns = ['Real GDP growth rate', 'NN-forecast lower bound', 104 | 'NN-forecast', 'NN-forecast upper bound'] 105 | actual_frc = pd.concat([actual, mean1_all.iloc[:,1], mean2_all.iloc[:,1]], axis=1) 106 | actual_frc.columns = ['Real GDP growth rate', 'NNP-forecast', 'NN-forecast'] 107 | # 108 | ax1 = actual_frc1.plot(grid='both', color=['black', 'red', 'blue', 'red'], style=['-', ':', '--', ':'], 109 | title='US real GDP growth rate and NN peak-based forecast with 95% bounds using '+str(len(look_back))+' NN', 110 | figsize=[13, 8], 
xlabel='Date', ylabel='percent') 111 | # plt.axvline(actual_frc1.index[set_peak-1],color='black', linestyle=':') 112 | ax1.xaxis.grid(True, which='minor') 113 | ax1.yaxis.grid(True, which='minor') 114 | plt.show() 115 | # 116 | ax2 = actual_frc2.plot(grid='both', color=['black', 'red', 'blue', 'red'], style=['-', ':', '--', ':'], 117 | title='US real GDP growth rate and standard NN forecast with 95% bounds using '+str(set_NN)+' NN', 118 | figsize=[13, 8], xlabel='Date', ylabel='percent') 119 | ax2.xaxis.grid(True, which='minor') 120 | ax2.yaxis.grid(True, which='minor') 121 | plt.show() 122 | # 123 | # for this last plot insert the last actual value in the forecasts to look nicer 124 | actual_frc.loc['2023-03-31',['NNP-forecast', 'NN-forecast']] = actual_frc.loc['2023-03-31', 'Real GDP growth rate'] 125 | ax3 = actual_frc.plot(grid='both', color=['black', 'blue', 'green'], style=['-', '--', ':'], 126 | title='US real GDP growth rate and both NN-type forecasts using '+str(len(look_back))+' NNP and '+str(set_NN)+' NN', figsize=[13, 8], xlabel='Date', ylabel='percent') 127 | ax3.xaxis.grid(True, which='minor') 128 | ax3.yaxis.grid(True, which='minor') 129 | plt.show() --------------------------------------------------------------------------------