def assess_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                     syms=['GOOG', 'AAPL', 'GLD', 'XOM'],
                     allocs=[0.1, 0.2, 0.3, 0.4],
                     sv=1000000, rfr=0.0, sf=252.0,
                     gen_plot=False):
    """Compute summary statistics for a fixed-allocation portfolio.

    sd, ed   -- first and last calendar date of the evaluation window
    syms     -- list of ticker symbols held in the portfolio
    allocs   -- allocation per symbol, in the same order as syms
    sv       -- starting portfolio value
    rfr      -- per-period risk-free rate subtracted from daily returns
    sf       -- sampling frequency used to annualise the Sharpe ratio
    gen_plot -- when true, plot the normalised portfolio against SPY

    Returns the tuple (cr, adr, sddr, sr, ev): cumulative return, mean
    daily return, standard deviation of daily returns, Sharpe ratio and
    end value of the portfolio.
    """
    dates = pd.date_range(sd, ed)
    # get_data is expected to add an SPY column (it is read below).
    prices_all = get_data(syms, dates)
    # Fill gaps forward first, then backward for any leading gaps.
    prices_all = prices_all.ffill().bfill()

    prices = prices_all[syms]
    prices_SPY = prices_all['SPY']

    # Daily portfolio value: normalise to the first row, weight, scale, sum.
    normed = prices / prices.values[0]
    port_val = normed.multiply(allocs).multiply(sv).sum(axis=1)

    # The first daily return is NaN (nothing to shift from); mean() and
    # std() skip it.
    dr = (port_val / port_val.shift(1)) - 1
    # .iloc keeps these lookups positional on the date-indexed Series.
    cr = (port_val.iloc[-1] / port_val.iloc[0]) - 1
    adr = dr.mean()
    sddr = dr.std()
    sr = np.sqrt(sf) * dr.subtract(rfr).mean() / sddr
    ev = port_val.iloc[-1]

    if gen_plot:
        df_temp = pd.concat(
            [port_val / port_val.iloc[0], prices_SPY / prices_SPY.iloc[0]],
            keys=['Portfolio', 'SPY'], axis=1)
        plot_data(df_temp, title="Daily portfolio value and SPY")

    return cr, adr, sddr, sr, ev
66 | start_date = dt.datetime(2010,1,1) 67 | end_date = dt.datetime(2010,12,31) 68 | symbols = ['GOOG', 'AAPL', 'GLD', 'XOM'] 69 | allocations = [0.2, 0.3, 0.4, 0.1] 70 | start_val = 1000000 71 | risk_free_rate = 0.0 72 | sample_freq = 252 73 | 74 | #Read data 75 | dates=pd.date_range(start_date,end_date) 76 | df=get_data(symbols,dates) 77 | #plot_data(df) 78 | 79 | # Assess the portfolio 80 | cr, adr, sddr, sr, ev = assess_portfolio(sd = start_date, ed = end_date,\ 81 | syms = symbols, \ 82 | allocs = allocations,\ 83 | sv = start_val, \ 84 | gen_plot = False) 85 | 86 | # Print statistics 87 | print "Start Date:", start_date 88 | print "End Date:", end_date 89 | print "Symbols:", symbols 90 | print "Allocations:", allocations 91 | print "Sharpe Ratio:", sr 92 | print "Volatility (stdev of daily returns):", sddr 93 | print "Average Daily Return:", adr 94 | print "Cumulative Return:", cr 95 | 96 | if __name__ == "__main__": 97 | test_code() 98 | -------------------------------------------------------------------------------- /Project 2/optimization.py: -------------------------------------------------------------------------------- 1 | """MC1-P2: Optimize a portfolio.""" 2 | 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import datetime as dt 7 | import scipy.optimize as spo 8 | from util import get_data, plot_data 9 | 10 | def f(allocs,normed,sv): 11 | #normed=prices/prices.values[0] 12 | alloced=normed.multiply(allocs) 13 | pos_vals=alloced.multiply(sv) 14 | port_val=pos_vals.sum(axis=1) 15 | dr=(port_val/port_val.shift(1))-1 16 | std_dev=dr.std() 17 | return std_dev 18 | 19 | # This is the function that will be tested by the autograder 20 | # The student must update this code to properly implement the functionality 21 | def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \ 22 | syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False): 23 | 24 | sv=1000000 25 | rfr=0.0 26 | sf=252.0 27 | 28 | # Read in 
adjusted closing prices for given symbols, date range 29 | dates = pd.date_range(sd, ed) 30 | prices_all = get_data(syms, dates) # automatically adds SPY 31 | 32 | prices_all.fillna(method="ffill",inplace=True) 33 | prices_all.fillna(method="bfill",inplace=True) 34 | 35 | prices = prices_all[syms] # only portfolio symbols 36 | prices_SPY = prices_all['SPY'] # only SPY, for comparison later 37 | 38 | # find the allocations for the optimal portfolio 39 | # note that the values here ARE NOT meant to be correct for a test case 40 | # allocs = np.asarray([0.2, 0.2, 0.3, 0.3]) # add code here to find the allocations 41 | 42 | n=len(syms) 43 | guess = 1.0/n 44 | 45 | #Generate array allocs for 46 | a_list=[guess]*n 47 | allocs=np.asarray(a_list) 48 | #print 'Allocs:',allocs 49 | normed=prices/prices.values[0] 50 | 51 | my_bounds=[(0.0,1.0) for i in normed.columns] 52 | #print "Bounds:", my_bounds 53 | 54 | my_constraints = ({ 'type': 'eq', 'fun': lambda inputs: 1.0 - np.sum(inputs) }) 55 | 56 | my_result=spo.minimize(f,allocs,args=(normed,sv,), method='SLSQP', constraints=my_constraints, bounds=my_bounds, options={'disp':True}) 57 | opt_allocs=my_result.x 58 | opt_sddr=my_result.fun 59 | #normed=prices/prices.values[0] 60 | alloced=normed.multiply(opt_allocs) 61 | pos_vals=alloced.multiply(sv) 62 | port_val=pos_vals.sum(axis=1) 63 | 64 | cr, adr, sddr, sr = [0.25, 0.001, 0.0005, 2.1] # add code here to compute stats 65 | dr=(port_val/port_val.shift(1))-1 66 | cr=(port_val[-1]/port_val[0])-1 67 | adr=dr.mean() 68 | sddr=opt_sddr 69 | a=np.sqrt(sf) 70 | sr=a*((dr.subtract(rfr)).mean())/sddr 71 | 72 | # Get daily portfolio value 73 | # port_val = prices_SPY # add code here to compute daily portfolio values 74 | 75 | 76 | # Compare daily portfolio value with SPY using a normalized plot 77 | if gen_plot: 78 | # add code to plot here 79 | port_val = port_val/port_val[0] 80 | prices_SPY = prices_SPY/prices_SPY[0] 81 | df_temp = pd.concat([port_val, prices_SPY], 
keys=['Portfolio', 'SPY'], axis=1) 82 | plot_data(df_temp, title="Daily portfolio value and SPY") 83 | pass 84 | 85 | return opt_allocs, cr, adr, sddr, sr 86 | 87 | def test_code(): 88 | # This function WILL NOT be called by the auto grader 89 | # Do not assume that any variables defined here are available to your function/code 90 | # It is only here to help you set up and test your code 91 | 92 | # Define input parameters 93 | # Note that ALL of these values will be set to different values by 94 | # the autograder! 95 | 96 | start_date = dt.datetime(2008,6,1) 97 | end_date = dt.datetime(2009,6,1) 98 | symbols = ['IBM', 'X', 'GLD'] 99 | 100 | # Assess the portfolio 101 | allocations, cr, adr, sddr, sr = optimize_portfolio(sd = start_date, ed = end_date,\ 102 | syms = symbols, \ 103 | gen_plot = False) 104 | 105 | # Print statistics 106 | print "Start Date:", start_date 107 | print "End Date:", end_date 108 | print "Symbols:", symbols 109 | print "Allocations:", allocations 110 | print "Sharpe Ratio:", sr 111 | print "Volatility (stdev of daily returns):", sddr 112 | print "Average Daily Return:", adr 113 | print "Cumulative Return:", cr 114 | 115 | if __name__ == "__main__": 116 | # This code WILL NOT be called by the auto grader 117 | # Do not assume that it will be called 118 | test_code() 119 | -------------------------------------------------------------------------------- /Project 2/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anu003/CS7646-Machine-Learning-for-Trading/921a69fd1dc6135624f4560b4dcce80111df78ca/Project 2/report.pdf -------------------------------------------------------------------------------- /Project 3/marketsim.py: -------------------------------------------------------------------------------- 1 | """MC2-P1: Market simulator.""" 2 | 3 | import pandas as pd 4 | import numpy as np 5 | import datetime as dt 6 | import os 7 | from util import get_data, plot_data 8 
def compute_portvals(orders_file="./orders/orders.csv", start_val=1000000,
                     commission=9.95, impact=0.005):
    """Simulate the orders in orders_file and return daily portfolio values.

    orders_file -- path or file object of a CSV with a 'Date' index column
                   followed by symbol, order side and share-count columns
                   (read positionally, in that order)
    start_val   -- starting cash
    commission  -- fixed cash charge deducted for every order
    impact      -- fraction of each order's traded value deducted as cost

    Returns a one-column DataFrame ('port_val') indexed like the price
    data, holding the total value of shares plus cash on each day.
    """
    orders_df = pd.read_csv(orders_file, index_col='Date', parse_dates=True,
                            na_values=['nan'])
    orders_df = orders_df.sort_index()
    orders_df.index = pd.to_datetime(orders_df.index)

    symbols = list(set(orders_df['Symbol'].values))
    dates = pd.date_range(orders_df.index.values[0],
                          orders_df.index.values[-1])

    prices = get_data(symbols, dates)
    # A NaN price would otherwise survive the `prices * 0.0` below and
    # contaminate every later cumulative holding.
    prices = prices.ffill().bfill()
    # Cash is modelled as an asset whose price is always 1.
    prices['Cash'] = np.ones(prices.shape[0])

    # Per-day *changes* in holdings; the starting cash lands on day one.
    share_units = prices * 0.0
    share_units.iloc[0, -1] = start_val

    for order_date, order in orders_df.iterrows():
        stock_name = order.iloc[0]
        side = order.iloc[1]
        order_units = order.iloc[2]
        order_price = prices.loc[order_date, stock_name]

        # Anything other than "BUY" is treated as a sale, as before.
        direction = 1 if side == "BUY" else -1

        share_units.loc[order_date, stock_name] += direction * order_units
        share_units.loc[order_date, "Cash"] -= direction * order_units * order_price
        share_units.loc[order_date, "Cash"] -= commission
        share_units.loc[order_date, "Cash"] -= order_units * order_price * impact

    # A running total of the daily changes gives the holdings on each day.
    holdings = share_units.cumsum()

    port_vals = prices * holdings
    port_vals["port_val"] = port_vals.sum(axis=1)
    return port_vals[["port_val"]]
# this function should return a dataset (X and Y) that will work
# better for linear regression than decision trees
def best4LinReg(seed=1489683273):
    """Return (X, Y) where Y is an exact linear function of X.

    X is a 50x10 array of uniform random values in [0, 1) drawn after
    seeding NumPy's global generator with seed; Y[i] is the sum of row i.
    """
    np.random.seed(seed)
    rows, cols = 50, 10
    X = np.random.random((rows, cols))
    Y = X.sum(axis=1)
    return X, Y
class InsaneLearner(object):
    """Ensemble of 20 bag learners, each bagging 20 linear regressors."""

    def __init__(self, verbose=False):
        # verbose is accepted but not used by this learner.
        self.learner_list = [
            bg.BagLearner(lrl.LinRegLearner, kwargs={}, bags=20)
            for _ in range(20)
        ]

    def author(self):
        return 'nmenon34'  # replace tb34 with your Georgia Tech username

    def addEvidence(self, dataX, dataY):
        """
        @summary: Train every bag learner on the same data
        @param dataX: X values of data to add
        @param dataY: the Y training values
        """
        for bag in self.learner_list:
            bag.addEvidence(dataX, dataY)

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns a list with the mean of the bag learners' estimates per point.
        """
        per_bag = np.array([bag.query(points) for bag in self.learner_list])
        return np.mean(per_bag, axis=0).tolist()
all_data=np.append(dataX,new_dataY,axis=1) 30 | self.tree=self.build_tree(all_data) 31 | 32 | def build_tree(self,data): 33 | if data.shape[0]<=self.leaf_size: #if no.of nodes left < leaf-size, all are leaf nodes. hence, return mean 34 | return np.array([["Leaf",np.mean(data[:,-1]),-1,-1]]) 35 | 36 | if np.all(data[0,-1]==data[:,-1],axis=0): #if all values in dataY are same 37 | return np.array([["Leaf",data[0,-1],-1,-1]]) 38 | else: 39 | #feature=int(self.best_feature(data)) 40 | #randomly choose best feature 41 | feature = randint(0,data.shape[1]-2) 42 | Split_Val=np.median(data[:,feature]) #median of all values in column of best feature 43 | 44 | Max=max(data[:,feature]) 45 | if Max==Split_Val: 46 | return np.array([['Leaf',np.mean(data[:,-1]),-1,-1]]) #empty right sub-tree. Only left sub-tree formed 47 | 48 | Left_Tree=self.build_tree(data[data[:,feature]<=Split_Val]) #lesser values form left sub-tree 49 | Right_Tree=self.build_tree(data[data[:,feature]>Split_Val]) #larger values form right sub-tree 50 | 51 | root = np.array([[feature,Split_Val,1,Left_Tree.shape[0]+1]]) 52 | temp = np.append(root,Left_Tree,axis=0) 53 | return np.append(temp,Right_Tree,axis=0) 54 | 55 | def query(self,points): 56 | """ 57 | @summary: Estimate a set of test points given the model we built. 58 | @param points: should be a numpy array with each row corresponding to a specific query. 59 | @returns the estimated values according to the saved model. 
class QLearner(object):
    """Tabular Q-learner with optional replay of stored experience."""

    def author(self):
        return 'nmenon34'

    def __init__(self,
                 num_states=100,
                 num_actions=4,
                 alpha=0.2,
                 gamma=0.9,
                 rar=0.5,
                 radr=0.99,
                 dyna=0,
                 verbose=False):
        """
        @param num_states: number of rows in the Q-table
        @param num_actions: number of columns in the Q-table
        @param alpha: learning rate used in the Q update
        @param gamma: discount applied to the best next-state value
        @param rar: probability of choosing a random action in query()
        @param radr: factor rar is multiplied by after each query()
        @param dyna: number of stored experiences replayed per query()
        @param verbose: print the state/action (and reward) when true
        """
        self.num_states = num_states
        self.num_actions = num_actions
        self.alpha = alpha
        self.gamma = gamma
        self.rar = rar
        self.radr = radr
        self.dyna = dyna
        self.verbose = verbose
        self.s = 0
        self.a = 0
        self.exp_tuple = []  # every (s, a, s_prime, r) seen by query()
        self.Q = np.random.uniform(-1.0, 1.0, [num_states, num_actions])

    def querysetstate(self, s):
        """
        @summary: Update the state without updating the Q-table
        @param s: The new state
        @returns: The selected (greedy) action
        """
        self.s = s
        self.a = action = np.argmax(self.Q[s])
        if self.verbose:
            print("s = {} a = {}".format(s, action))
        return action

    def query(self, s_prime, r):
        """
        @summary: Update the Q table and return an action
        @param s_prime: The new state
        @param r: The new reward
        @returns: The selected action
        """
        # Learn from the transition that just happened, then remember it.
        self.update_Q(self.s, self.a, s_prime, r)
        self.exp_tuple.append((self.s, self.a, s_prime, r))

        if self.dyna > 0:
            self.execute_dyna()

        # Explore with probability rar, otherwise act greedily.
        if rand.uniform(0.0, 1.0) < self.rar:
            action = rand.randint(0, self.num_actions - 1)
        else:
            action = np.argmax(self.Q[s_prime])
        # NOTE(review): decay is applied on every call; confirm it was not
        # meant to apply only on the greedy branch.
        self.rar *= self.radr

        if self.verbose:
            print("s = {} a = {} r = {}".format(s_prime, action, r))
        self.s = s_prime
        self.a = action
        return action

    def update_Q(self, s, a, s_prime, r):
        """Blend Q[s, a] toward r plus the discounted best value at s_prime."""
        best_next = self.Q[s_prime].max()
        self.Q[s, a] = ((1 - self.alpha) * self.Q[s, a]
                        + self.alpha * (r + self.gamma * best_next))

    def execute_dyna(self):
        """Re-apply the Q update to self.dyna randomly chosen past experiences."""
        picks = np.random.randint(len(self.exp_tuple), size=self.dyna)
        for idx in picks:
            past_s, past_a, past_s_prime, past_r = self.exp_tuple[idx]
            self.update_Q(past_s, past_a, past_s_prime, past_r)
def testPolicy(symbol="JPM", sd=dt.datetime(2008, 1, 1),
               ed=dt.datetime(2009, 12, 31), sv=100000):
    """Build a look-ahead order book for symbol between sd and ed.

    Every trading day gets an entry order: 'BUY' if the next day's price
    is higher, otherwise 'SELL' (the last day, having no next day, is
    'SELL'). Every day after the first also gets the opposite of the
    previous day's entry order. All orders are for 1000 shares.

    sv is accepted for interface compatibility and is not used.

    Returns a DataFrame indexed by date with columns ORDER, SYMBOL and
    SHARES, sorted by date.
    """
    dates = pd.date_range(sd, ed)
    prices = ut.get_data([symbol], dates)[symbol]
    prices = prices / prices.iloc[0]

    entry = pd.Series(np.where(prices < prices.shift(-1), 'BUY', 'SELL'),
                      index=prices.index)
    # Opposite of yesterday's entry; the first day has no prior order.
    unwind = entry.shift(1).dropna().map({'BUY': 'SELL', 'SELL': 'BUY'})

    orders = pd.DataFrame({'ORDER': pd.concat([entry, unwind])})
    orders['SYMBOL'] = symbol
    orders['SHARES'] = 1000

    # A stable sort keeps each day's entry order ahead of its unwind order.
    orders.sort_index(kind='mergesort', inplace=True)
    return orders
def testPolicy(symbol=['JPM'], sd=dt.datetime(2010, 1, 1),
               ed=dt.datetime(2011, 12, 31), sv=100000):
    """Run the rule-based strategy, plot it against the benchmark, return its values.

    symbol -- ticker to trade; a list/tuple is accepted and its first
              element is used
    sd, ed -- first and last calendar date of the window
    sv     -- starting cash passed to both portfolio simulations

    Returns the value returned by compute_portvals for the generated
    orders (daily portfolio values).
    """
    lookback = 10
    sym = symbol[0] if isinstance(symbol, (list, tuple)) else symbol
    holdings = {sym: 0}

    dates = pd.date_range(sd, ed)
    price = get_data([sym], dates)[sym]

    # NOTE(review): this requires a get_indicators that accepts a price
    # Series and returns 'SMA', 'bb_value' and 'momentum' columns --
    # confirm against the indicators module that is actually imported.
    df_i = get_indicators(price)
    sma = df_i['SMA']
    bbp = df_i['bb_value']
    moment = df_i['momentum']

    # Timestamps throughout, so the Date column has one comparable type.
    orders = [[pd.Timestamp(sd), sym, 'HOLD', 0]]
    for day in range(lookback + 1, df_i.shape[0]):
        # NOTE(review): day indexes df_i rows but is also used to index
        # price -- presumably both share the same row positions; verify.
        today = price.index[day]
        sma_now = sma.iloc[day]
        bbp_now = bbp.iloc[day]
        mom_now = moment.iloc[day]

        if sma_now < 0.5 and bbp_now < 0.9 and mom_now < 0:
            if holdings[sym] < 1000:
                holdings[sym] += 1000
                orders.append([today, sym, 'BUY', 1000])
        elif sma_now > 2 and bbp_now > 1 and mom_now < 0:
            if holdings[sym] > 0:
                holdings[sym] -= 2000
                orders.append([today, sym, 'SELL', 2000])
            elif -1000 <= holdings[sym] <= 0:
                holdings[sym] -= 1000
                orders.append([today, sym, 'SELL', 1000])
        elif sma_now > 1 and sma.iloc[day - 1] < 1 and holdings[sym] > 0:
            # SMA value crossed above 1 while long: flatten.
            holdings[sym] = 0
            orders.append([today, sym, 'SELL', 1000])
        elif sma_now <= 1 and sma.iloc[day - 1] > 1 and holdings[sym] < 0:
            # SMA value crossed back to 1 or below while short: flatten.
            holdings[sym] = 0
            orders.append([today, sym, 'BUY', 1000])
    orders.append([pd.Timestamp(ed), sym, 'HOLD', 0])

    res = pd.DataFrame(orders, columns=['Date', 'Symbol', 'Order', 'Shares'])

    # Bind the simulation result to the name the return statement uses.
    port = compute_portvals(res, start_val=sv)

    # NOTE(review): benchmark_trades is not defined in this function; it is
    # presumably supplied by one of the star imports -- verify.
    ben = benchmark_trades(sym)
    bench = mm.compute_portvals(ben, sv)

    plt.figure(figsize=(20, 10))
    manual_line, = plt.plot(port / port.iloc[0], color='black')
    bench_line, = plt.plot(bench / bench.iloc[0], color='blue')
    plt.legend([manual_line, bench_line], ['Manual', 'Benchmark'])
    plt.xlabel('Dates')
    plt.ylabel('Prices(normalized)')
    plt.show()

    return port
def get_indicators(df_prices, sym, window=10, vol_window=7):
    """Build the technical-indicator table for one symbol.

    The price column is first normalized by its first value, then the
    following columns are derived from it:

    * ``price``                    -- normalized price
    * ``rolling mean``             -- ``window``-day simple moving average
    * ``upper band``/``lower band``-- rolling mean +/- 2 rolling std
    * ``bb value``                 -- (price - rolling mean) / (25 * rolling std)
    * ``Commodity Channel Index``  -- (price - rolling mean) / (2.5 * overall std)
    * ``Volatility``               -- ``vol_window``-day rolling std, scaled by 3.5

    Parameters
    ----------
    df_prices : pandas.DataFrame
        Price table containing a column named ``sym``.
    sym : str
        Column of ``df_prices`` to analyse.
    window : int, optional
        Look-back of the rolling mean / rolling std (default 10, the value
        that used to be hard-coded).
    vol_window : int, optional
        Look-back of the volatility column (default 7, previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        The indicator columns listed above, with every row that contains a
        NaN (i.e. the warm-up period of the rolling windows) dropped.
    """
    prices = df_prices[sym]
    # .iloc[0] is an explicit positional lookup; plain prices[0] on a
    # date-indexed Series relies on a deprecated integer fallback.
    prices = prices / prices.iloc[0]

    # One rolling object feeds both the mean and the std.
    rolling = prices.rolling(window=window, center=False)
    rm = rolling.mean()
    sd = rolling.std()

    df_indicators = pd.DataFrame(index=prices.index)

    # 1] SMA
    df_indicators['price'] = prices
    df_indicators['rolling mean'] = rm

    # 2] Bollinger bands
    df_indicators['upper band'] = rm + (2 * sd)
    df_indicators['lower band'] = rm - (2 * sd)
    # NOTE(review): the bands use 2 * sd but this divisor is 25 * sd; it looks
    # like a typo for 2 (or 2.5). Kept as-is because downstream plots may
    # have been tuned against it -- confirm before changing.
    df_indicators['bb value'] = (prices - rm) / (25 * sd)

    # 3] Commodity channel index (scaled by the std of the whole series)
    df_indicators['Commodity Channel Index'] = (prices - rm) / (2.5 * prices.std())

    # 4] Volatility
    volatility = prices.rolling(window=vol_window, center=False).std()
    df_indicators['Volatility'] = volatility * 3.5

    return df_indicators.dropna()
def compute_portvals(order, start_val = 1000000, commission=0.00, impact=0.00):
    """Simulate a book of orders and return the daily portfolio value.

    Parameters
    ----------
    order : pandas.DataFrame
        One row per order, indexed by trade date, with the columns
        ``Symbol``, ``Order`` and ``Shares``. Rows whose ``Order`` is neither
        'BUY' nor 'SELL' (e.g. 'HOLD') are ignored.
    start_val : number, optional
        Starting cash.
    commission : float, optional
        Fixed fee deducted from cash for every executed order.
    impact : float, optional
        Market-impact fraction; ``impact * shares * price`` is deducted from
        cash for every executed order.

    Returns
    -------
    pandas.Series
        Total portfolio value (shares at market price plus cash) for every
        date returned by ``get_data`` between the first and last order date.

    Raises
    ------
    ValueError
        If ``order`` contains no rows.

    Notes
    -----
    Summary statistics are printed as a side effect. An order dated on a day
    that is missing from the price table raises ``KeyError``.
    """
    if len(order) == 0:
        raise ValueError("order must contain at least one row")

    order = order.sort_index()
    symbols = list(set(order['Symbol'].values))
    dates_range = pd.date_range(order.index[0], order.index[-1])

    prices = get_data(symbols, dates_range)
    # Fill gaps the same way the other simulators in this repo do, so a
    # missing quote does not turn the whole portfolio value into NaN.
    prices = prices.ffill().bfill()
    prices['Cash'] = 1.0  # cash is "priced" at 1 so prices * holdings works

    # Per-day change in each position (and in cash) caused by the orders.
    trade = pd.DataFrame(0.0, index=prices.index, columns=prices.columns)
    for date, row in order.iterrows():
        side = row['Order']
        if side == 'BUY':
            direction = 1
        elif side == 'SELL':
            direction = -1
        else:
            # Previously the direction of the preceding row leaked into
            # unrecognised orders and they were still charged commission.
            continue

        symbol = row['Symbol']
        shares = row['Shares']
        price = prices.at[date, symbol]

        trade.at[date, symbol] += direction * shares
        trade.at[date, 'Cash'] -= direction * shares * price
        trade.at[date, 'Cash'] -= commission
        trade.at[date, 'Cash'] -= impact * shares * price

    # Running positions: cumulative trades plus the initial cash.
    holdings = trade.cumsum()
    holdings['Cash'] += start_val

    portvals = (prices * holdings).sum(axis=1)

    cum_ret = (portvals.iloc[-1] / portvals.iloc[0]) - 1
    dr = (portvals / portvals.shift(1)) - 1
    avg_daily_ret = dr.mean()
    std_daily_ret = dr.std()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("")
    print("Cumulative Return of Fund: {}".format(cum_ret))

    print("")
    print("Standard Deviation of Fund: {}".format(std_daily_ret))

    print("")
    print("Average Daily Return of Fund: {}".format(avg_daily_ret))

    print("")
    print("Final Portfolio Value: {}".format(portvals.iloc[-1]))

    return portvals
139 | # Define input parameters 140 | 141 | of = "./orders/orders2.csv" 142 | sv = 1000000 143 | 144 | # Process orders 145 | portvals = compute_portvals(orders_file = of, start_val = sv) 146 | if isinstance(portvals, pd.DataFrame): 147 | portvals = portvals[portvals.columns[0]] # just get the first column 148 | else: 149 | "warning, code did not return a DataFrame" 150 | 151 | # Get portfolio stats 152 | # Here we just fake the data. you should use your code from previous assignments. 153 | start_date = dt.datetime(2008,1,1) 154 | end_date = dt.datetime(2008,6,1) 155 | 156 | cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = [0.2, 0.01, 0.02, 1.5] 157 | cum_ret_SPY, avg_daily_ret_SPY, std_daily_ret_SPY, sharpe_ratio_SPY = [0.2, 0.01, 0.02, 1.5] 158 | 159 | # sf = 252 160 | # rfr = 0 161 | # cum_ret = (portvals[-1] / portvals[0]) - 1 162 | # dr = (portvals / portvals.shift(1)) - 1 163 | # 164 | # avg_daily_ret = dr.mean() 165 | # std_daily_ret = dr.std() 166 | # 167 | # sharpe_ratio = np.sqrt(sf) * ((dr.subtract(rfr)).mean() / (dr.std())) 168 | 169 | 170 | 171 | 172 | 173 | 174 | # Compare portfolio against $SPX 175 | print "Date Range: {} to {}".format(start_date, end_date) 176 | print 177 | #print "Sharpe Ratio of Fund: {}".format(sharpe_ratio) 178 | print "Sharpe Ratio of SPY : {}".format(sharpe_ratio_SPY) 179 | print 180 | print "Cumulative Return of Fund: {}".format(cum_ret) 181 | print "Cumulative Return of SPY : {}".format(cum_ret_SPY) 182 | print 183 | print "Standard Deviation of Fund: {}".format(std_daily_ret) 184 | print "Standard Deviation of SPY : {}".format(std_daily_ret_SPY) 185 | print 186 | print "Average Daily Return of Fund: {}".format(avg_daily_ret) 187 | print "Average Daily Return of SPY : {}".format(avg_daily_ret_SPY) 188 | print 189 | #print "Final Portfolio Value: {}".format(portvals[-1]) 190 | 191 | if __name__ == "__main__": 192 | test_code() 193 | -------------------------------------------------------------------------------- 
class BagLearner(object):
    """Bootstrap-aggregating ensemble that votes by majority (mode).

    ``bags`` independent instances of ``learner`` are created; each is
    trained on a random sample (with replacement) of 60% of the training
    rows, and ``query`` returns the most common prediction per point.
    """

    def __init__(self, learner, kwargs = None, bags = 20, boost = False, verbose = False):
        """
        @summary Create the ensemble
        @param learner: class of the base learner; instantiated as learner(**kwargs)
        @param kwargs: constructor arguments for the base learner
                       (defaults to {"leaf_size": 1})
        @param bags: number of base learners
        @param boost: accepted for interface compatibility; not used
        @param verbose: accepted for interface compatibility; not used
        """
        # A dict literal as the default value would be shared by every call.
        if kwargs is None:
            kwargs = {"leaf_size": 1}
        self.learner = learner
        self.learner_list = [learner(**kwargs) for _ in range(bags)]
        self.bags = bags

    def author(self):
        """
        @summary Returning the author user ID
        """
        return 'nmenon34'

    def addEvidence(self, dataX, dataY):
        """
        @summary Adding the training data
        @param dataX: 2-D array, one row per sample
        @param dataY: 1-D array of targets, same length as dataX
        """
        dataX = np.asarray(dataX)
        dataY = np.asarray(dataY)
        total_rows = dataX.shape[0]
        if total_rows == 0:
            raise ValueError("cannot train on an empty data set")

        # 60% of the rows per bag, but never an empty bag.
        train_rows = max(1, int(0.6 * total_rows))
        for learner in self.learner_list:
            # Drawn from numpy's generator so that np.random.seed(), which
            # the experiment scripts set, makes the bags reproducible.
            picks = np.random.randint(0, total_rows, size=train_rows)
            learner.addEvidence(dataX[picks], dataY[picks])

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built
        @param points: 2-D array, one row per query
        @returns 1-D array holding the most common prediction for each point
        """
        votes = np.array([learner.query(points) for learner in self.learner_list])
        # Depending on the SciPy release the mode comes back with shape
        # (1, n) or (n,); flattening yields one prediction per point either
        # way (indexing [0][0] collapses to a scalar on newer releases).
        winners = stats.mode(votes, axis=0)[0]
        return np.asarray(winners).reshape(-1)
for i in range(0, prices.shape[0]- 1): 41 | if flag == 0: 42 | if bollinger.ix[i,sym] < 0.2 or psma.ix[i,sym] < 0.6 or volatility.ix[i,sym] < -0.1: 43 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','BUY',1000] 44 | flag = 1 45 | index +=1 46 | buydate.append(prices.index[i].date()) 47 | elif bollinger.ix[i,sym] > 0.8 or psma.ix[i,sym] > 1.1 or volatility.ix[i,sym] > 0.1: 48 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','SELL',1000] 49 | flag=-1 50 | index+=1 51 | selldate.append(prices.index[i].date()) 52 | elif flag == -1: 53 | if bollinger.ix[i,sym] < 0.1 or psma.ix[i,sym] < 0.65 or volatility.ix[i,sym] < -0.2: 54 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','BUY',2000] 55 | flag = 1 56 | index +=1 57 | buydate.append(prices.index[i].date()) 58 | elif bollinger.ix[i,sym] < 0.2 or psma.ix[i,sym] < 0.6 or volatility.ix[i,sym] < -0.1: 59 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','BUY',1000] 60 | flag = 0 61 | index +=1 62 | elif flag == 1: 63 | if bollinger.ix[i,sym] > 0.9 or psma.ix[i,sym] > 1.5 or volatility.ix[i,sym] > 0.2: 64 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','SELL',2000] 65 | flag = -1 66 | index +=1 67 | selldate.append(prices.index[i].date()) 68 | elif bollinger.ix[i,sym] > 0.8 or psma.ix[i,sym] > 1.1 or volatility.ix[i,sym]>0.1: 69 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','SELL',1000] 70 | flag = 0 71 | index +=1 72 | 73 | if flag==1: 74 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','SELL',1000] 75 | if flag == -1: 76 | trades.loc[index] = [prices.index[i].strftime('%Y-%m-%d'),'JPM','BUY',1000] 77 | 78 | 79 | return trades, buydate, selldate 80 | 81 | 82 | if __name__ == "__main__": 83 | sd = dt.datetime(2010,1,1) 84 | ed = dt.datetime(2011, 12, 31) 85 | symbol = ['JPM'] 86 | dates = dates = pd.date_range(sd, ed) 87 | trades,buydate,selldate = testPolicy(symbol=['JPM'], sd=sd ,ed=ed, sv=100000) 88 | 
prices_all = get_data(symbol, dates) 89 | 90 | 91 | bench = pd.DataFrame(columns=['Date', 'Symbol', 'Order', 'Shares']) 92 | bench.loc[0] = [prices_all.index[0].strftime('%Y-%m-%d'),'JPM','BUY',1000] 93 | bench.loc[1] = [prices_all.index[-1].strftime('%Y-%m-%d'),'JPM','SELL',1000] 94 | 95 | # Manual Strategy 96 | ms_port_val = compute_portvals(trades,100000,9.95,0.005) 97 | 98 | # Benchmark 99 | bench_port_val = compute_portvals(bench,100000,9.95,0.005) 100 | 101 | # Printing Portfolio statistics 102 | daily_returns = (ms_port_val / ms_port_val.shift(1)) - 1 103 | daily_returns = daily_returns[1:] 104 | cr = (ms_port_val.iloc[-1] / ms_port_val.iloc[0]) - 1 105 | adr = daily_returns.mean() 106 | sddr = daily_returns.std() 107 | 108 | print "Manual Strategy Stats" 109 | print "CR " + str(cr) 110 | print "Avg of daily returns " + str(adr) 111 | print "Std deviation of daily returns " + str(sddr) 112 | 113 | 114 | # Printing Benchmark statistics 115 | bench_dr = (bench_port_val / bench_port_val.shift(1)) - 1 116 | bench_dr = bench_dr[1:] 117 | cr = (bench_port_val.iloc[-1] / bench_port_val.iloc[0]) - 1 118 | adr = bench_dr.mean() 119 | sddr = bench_dr.std() 120 | 121 | print "\nBenchmark Stats" 122 | print "CR " + str(cr) 123 | print "Avg of daily returns " + str(adr) 124 | print "Std deviation of daily returns " + str(sddr) 125 | 126 | # Plotting charts 127 | ms_port_val = ms_port_val / ms_port_val[0] 128 | bench_port_val = bench_port_val / bench_port_val[0] 129 | ax = ms_port_val.plot(fontsize=12, color="black", label="Manual Strategy") 130 | bench_port_val.plot(ax=ax, color="blue", label='Benchmark') 131 | for date in buydate: 132 | ax.axvline(date,color="green") 133 | for date in selldate: 134 | ax.axvline(date,color="red") 135 | plt.title(" Manual Strategy - out sample ") 136 | ax.set_xlabel("Date") 137 | ax.set_ylabel("Portfolio Value") 138 | plt.legend() 139 | plt.show() 140 | -------------------------------------------------------------------------------- 
class RTLearner(object):
    """Random-tree learner.

    The tree is stored in ``self.learner`` as a float array with one row per
    node and four columns::

        [feature index, split value, offset to left child, offset to right child]

    A leaf row has ``_LEAF`` in the first column and its prediction in the
    second. Child offsets are relative to the node's own row.
    """

    _LEAF = -1  # marker stored in the feature-index column of leaf rows

    def __init__(self,leaf_size,verbose=False):
        """
        @param leaf_size: a node with this many rows or fewer becomes a leaf
        @param verbose: stored on the instance; not otherwise used here
        """
        self.leaf_size=leaf_size
        self.verbose=verbose
        self.learner=[]

    def author(self):
        """
        @summary Returning the author user ID
        """
        return 'nmenon34'

    def build_tree(self, data):
        """
        @summary Recursively build the tree for ``data``
        @param data: 2-D array whose last column is Y and whose other
                     columns are the features
        @returns the node table described in the class docstring
        """
        def make_leaf():
            # Every kind of leaf predicts the mean Y of the rows it holds.
            # (The leaf_size branch used to take only the first row's Y.)
            return np.array([[self._LEAF, np.mean(data[:, -1]), -1, -1]], dtype=float)

        if data.shape[0] <= self.leaf_size:
            return make_leaf()

        # Split on a feature picked uniformly at random.
        feature = int(np.random.randint(data.shape[1] - 1))
        column = data[:, feature]

        # Nothing to split on if the feature is constant ...
        if np.all(column == column[0]):
            return make_leaf()

        # ... or if the median equals the maximum (right side would be empty).
        split_val = np.median(column)
        if column.max() == split_val:
            return make_leaf()

        left_tree = self.build_tree(data[column <= split_val])
        right_tree = self.build_tree(data[column > split_val])
        # The left subtree starts on the next row, the right one after it.
        root = np.array([[feature, split_val, 1, left_tree.shape[0] + 1]], dtype=float)
        return np.vstack((root, left_tree, right_tree))

    def addEvidence(self, Xtrain, Ytrain):
        """
        @summary Train the tree
        @param Xtrain: 2-D array of features, one row per sample
        @param Ytrain: 1-D array of targets
        """
        features = np.asarray(Xtrain, dtype=float)
        targets = np.asarray(Ytrain, dtype=float).reshape(-1, 1)
        if features.shape[0] == 0:
            raise ValueError("cannot train on an empty data set")
        self.learner = self.build_tree(np.hstack((features, targets)))

    def query(self, trainX):
        """
        @summary Predict Y for every row of ``trainX``
        @param trainX: 2-D array of features, one row per query
        @returns 1-D float array of predictions
        """
        tree = np.asarray(self.learner, dtype=float)
        if tree.size == 0:
            raise ValueError("query() called before addEvidence()")

        predictions = np.empty(len(trainX), dtype=float)
        for i, point in enumerate(trainX):
            row = 0
            # Walk down from the root until a leaf row is reached.
            while tree[row, 0] != self._LEAF:
                feature = int(tree[row, 0])
                if float(point[feature]) <= tree[row, 1]:
                    row += int(tree[row, 2])
                else:
                    row += int(tree[row, 3])
            predictions[i] = tree[row, 1]
        return predictions
3 : Volatility 52 | volatility = getVolatility(prices,lookback,syms) 53 | 54 | # Constructing trainX 55 | df1=sma.rename(columns={symbol:'SMA'}) 56 | df2=bba.rename(columns={symbol:'BBA'}) 57 | df3=volatility.rename(columns={symbol:'VOL'}) 58 | 59 | indicators = pd.concat((df1,df2,df3),axis=1) 60 | indicators.fillna(0,inplace=True) 61 | indicators=indicators[:-5] 62 | trainX = indicators.values 63 | 64 | # Constructing trainY 65 | trainY=[] 66 | for i in range(prices.shape[0]-5): 67 | ratio = (prices.ix[i+5,symbol]-prices.ix[i,symbol])/prices.ix[i,symbol] 68 | if ratio > (0.02 + self.impact): 69 | trainY.append(1) 70 | elif ratio < (-0.02 - self.impact): 71 | trainY.append(-1) 72 | else: 73 | trainY.append(0) 74 | trainY=np.array(trainY) 75 | 76 | # Training 77 | self.learner.addEvidence(trainX,trainY) 78 | 79 | 80 | # this method should use the existing policy and test it against new data 81 | def testPolicy(self, symbol = "IBM", \ 82 | sd=dt.datetime(2009,1,1), \ 83 | ed=dt.datetime(2010,1,1), \ 84 | sv = 10000): 85 | 86 | syms=[symbol] 87 | dates = pd.date_range(sd, ed) 88 | prices_all = ut.get_data(syms, dates) # automatically adds SPY 89 | prices = prices_all[syms] # only portfolio symbols 90 | # prices_SPY = prices_all['SPY'] # only SPY, for comparison later 91 | 92 | 93 | # Getting the technical indicators 94 | lookback = 2 95 | #Indicator no. 1 : SMA 96 | sma = getSMA(prices,lookback,syms) 97 | copysma = sma.copy() 98 | #Indicator no. 2 : Bollinger bands 99 | bba = getBollinger(prices,syms,lookback,copysma) 100 | #Indicator no. 
3 : Volatility 101 | volatility = getVolatility(prices,lookback,syms) 102 | 103 | 104 | # Constructing testX 105 | df1=sma.rename(columns={symbol:'SMA'}) 106 | df2=bba.rename(columns={symbol:'BBA'}) 107 | df3=volatility.rename(columns={symbol:'VOL'}) 108 | 109 | indicators = pd.concat((df1,df2,df3),axis=1) 110 | indicators.fillna(0,inplace=True) 111 | testX = indicators.values 112 | 113 | # Querying the learner for testY 114 | testY=self.learner.query(testX) 115 | 116 | # Constructing trades DataFrame 117 | trades = prices_all[syms].copy() 118 | trades.loc[:]=0 119 | flag=0 120 | for i in range(0,prices.shape[0]-1): 121 | if flag==0: 122 | if testY[i]>0: 123 | trades.values[i,:] = 1000 124 | flag = 1 125 | elif testY[i]<0: 126 | trades.values[i,:] = -1000 127 | flag = -1 128 | 129 | elif flag==1: 130 | if testY[i]<0: 131 | trades.values[i,:]=-2000 132 | flag=-1 133 | elif testY[i]==0: 134 | trades.values[i,:]=-1000 135 | flag = 0 136 | 137 | else: 138 | if testY[i]>0: 139 | trades.values[i,:]=2000 140 | flag=1 141 | elif testY[i]==0: 142 | trades.values[i,:]=1000 143 | flag=0 144 | 145 | if flag==-1: 146 | trades.values[prices.shape[0]-1,:]=1000 147 | elif flag==1: 148 | trades.values[prices.shape[0]-1,:]=-1000 149 | 150 | return trades 151 | 152 | if __name__=="__main__": 153 | print "One does not simply think up a strategy" 154 | st = StrategyLearner() 155 | 156 | st.addEvidence(symbol="AAPL",sd=dt.datetime(2008,1,1),ed=dt.datetime(2009,12,31),sv=100000) 157 | st.testPolicy(symbol="AAPL",sd=dt.datetime(2010,1,1),ed=dt.datetime(2011,12,31),sv=100000) 158 | -------------------------------------------------------------------------------- /Project 8/experiment1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experiment 1 3 | 4 | @Name : Nidhi Nirmal Menon 5 | @UserID : nmenon34 6 | """ 7 | 8 | import datetime as dt 9 | import pandas as pd 10 | import util as ut 11 | import random 12 | import numpy as np 13 | import 
def trades_ST(prices,symbol):
    """
    @summary Convert a signed-shares table into an orders table
    @param prices: DataFrame indexed by date whose column ``symbol`` holds
                   the signed number of shares traded that day
                   (positive = buy, negative = sell, 0 = nothing)
    @param symbol: column of ``prices`` to read; also written to the
                   'Symbol' column of the result
    @returns DataFrame with columns Date ('YYYY-MM-DD' string), Symbol,
             Order ('BUY'/'SELL') and Shares (positive), one row per
             non-zero entry, in date order
    """
    columns = ['Date', 'Symbol', 'Order', 'Shares']
    rows = []
    # Walk dates and quantities positionally; no label lookups needed.
    for date, qty in zip(prices.index, prices[symbol].values):
        if qty > 0:
            side = 'BUY'
        elif qty < 0:
            side = 'SELL'
        else:
            continue  # zero (or NaN): no order that day
        shares = abs(qty)
        # Report whole-share quantities as plain ints (1000, not 1000.0).
        if float(shares).is_integer():
            shares = int(shares)
        rows.append([date.strftime('%Y-%m-%d'), symbol, side, shares])
    # Building the frame once avoids growing it row by row.
    return pd.DataFrame(rows, columns=columns)
Benchmark 67 | bench = pd.DataFrame(columns=['Date', 'Symbol', 'Order', 'Shares']) 68 | bench.loc[0] = [prices_all.index[0].strftime('%Y-%m-%d'),'JPM','BUY',1000] 69 | bench.loc[1] = [prices_all.index[-1].strftime('%Y-%m-%d'),'JPM','SELL',1000] 70 | bench_port_val = compute_portvals(bench,sd,ed,100000,0,0) 71 | 72 | # ManualStrategy 73 | trades,buydate,selldate = testPolicy(symbol=['JPM'], sd=sd ,ed=ed, sv=100000) 74 | ms_port_val = compute_portvals(trades,sd,ed,100000,0,0) 75 | 76 | # Printing Portfolio statistics 77 | daily_returns = (ms_port_val / ms_port_val.shift(1)) - 1 78 | daily_returns = daily_returns[1:] 79 | cr = (ms_port_val.iloc[-1] / ms_port_val.iloc[0]) - 1 80 | adr = daily_returns.mean() 81 | sddr = daily_returns.std() 82 | a = np.sqrt(252.0) 83 | sr = (a*(adr))/sddr 84 | 85 | ''' 86 | print "Manual Strategy Stats" 87 | print "CR " + str(cr) 88 | print "Avg of daily returns " + str(adr) 89 | print "Std deviation of daily returns " + str(sddr) 90 | print "Sharpe Ratio " + str(sr) 91 | ''' 92 | 93 | # Printing Benchmark statistics 94 | bench_dr = (bench_port_val / bench_port_val.shift(1)) - 1 95 | bench_dr = bench_dr[1:] 96 | cr = (bench_port_val.iloc[-1] / bench_port_val.iloc[0]) - 1 97 | adr = bench_dr.mean() 98 | sddr = bench_dr.std() 99 | a = np.sqrt(252.0) 100 | sr = (a*(adr))/sddr 101 | 102 | ''' 103 | print "\nBenchmark Stats" 104 | print "CR " + str(cr) 105 | print "Avg of daily returns " + str(adr) 106 | print "Std deviation of daily returns " + str(sddr) 107 | print "Sharpe Ratio " + str(sr) 108 | ''' 109 | 110 | # Printing StrategyLearner statistics 111 | st_dr = (st_port_val / st_port_val.shift(1)) - 1 112 | st_dr = st_dr[1:] 113 | cr = (st_port_val.iloc[-1] / st_port_val.iloc[0]) - 1 114 | adr = st_dr.mean() 115 | sddr = st_dr.std() 116 | a = np.sqrt(252.0) 117 | sr = (a*(adr))/sddr 118 | 119 | ''' 120 | print "\nStrategy Learner Stats" 121 | print "CR " + str(cr) 122 | print "Avg of daily returns " + str(adr) 123 | print "Std deviation 
def trades_ST(prices,symbol):
    """
    @summary Convert a signed-shares table into an orders table
    @param prices: DataFrame indexed by date whose column ``symbol`` holds
                   the signed number of shares traded that day
                   (positive = buy, negative = sell, 0 = nothing)
    @param symbol: column of ``prices`` to read; also written to the
                   'Symbol' column of the result
    @returns DataFrame with columns Date ('YYYY-MM-DD' string), Symbol,
             Order ('BUY'/'SELL') and Shares (positive), one row per
             non-zero entry, in date order
    """
    columns = ['Date', 'Symbol', 'Order', 'Shares']
    rows = []
    # Walk dates and quantities positionally; no label lookups needed.
    for date, qty in zip(prices.index, prices[symbol].values):
        if qty > 0:
            side = 'BUY'
        elif qty < 0:
            side = 'SELL'
        else:
            continue  # zero (or NaN): no order that day
        shares = abs(qty)
        # Report whole-share quantities as plain ints (1000, not 1000.0).
        if float(shares).is_integer():
            shares = int(shares)
        rows.append([date.strftime('%Y-%m-%d'), symbol, side, shares])
    # Building the frame once avoids growing it row by row.
    return pd.DataFrame(rows, columns=columns)
89 | 90 | ''' 91 | print "\nStrategy Learner Stats - impact=0.0005" 92 | print "CR " + str(cr) 93 | print "Avg of daily returns " + str(adr) 94 | print "Std deviation of daily returns " + str(sddr) 95 | print "Sharpe Ratio " + str(sr) 96 | ''' 97 | 98 | # Printing StrategyLearner statistics 99 | st_dr = (st_port_val2 / st_port_val2.shift(1)) - 1 100 | st_dr = st_dr[1:] 101 | cr = (st_port_val2.iloc[-1] / st_port_val2.iloc[0]) - 1 102 | adr = st_dr.mean() 103 | sddr = st_dr.std() 104 | a = np.sqrt(252.0) 105 | sr = (a*(adr))/sddr 106 | 107 | ''' 108 | print "\nStrategy Learner Stats - impact=0.005" 109 | print "CR " + str(cr) 110 | print "Avg of daily returns " + str(adr) 111 | print "Std deviation of daily returns " + str(sddr) 112 | print "Sharpe Ratio " + str(sr) 113 | ''' 114 | 115 | # Printing StrategyLearner statistics 116 | st_dr = (st_port_val3 / st_port_val3.shift(1)) - 1 117 | st_dr = st_dr[1:] 118 | cr = (st_port_val3.iloc[-1] / st_port_val3.iloc[0]) - 1 119 | adr = st_dr.mean() 120 | sddr = st_dr.std() 121 | a = np.sqrt(252.0) 122 | sr = (a*(adr))/sddr 123 | 124 | ''' 125 | print "\nStrategy Learner Stats - impact=0.05" 126 | print "CR " + str(cr) 127 | print "Avg of daily returns " + str(adr) 128 | print "Std deviation of daily returns " + str(sddr) 129 | print "Sharpe Ratio " + str(sr) 130 | ''' 131 | 132 | # Plotting charts 133 | st_port_val = st_port_val / st_port_val[0] 134 | st_port_val2 = st_port_val2 / st_port_val2[0] 135 | st_port_val3 = st_port_val3 / st_port_val3[0] 136 | ax = st_port_val.plot(fontsize=12, color="black", label="Strategy Learner - impact = 0.0005") 137 | st_port_val2.plot(ax=ax, color="blue", label='Strategy Learner - impact = 0.005') 138 | st_port_val3.plot(ax=ax, color="green", label='Strategy Learner - impact = 0.05') 139 | plt.title("Experiment 2") 140 | ax.set_xlabel("Date") 141 | ax.set_ylabel("Portfolio Value") 142 | plt.legend() 143 | plt.show() 144 | 
def author(self):
    """
    @summary Returning the author user ID
    """
    return 'nmenon34'


# Calculating the Simple Moving Average
def getSMA(prices, lookback, symbols):
    """
    @summary Rolling mean of the selected price columns
    @param prices: DataFrame of prices, one column per symbol
    @param lookback: window length in rows
    @param symbols: column label(s) to select from prices
    @return: rolling mean; the first lookback-1 rows are NaN
    """
    return prices[symbols].rolling(window=lookback, center=False).mean()


def getBollinger(prices, symbols, lookback, sma):
    """
    @summary Bollinger-band value (price - rolling mean) / (2 * rolling std)
    @param prices: DataFrame of prices, one column per symbol
    @param symbols: column label(s) to select from prices
    @param lookback: window length in rows
    @param sma: unused; the rolling mean is recomputed here. Kept so existing
                callers do not break.
    @return: Bollinger value; the first lookback-1 rows are NaN
    """
    price = prices[symbols]
    window = price.rolling(window=lookback, center=False)
    return (price - window.mean()) / (2 * window.std())


def getVolatility(prices, lookback, symbols):
    """
    @summary Rolling standard deviation of the selected price columns
    @param prices: DataFrame of prices, one column per symbol
    @param lookback: window length in rows
    @param symbols: column label(s) to select from prices
    @return: rolling standard deviation; the first lookback-1 rows are NaN
    """
    return prices[symbols].rolling(window=lookback, center=False).std()


# Calculating Price/SMA
def priceBySMA(prices, lookback, sma, symbols):
    """
    @summary Replace each SMA value with the ratio price / SMA
    @param prices: DataFrame of prices, one column per symbol
    @param lookback: unused by the vectorised computation; kept so existing
                     callers do not break
    @param sma: DataFrame of moving averages with the same index as prices;
                it is modified IN PLACE, as before
    @param symbols: list of column labels to convert
    @return: the same sma object, now holding price/SMA

    Every row with a defined SMA is converted. The previous loop started at
    row `lookback`, which left the first valid SMA (row lookback-1) as a raw
    price-level value mixed in with the ratios. Rows where the SMA is NaN
    stay NaN.
    """
    cols = list(symbols)
    # Vectorised division replaces the removed `.ix` element-wise loop
    sma[cols] = prices[cols] / sma[cols]
    return sma
4 | 5 | @Name : Nidhi Nirmal Menon 6 | @UserID : nmenon34 7 | 8 | """ 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import datetime as dt 13 | from datetime import datetime 14 | import os 15 | from util import get_data, plot_data 16 | 17 | def author(self): 18 | """ 19 | @summary Returning the author user ID 20 | """ 21 | return 'nmenon34' 22 | 23 | def compute_portvals(df, sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,12,31), start_val = 1000000, commission=9.95, impact=0.005): 24 | # this is the function the autograder will call to test your code 25 | # NOTE: orders_file may be a string, or it may be a file object. Your 26 | # code should work correctly with either input 27 | #df = pd.read_csv(orders_file, index_col='Date', parse_dates=True) 28 | # df = pd.read_csv(orders_file, parse_dates=True) 29 | # print df 30 | # df = df.sort_values(by='Date',ascending=True) 31 | # print "Sorted" 32 | # print df 33 | 34 | # Building the prices data frame 35 | start_date = datetime.strptime(df['Date'].min(),'%Y-%m-%d') 36 | #end_date = datetime.strptime(df['Date'].max(),'%Y-%m-%d') 37 | end_date = ed 38 | dates = pd.date_range(start_date, end_date) 39 | syms = df['Symbol'].unique().tolist() 40 | prices_all = get_data(syms, dates) 41 | prices_all.fillna(method="ffill",inplace=True) 42 | prices_all.fillna(method="bfill",inplace=True) 43 | prices = prices_all[syms] 44 | prices['Cash'] = 1 45 | 46 | # Building the trades data frame 47 | trades = prices.copy() 48 | trades[:] = 0 49 | 50 | # Populating the trades DataFrame 51 | nrow = df.shape[0] 52 | for k in range(0,nrow): 53 | i = datetime.strptime(df.ix[k,'Date'],'%Y-%m-%d') 54 | 55 | if i not in prices.index: 56 | continue 57 | 58 | j = df.ix[k,'Symbol'] 59 | if df.ix[k,'Order'] == 'BUY': 60 | trades.ix[i,j] += df.ix[k,'Shares'] 61 | trades.ix[i,'Cash'] += (-1*df.ix[k,'Shares']*prices.ix[i,j]*(1+impact)) 62 | elif df.ix[k,'Order'] == 'SELL': 63 | trades.ix[i,j] -= df.ix[k,'Shares'] 64 | trades.ix[i,'Cash'] += 
(1*df.ix[k,'Shares']*prices.ix[i,j]*(1-impact)) 65 | trades.ix[i,'Cash'] -= commission 66 | 67 | # Building the initial holdings 68 | holdings = trades.copy() 69 | holdings.ix[0,'Cash'] += start_val 70 | 71 | # Populating holdings 72 | nrow = holdings.shape[0] 73 | for i in range(1,nrow): 74 | holdings.ix[i,:] += holdings.ix[i-1,:] 75 | 76 | # Populating values 77 | values = pd.DataFrame(prices.values*holdings.values, columns=prices.columns, index=prices.index) 78 | 79 | # Populating portvals 80 | portvals = values.sum(axis=1) 81 | 82 | return portvals 83 | 84 | 85 | def test_code(): 86 | # this is a helper function you can use to test your code 87 | # note that during autograding his function will not be called. 88 | # Define input parameters 89 | 90 | of = "./orders/orders-02.csv" 91 | sv = 1000000 92 | 93 | # Process orders 94 | portvals = compute_portvals(orders_file = of, start_val = sv) 95 | if isinstance(portvals, pd.DataFrame): 96 | portvals = portvals[portvals.columns[0]] # just get the first column 97 | else: 98 | "warning, code did not return a DataFrame" 99 | 100 | # Get portfolio stats 101 | # Here we just fake the data. you should use your code from previous assignments. 
102 | start_date = dt.datetime(2008,1,1) 103 | end_date = dt.datetime(2008,6,1) 104 | cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = [0.2,0.01,0.02,1.5] 105 | cum_ret_SPY, avg_daily_ret_SPY, std_daily_ret_SPY, sharpe_ratio_SPY = [0.2,0.01,0.02,1.5] 106 | 107 | # Compare portfolio against $SPX 108 | print "Date Range: {} to {}".format(start_date, end_date) 109 | print 110 | print "Sharpe Ratio of Fund: {}".format(sharpe_ratio) 111 | print "Sharpe Ratio of SPY : {}".format(sharpe_ratio_SPY) 112 | print 113 | print "Cumulative Return of Fund: {}".format(cum_ret) 114 | print "Cumulative Return of SPY : {}".format(cum_ret_SPY) 115 | print 116 | print "Standard Deviation of Fund: {}".format(std_daily_ret) 117 | print "Standard Deviation of SPY : {}".format(std_daily_ret_SPY) 118 | print 119 | print "Average Daily Return of Fund: {}".format(avg_daily_ret) 120 | print "Average Daily Return of SPY : {}".format(avg_daily_ret_SPY) 121 | print 122 | print "Final Portfolio Value: {}".format(portvals[-1]) 123 | 124 | if __name__ == "__main__": 125 | test_code() 126 | -------------------------------------------------------------------------------- /Project 8/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anu003/CS7646-Machine-Learning-for-Trading/921a69fd1dc6135624f4560b4dcce80111df78ca/Project 8/report.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS7646-Machine-Learning-for-Trading 2 | Assignments as part of CS 7646 at GeorgiaTech under Dr. 
Tucker Balch in Fall 2017 3 | 4 | 5 | ## Course links 6 | Course Page : http://quantsoftware.gatech.edu/CS7646_Fall_2017 7 | 8 | Udacity course by Tucker Balch : https://www.udacity.com/course/machine-learning-for-trading--ud501 9 | 10 | ## Course structure 11 | This course is composed of three mini-courses: 12 | 13 | Mini-course 1: Manipulating Financial Data in Python 14 | 15 | Mini-course 2: Computational Investing 16 | 17 | Mini-course 3: Machine Learning Algorithms for Trading 18 | 19 | ## Projects 20 | 21 | Completed 8 projects in total: 22 | ### Project 1: 23 | Title : Assess portfolio 24 | 25 | Goal : To evaluate and compare different portfolios by computing certain metrics based on available historical data, and plot a comparison graph 26 | 27 | Link : http://quantsoftware.gatech.edu/Assess_portfolio 28 | 29 | ### Project 2: 30 | Title : Optimize portfolio 31 | 32 | Goal : To find how much of a portfolio's funds should be allocated to each stock so as to optimize its performance by considering 'minimum volatility' as the optimizer metric 33 | 34 | Link : http://quantsoftware.gatech.edu/Optimize_something 35 | 36 | ### Project 3: 37 | Title : Market simulator 38 | 39 | Goal : To create a market simulator that accepts trading orders and keeps track of a portfolio's value over time and then assesses the performance of that portfolio 40 | 41 | Link : http://quantsoftware.gatech.edu/Marketsim 42 | 43 | ### Project 4: 44 | Title : Defeat learners 45 | 46 | Goal : To generate data that will work better for one learner than another. The two learners are: 47 | 48 | (1) A decision tree learner with leaf_size = 1 (DTLearner). Note that for testing purposes we will use our implementation of DTLearner 49 | 50 | (2) The LinRegLearner provided as part of the repo. 51 | 52 | Data generation should use a random number generator as part of its data generation process. Generators will be passed a random number seed.
Whenever the seed is the same, return exactly the same data set. Different seeds should result in different data sets. 53 | 54 | Link : http://quantsoftware.gatech.edu/Defeat_learners 55 | 56 | ### Project 5: 57 | Title : Assess learners 58 | 59 | Goal : To implement and evaluate three learning algorithms as Python classes: A "classic" Decision Tree learner, a Random Tree learner, and a Bootstrap Aggregating learner (Assume data to be static, and consider this to be a regression problem) 60 | 61 | Link : http://quantsoftware.gatech.edu/Assess_learners 62 | 63 | ### Project 6: 64 | Title : Qlearning robot 65 | 66 | Goal : To implement the Q-Learning and Dyna-Q solutions to the reinforcement learning problem, and apply them to a navigation problem in this project 67 | 68 | Link : http://quantsoftware.gatech.edu/Qlearning_robot 69 | 70 | ### Project 7: 71 | Title : Manual strategy 72 | 73 | Goal : To develop a trading strategy using your intuition and Technical Analysis, and test it against a stock using the market simulator built in project 3 74 | 75 | Link : http://quantsoftware.gatech.edu/Manual_strategy 76 | 77 | ### Project 8: 78 | Title : Strategy learner 79 | 80 | Goal : To design a learning trading agent and perform the following tasks: 81 | - Devise numerical/technical indicators to evaluate the state of a stock on each day 82 | - Build a strategy learner based on one of the learners described above that uses the indicators 83 | - Test/debug the strategy learner on specific symbol/time period problems 84 | - Write a report describing your learning strategy 85 | 86 | Link : http://quantsoftware.gatech.edu/Strategy_learner 87 | 88 | --------------------------------------------------------------------------------