# -*- coding: utf-8 -*-
"""
factor_momentum: an equity analysis on momentum factor investing.

Created on Sat Oct 27 11:14:49 2018

@author: antonio constandinou
"""


import psycopg2
import datetime
import seaborn
import pandas as pd
import os
import functools
import matplotlib.pyplot as plt

import common_methods as cm


def backtest_momentum(ticker_dict_by_year, conn):
    """
    return a dictionary where each key is our year, value is list of returns of
    the stocks held during that year.

    The holding period for year Y runs from the last trading day of December
    Y-1 (exclusive) to the last trading day of December Y (inclusive), as
    reported by cm.fetch_last_day_mth.

    args:
        ticker_dict_by_year: dictionary of keys [year] and values
                             [list of tickers to hold in portfolio]
        conn: a Postgres DB connection object
    returns:
        dictionary where each key is our year, value is list of simple returns
        (last price / first price - 1), one per ticker that had price data.
        A year whose tickers all lack data does not appear in the result.
    """
    annual_collector = {}
    mth = 12

    for year, tickers in ticker_dict_by_year.items():
        year_start = year - 1
        print("Working on {} momentum portfolio".format(year))

        # find the last trading day for our years range
        last_tr_day_start = cm.fetch_last_day_mth(year_start, conn)
        last_tr_day_end = cm.fetch_last_day_mth(year, conn)
        trd_start_dt = datetime.date(year_start, mth, last_tr_day_start)
        trd_end_dt = datetime.date(year, mth, last_tr_day_end)

        # need to convert list of tickers to tuple of tickers
        year_data = cm.load_df_stock_data_array(tuple(tickers), trd_start_dt, trd_end_dt, conn)

        for ticker_data in year_data:
            # column 0 is 'Date', column 1 is named after the ticker
            ticker = ticker_data.columns[1]
            prices = ticker_data[ticker]

            if prices.empty:
                # e.g. a stock delisted before the holding period: .iloc[0]
                # would raise IndexError, so skip it instead of aborting.
                print('Ticker {} has no price data for {}, skipping'.format(ticker, year))
                continue

            # annual return, kept as a one-element list so it can be
            # concatenated onto the year's running list
            annual_return = [(prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0]]
            print('Ticker {} annual return {}'.format(ticker, annual_return))
            annual_collector.setdefault(year, []).extend(annual_return)

    return annual_collector


def main():
    """
    build a yearly momentum portfolio (top five performers of each sector over
    the previous December-to-December window), backtest it and write each
    year's individual stock returns to factor_momentum_annual_results.txt.

    Each output line is: year,return_1,return_2,...
    """
    skip_etfs = True
    # cm.load_db_credential_info concatenates this onto os.getcwd(), so it must
    # start with a path separator; os.sep keeps that working off Windows too.
    db_credential_info_p = os.sep + "database_info.txt"

    # create our instance variables for host, username, password and database name
    # (strip so a trailing newline in the file does not end up in the db name)
    db_host, db_user, db_password, db_name = [
        field.strip() for field in cm.load_db_credential_info(db_credential_info_p)
    ]
    conn = psycopg2.connect(host=db_host, database=db_name, user=db_user, password=db_password)

    try:
        # collect each year's stocks to hold. key = year, values = list of tickers
        ticker_dict_by_year = {}

        for year in range(2004, 2015):
            end_year = year + 1
            # stocks are ranked on [year, end_year] and held during next_year
            next_year = end_year + 1

            # find the last trading day for our years range
            last_tr_day_start = cm.fetch_last_day_mth(year, conn)
            last_tr_day_end = cm.fetch_last_day_mth(end_year, conn)

            # date range to pull data from
            start_dt = datetime.date(year, 12, last_tr_day_start)
            end_dt = datetime.date(end_year, 12, last_tr_day_end)

            # list of stocks and their sector
            list_of_stocks = cm.load_db_tickers_sectors(start_dt, conn)
            # dict: key = sector with values = array of all tickers pertaining to a sector
            sector_dict = cm.build_dict_of_arrays(list_of_stocks)

            for sector, ticker_arr in sector_dict.items():
                # Only ETFs are ever excluded. The previous test
                # (skip_etfs and sector != "ETF") skipped *every* sector as
                # soon as skip_etfs was switched off.
                if skip_etfs and sector == "ETF":
                    continue

                data_array_of_dfs = cm.load_df_stock_data_array(ticker_arr, start_dt, end_dt, conn)
                merged_data = cm.data_array_merge(data_array_of_dfs)
                if merged_data.empty:
                    # no common trading dates for this sector: nothing to rank
                    print("No data {}: {}".format(end_year, sector))
                    continue

                return_data_series = (merged_data.iloc[-1] - merged_data.iloc[0]) / merged_data.iloc[0]
                top_five = return_data_series.nlargest(5).index.tolist()
                ticker_dict_by_year.setdefault(next_year, []).extend(top_five)
                print("Done {}: {}".format(end_year, sector))

        # annual returns of all stocks per year
        portfolio_performance = backtest_momentum(ticker_dict_by_year, conn)
    finally:
        # release the DB connection even if a query fails part-way through
        conn.close()

    # file name to output
    f_name = "factor_momentum_annual_results" + ".txt"
    # let's start outputting data; "with" guarantees the file is flushed and closed
    with open(f_name, 'w') as file_to_write:
        for year, returns_arr in portfolio_performance.items():
            str_rtns_list = ','.join(str(e) for e in returns_arr)
            file_to_write.write('{},{}\n'.format(year, str_rtns_list))

    print("LETS CHECK PERFORMANCE")


if __name__ == "__main__":
    main()
2007,0.16151824132193016,0.36951983298538615,0.3038761310072942,1.043181468803325,0.3166934473534427,0.6634977590560817,0.3616727795691875,0.07850843028475372,-0.1344307908629298,0.3509124443927148,0.3057194755730823,-0.08518415650958577,0.4672150851362904,-0.05815625894986535,-0.05439656279904416,1.6122355785254583,-0.28558509676252486,0.220832152374788,-0.061652593199918775,0.7556349792876264,-0.33550992894180914,0.41435684492683267,0.17062119701179895,-0.02380105203997745,-0.1270578912490613,0.15795410051748152,3.195341375366545,0.10627000593387084,0.0017728566109112016,0.4656843678270479,0.13033962390309217,0.14600821512416237,-0.025068585420018226,0.3158560993963015,0.26365335920155913,0.34080681978394534,-0.2816891848896042,0.14410726605022808,-0.15928328816650605,-0.0005137826924821088,0.5428273366310857,0.24943820224719102,0.8475744890421079,-0.19213286713286715,0.10604982206405682,0.1115687623911301,0.04541991184972153,0.4567919627921535,0.9204063332544815,0.28514087775768254,0.23369455891068133,0.2068478985880142,-0.04645125246249255 3 | 
2008,-0.5021734616889125,-0.5607235124415249,-0.5669844098837429,-0.4367013372465727,-0.3717111036032976,-0.555579403211276,-0.48766315781083797,-0.36596126074726687,-0.24719675297581173,-0.0025711145837693052,-0.35262840554595215,-0.013145916610007871,-0.22181287237990507,-0.21527597290715053,-0.2234831763092553,-0.36089896495832985,-0.577418689267317,-0.46722078961038965,-0.44502617801047123,-0.7822085277795466,-0.5619482350233509,-0.31432850719257766,-0.01445538452011526,-0.3091404584875021,-0.4689781021897811,-0.6341322078251413,-0.546563725659823,-0.7508176754027015,-0.3099221499793586,-0.15680051668944409,-0.3724976060806745,-0.4551612039986315,-0.28597325420030734,-0.2675321583041933,-0.2181443987488131,-0.31133408688692266,-0.46065715285586645,-0.5618145084804228,-0.5075437389120017,-0.11631130101176096,-0.6079950303440356,-0.24839373210133342,-0.04396890358539335,-0.04145156713274731,-0.6202404809619239,-0.6756898712344795,-0.4547317922100605,-0.5613733135641608,-0.457748459426024,-0.37080897388070266,-0.27070873713660426,-0.1687726496325131,-0.2843890327878433 4 | 
2009,1.0188849820143886,-0.1454257193949044,0.04459945138310749,0.3363502139415925,0.4052379980894622,0.16796184227054745,0.07217799705515007,0.04271821302496283,0.22675146530774115,0.6586292477309699,0.03967376920077719,-0.07327599479447759,-0.025945837162839193,-0.07814878564147124,-0.13737804641629428,-0.04495150652589024,0.24841883723642239,0.12380181427472975,-0.20009074611773978,-0.06836553291164654,0.14863263489334771,0.4503793217785571,0.12449312889342035,0.11626808217498483,0.014177139027500707,0.8443253483654052,-0.023280889186837406,0.030541427596341042,0.2687020543392674,0.1024111608266978,0.05470774874331466,0.25304550014652627,-0.21906221452614497,0.2521002578266506,-0.003205419240940265,-0.06107753090686237,0.19852306714039658,0.24798401858638863,0.09885514381654834,0.1839452053303773,0.11418972364545808,0.19519549226674696,0.25931169748446525,0.4850408739718395,0.6628804299816592,0.396220156525899,-0.04102387824735582,0.5511698517592426,-0.014338855927107319,-0.17075494035929406,0.019661260514743657,0.014042831241755193,0.3942770507749793 5 | 
2010,0.7053707963392083,0.4445112958538179,0.5010976289137582,0.8499683445867977,0.6117206161099717,0.3115132611169966,0.2708682613829513,0.4576923936936277,0.2006043926184135,0.40978829510274684,0.19015538255997433,0.06783079063125184,-0.019206309739540836,-0.11314119138614936,0.01698813997602389,0.6962290607866939,0.08335088258486517,0.06472834816678304,0.022645136832081483,0.32834820967890666,0.6332684495662843,-0.022438692946477816,0.3294411592994161,0.7840238235034526,0.1411903264926384,-0.15670103092783502,-0.2608294930875576,-0.20434099226053168,-0.2509943711361594,0.93184890783629,0.485554202474892,0.019249185740313483,0.43829012905308035,0.19445230578698125,0.5042270656267893,0.31461072299496795,0.22582616584138848,-0.1089975916787976,-0.03211345899483283,0.09069482054401139,0.7400218637981302,0.45529240358436407,0.10653867943694847,0.38881352680439335,0.16406031640945787,0.7431577894736839,-0.16523627509455283,0.12424350892338953,0.4947507580311666,0.05621627027027028,0.3629267271904205,0.22246489848872822,0.0945477593487744 6 | 
2011,0.05443702375719159,0.013414954414568397,-0.3088540947616079,0.18307163817183172,0.22882251375307336,-0.20184306546703015,0.29208560157037144,-0.5239436800968353,-0.24910458611637815,-0.13105477028315465,-0.5181847007532504,-0.21309563258676179,-0.3518431841207638,-0.13928727712802075,-0.11923179332205343,0.4232514155193923,0.19926221200476382,0.2679628137253204,-0.04349832838434164,0.35348319585848387,0.5102624086602332,-0.13113404631252262,0.29188411013136595,0.4460700291960448,0.4473172221741459,-0.6116249279097308,-0.21000520047636126,-0.4512855120417369,0.05744614603328597,-0.33112307630016896,0.09862000779478387,-0.36317224192620434,-0.06611099838342753,-0.04622937286828423,0.08356766646705933,0.2953467554376865,0.5013827785399905,0.16545745065667505,0.24120912035179493,0.22858561899566562,-0.017606422671084067,-0.11112656487958276,0.11692700024498098,-0.18706790215798105,0.3909212862966836,-0.5262666786612793,-0.10788648580441643,-0.09140435835351089,0.7582687382695501,0.5325108997797023,-0.14012131555027924,0.16078541293050339,0.07958313641157072 7 | 
2012,0.30053004659461935,0.027243969217174875,0.2691478307944936,0.013711754130038657,0.8423063283559189,0.09674606892843264,0.22818576286848008,0.10698152782281951,0.587838273106492,0.1653827017347038,0.20102648635621087,0.611650033564141,0.4543138952250765,0.28123023916379847,0.08128694988421167,0.142486440162455,0.07806596082496156,0.10498265495724786,0.22513102102322974,0.21560909420917723,0.5500698257065936,0.26517836239712694,0.2869127205941522,0.16549799497186882,0.34711781820103704,0.3750662457495949,0.5186630026285509,0.4087193921172295,0.03762417293821098,0.21730098541094095,0.25503471620834123,0.036203093369073805,0.3777027718873334,0.3756788293605641,0.3492720230400449,0.18680775341676226,0.0081672304017324,-0.04616661332149382,0.1052962913825182,0.027716291676168642,0.5330487283398241,0.1067951305494164,0.0663981276228786,0.2580586639779384,0.16390900281976503,-0.2635616438356165,0.32832645600113375,0.03955821016401109,2.031543629275208,0.28960348017621135,0.14287126394726007,0.169462792108269,0.1271637214820415 8 | 
2013,0.13831008483886428,0.4921916898732941,0.31034961961486207,0.5867303238997269,0.5785023611442142,0.8061515751357499,0.4980099187915478,0.7292387279538067,0.3324422457908218,0.627008912544354,0.29814430842820067,0.36755477453877866,0.432589886694438,0.2716269884781711,0.35251163435271154,0.8738022184728838,0.0782057193117491,0.1856726288852951,0.2592616565883651,0.33476791602308686,0.10463998735952103,0.49122777790659117,0.11212786342034245,-0.003609092844322877,0.5761988963773599,0.8069847625661989,0.13897860491603461,1.12341424429141,0.28971195887129764,0.02388507342633557,0.39646964402673013,0.17633559421335587,0.18447669511603845,0.6705001694044385,0.1861746658922732,0.2815199603883083,0.27516342032455765,0.1343954986954565,0.2558582631169542,0.4150154803270239,-0.17468948006115084,0.23763605111646513,-0.022237219202689056,0.1836228318095151,0.11312481630645517,0.5189845174224809,1.0010656210982602,0.9954889929804761,0.5242812529492836,0.1276733052430245,0.056780385095991044,0.15923840253935853,-0.15492838693511912 9 | 
2014,0.059923891386349286,-0.15643976343599203,0.013143083872359278,-0.22248166497911118,-0.0789695758181791,0.7903471592627346,0.29150635452971974,1.2582648891245827,-0.026748972919030067,1.1254297824520858,0.1751619996039976,0.04173271874171044,0.2408113923697897,0.2246340850395805,0.03475777566319715,0.4178220281834892,0.21780086763828838,0.6827842466840859,0.23494305599003068,0.37924116200000707,-0.01456809794890704,0.6953860133419719,-0.09865075886010913,-0.07231608825859506,-0.004565913770593816,-0.05845872009432739,0.6163433979686055,0.2562089100238786,0.3597185925283928,0.5091773215645969,0.27640606649665433,0.26173910096199987,0.07455016571095024,0.017771913554400446,0.03700682522035293,0.40846556904696807,-0.02125237905460153,0.29194521193236694,-0.039916441108513154,0.25140897118920486,0.2526578206889352,0.2738809974664093,0.31953910791446277,0.4514564286687397,0.38051543960059453,0.4270935194222136,0.3011515609962596,0.1125104953820319,-0.018433971531495282,0.253290654275208,-0.002634204844787334,0.012642931522666845,0.32500128526722566 10 | 
2015,0.4229879655752566,-0.6320324443838098,-0.39517415850150395,-0.2266327146408782,0.22406018796992475,0.015502685283759752,-0.20768072271552418,0.040679734828363186,-0.1362676747824947,0.3496371234343232,-0.09779693384134151,-0.1654622078794768,0.038627459704001645,0.060093090017431974,0.020655122819975665,0.33811505116510315,0.37721891066215696,0.4719280266223977,0.04210540644678551,0.2990296938480172,0.2543206621543628,0.08990220256235036,-0.12702601909587558,0.20511287187920466,0.3191400587208366,0.057547985162259625,0.4661831121438683,0.4676657943579713,-0.5925179856115108,0.23985765124555167,-0.0012332507352117458,0.08074466356290606,-0.08219740757384271,-0.2187531090571898,0.05591833508859956,-0.18361447119406019,-0.06965719264403768,-0.23169768115888748,0.2260750824451752,0.007561447448378504,-0.26251870773276537,-0.07367528731124892,-0.0012347275252338672,0.08981234721183984,0.16753443659787653,0.23676795745743867,0.2602450543806,0.0352173393939957,0.022426277728122324,-0.08571045981202652,-0.3185636502235037,0.07424309132356274,0.03222634008277024 11 | 
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 27 10:59:17 2018

@author: antonio constandinou
"""

# COMMON METHODS

import calendar
import datetime
import functools
import os
from itertools import combinations

import numpy as np
import pandas as pd

# statsmodels and matplotlib are only needed by the cointegration and plotting
# helpers. Guarding the imports keeps the database/file helpers importable on
# machines that do not have the statistics/plotting stack installed.
try:
    import statsmodels.api as sm
    import statsmodels.tsa.stattools as ts
except ImportError:
    sm = None
    ts = None

try:
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt
except ImportError:
    mdates = None
    plt = None


def _require_optional(module, package_name):
    """
    return module, or raise ImportError naming the optional package that is missing.
    """
    if module is None:
        raise ImportError("{0} is required for this function but is not installed.".format(package_name))
    return module


def _run_query(conn, sql, params):
    """
    execute sql with params on a fresh cursor and return all rows.
    the cursor is closed even when the query raises.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params)
        return cur.fetchall()
    finally:
        cur.close()


def build_dict_of_arrays(list_of_tups):
    """
    create a dictionary from list of tuples. key = sector, values = array of tickers
    pertaining to a given sector
    args:
        list_of_tups: list of (ticker, sector) tuples
    returns:
        dictionary
    """
    sector_dict = {}

    for stock_sector in list_of_tups:
        ticker = stock_sector[0]
        sector = stock_sector[1]
        sector_dict.setdefault(sector, []).append(ticker)

    return sector_dict


def data_array_merge(data_array):
    """
    merge all dfs into one df (inner join on 'Date'), indexed by 'Date'
    args:
        data_array: array of pandas df, each with a 'Date' column
    returns:
        merged_df, single pandas dataframe. The input dataframes are left untouched.
    raises:
        ValueError: if data_array is empty
    """
    frames = list(data_array)
    if not frames:
        raise ValueError("data_array_merge needs at least one dataframe.")

    merged_df = functools.reduce(lambda left, right: pd.merge(left, right, on='Date'), frames)
    # With a single frame reduce() hands back the caller's own object, so an
    # in-place set_index would silently change it; return a new frame instead.
    return merged_df.set_index('Date')


def fetch_last_day_mth(year_, conn):
    """
    return date of the last day of data we have for December of a given year in our Postgres DB.
    args:
        year_: year, type int
        conn: a Postgres DB connection object
    return:
        integer, last trading day of December that we have data for
    raises:
        ValueError: if there is no data for that month
    """
    return fetch_last_day_any_mth(year_, 12, conn)


def fetch_last_day_any_mth(year_, mth_, conn):
    """
    return date of the last day of data we have for a given month and year in our Postgres DB.
    args:
        year_: year, type int
        mth_: month, type int
        conn: a Postgres DB connection object
    return:
        integer, last trading day of the month that we have data for
    raises:
        ValueError: if there is no data for that month
    """
    # Use the month's real length: a fixed day of 30 is an invalid date in
    # February and misses the 31st in long months.
    first_day = datetime.date(year_, mth_, 1)
    last_day = datetime.date(year_, mth_, calendar.monthrange(year_, mth_)[1])

    # Dates are passed as query parameters rather than spliced into a quoted literal.
    SQL = """
            SELECT MAX(date_part('day', date_price)) FROM daily_data
            WHERE date_price BETWEEN %s AND %s
          """
    data = _run_query(conn, SQL, (first_day, last_day))

    # MAX() over zero rows yields a single NULL row
    if not data or data[0][0] is None:
        raise ValueError("No price data found for {0}-{1:02d}.".format(year_, mth_))
    return int(data[0][0])


def find_cointegrated_pairs(data, p_value=0.01):
    """
    statsmodels.tsa.stattools coint method for identifying pairs
    args:
        data: needs to be pd_df where each column = individual ticker Adj_Close
        p_value: threshold for accepting a pairs model (float), default 0.01
    returns:
        score_matrix (np.array), pvalue_matrix (np.array), pairs (array)
        only the upper triangle (i < j) of both matrices is filled in.
    """
    _require_optional(ts, "statsmodels")

    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    pairs = []

    # every unordered pair of columns once, in the same order as a nested i < j loop
    for (i, key_i), (j, key_j) in combinations(enumerate(keys), 2):
        result = ts.coint(data[key_i], data[key_j])
        score = result[0]
        pvalue = result[1]
        score_matrix[i, j] = score
        pvalue_matrix[i, j] = pvalue
        if pvalue < p_value:
            pairs.append((key_i, key_j))

    return score_matrix, pvalue_matrix, pairs


def load_db_credential_info(f_name_path):
    """
    load text file holding our database credential info and the database name.
    the first line of the file is skipped (header); the second line is split on commas.
    args:
        f_name_path: name of file relative to the current working directory,
                     optionally preceded with "\\" or "/", type string
    returns:
        array of 4 values that should match text file info, surrounding whitespace removed
    raises:
        ValueError: if the file has no line after the header
    """
    # accept either separator so the historical "\\file.txt" form also works off Windows
    relative = f_name_path.replace("\\", "/").lstrip("/")
    full_path = os.path.join(os.getcwd(), *relative.split("/"))

    # lets load our database credentials and info
    with open(full_path, 'r') as f:
        lines = f.readlines()[1:]

    if not lines:
        raise ValueError("{0} has no credential line after the header.".format(full_path))

    # strip so the trailing newline does not end up inside the last value
    return [field.strip() for field in lines[0].split(',')]


def load_db_tickers_start_date(start_date, conn):
    """
    return a list of stock tickers that have data on the start_date arg provided
    args:
        start_date: datetime object to be used to query our PostgreSQL database
        conn: a Postgres DB connection object
    returns:
        list of tuples, each tuple is (ticker,)
    """
    # convert start_date to string for our SQL query
    date_string = start_date.strftime("%Y-%m-%d")

    SQL = """
            SELECT ticker FROM symbol
            WHERE id IN
              (SELECT DISTINCT(stock_id)
               FROM daily_data
               WHERE date_price = %s)
          """
    return _run_query(conn, SQL, (date_string,))


def load_db_tickers_sectors(start_date, conn):
    """
    return a list of tuples. each tuple is a ticker paired with its sector
    args:
        start_date: datetime object to be used to query our PostgreSQL database
        conn: a Postgres DB connection object
    returns:
        list of tuples, each tuple is (ticker, sector)
    """
    # convert start_date to string for our SQL query
    date_string = start_date.strftime("%Y-%m-%d")

    SQL = """
            SELECT ticker, sector FROM symbol
            WHERE id IN
              (SELECT DISTINCT(stock_id)
               FROM daily_data
               WHERE date_price = %s)
          """
    return _run_query(conn, SQL, (date_string,))


def _load_price_frames(tickers, start_date, end_date, conn, fixed_column=None):
    """
    fetch adjusted close prices for each ticker and return one dataframe per ticker,
    sorted by date and restricted to start_date < Date <= end_date.
    the price column is named fixed_column when given, otherwise after the ticker.
    """
    # "=" rather than LIKE: a ticker is matched literally, so characters such
    # as "_" or "%" in a symbol cannot act as wildcards and pull in other stocks.
    SQL = """
            SELECT date_price, adj_close_price
            FROM daily_data
            INNER JOIN symbol ON symbol.id = daily_data.stock_id
            WHERE symbol.ticker = %s
          """
    array_pd_dfs = []

    cur = conn.cursor()
    try:
        for ticker in tickers:
            price_col = ticker if fixed_column is None else fixed_column
            # fetch our stock data from our Postgres DB
            cur.execute(SQL, (ticker,))
            results = cur.fetchall()
            # create a pandas dataframe of our results, in order of date
            stock_data = pd.DataFrame(results, columns=['Date', price_col])
            stock_data = stock_data.sort_values(by=['Date'], ascending=True)
            # convert our column to float
            stock_data[price_col] = stock_data[price_col].astype(float)
            # keep rows inside the date range; start is exclusive, end inclusive
            mask = (stock_data['Date'] > start_date) & (stock_data['Date'] <= end_date)
            # rebuild and re-index our dataframe
            stock_data = stock_data.loc[mask].reset_index(drop=True)
            array_pd_dfs.append(stock_data)
    finally:
        cur.close()

    return array_pd_dfs


def load_df_stock_data_array(stocks, start_date, end_date, conn):
    """
    return an array where each element is a dataframe of loaded data
    args:
        stocks: tuple of strings, each string is ticker
        start_date: datetime object to filter our pandas dataframe (exclusive)
        end_date: datetime object to filter our pandas dataframe (inclusive)
        conn: a Postgres DB connection object
    returns:
        array of pandas dataframe, each dataframe has columns ['Date', <ticker>]
    """
    return _load_price_frames(stocks, start_date, end_date, conn)


def load_pairs_stock_data(pair, start_date, end_date, conn):
    """
    return an array of dataframes, one per ticker of the pair, holding its adjusted close prices
    args:
        pair: tuple of two strings, each string is ticker
        start_date: datetime object to filter our pandas dataframe (exclusive)
        end_date: datetime object to filter our pandas dataframe (inclusive)
        conn: a Postgres DB connection object
    returns:
        array of pandas dataframe, each dataframe has columns ['Date', 'Adj_Close']
    """
    return _load_price_frames(pair, start_date, end_date, conn, fixed_column='Adj_Close')


def pair_data_verifier(array_df_data, pair_tickers, threshold=10):
    """
    merge two dataframes, verify if we still have the same number of data we originally had.
    use an inputted threshold that tells us whether we've lost too much data in our merge or not.
    args:
        array_df_data: array of two pandas dataframes
        pair_tickers: tuple of both tickers
        threshold: integer, max number of days of data we can be missing after merging two
                    dataframes of data.
                    default = 10 to represent 10 days.
    returns:
        boolean False or new merged pandas dataframe

        False: if our new merged dataframe is missing too much data (> threshold)
        merged pandas dataframe: if neither input lost more than threshold rows;
                                 columns are ['Date', ticker_1, ticker_2], prices rounded to 2 decimals
    """
    stock_1, stock_2 = pair_tickers[0], pair_tickers[1]
    first_df, second_df = array_df_data[0], array_df_data[1]

    df_merged = pd.merge(first_df, second_df, on='Date', how='inner')
    df_merged.columns = ['Date', stock_1, stock_2]
    # round columns
    df_merged[stock_1] = df_merged[stock_1].round(decimals=2)
    df_merged[stock_2] = df_merged[stock_2].round(decimals=2)

    new_size = len(df_merged.index)
    lost_1 = len(first_df.index) - new_size
    lost_2 = len(second_df.index) - new_size

    if lost_1 > threshold or lost_2 > threshold:
        print("This pair {0} and {1} were missing data.".format(stock_1, stock_2))
        return False
    return df_merged


"""PLOT METHODS"""
def plot_price_series(df, ts1, ts2, start_date, end_date):
    """
    plot two price columns of df against its index, with monthly ticks.
    args:
        df: pandas dataframe indexed by date
        ts1: column name of the first series
        ts2: column name of the second series
        start_date: unused, kept for backward compatibility
        end_date: unused, kept for backward compatibility
    returns:
        None
    """
    _require_optional(plt, "matplotlib")

    months = mdates.MonthLocator()  # every month
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.grid(True)
    fig.autofmt_xdate()
    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()


def plot_scatter_series(df, ts1, ts2):
    """
    scatter plot of column ts1 against column ts2 of df.
    """
    _require_optional(plt, "matplotlib")

    plt.xlabel('%s Price ($)' % ts1)
    plt.ylabel('%s Price ($)' % ts2)
    plt.title('%s and %s Price Scatterplot' % (ts1, ts2))
    plt.scatter(df[ts1], df[ts2])
    plt.show()


def plot_residuals(df):
    """
    plot the "res" column of df against its index, with monthly ticks.
    """
    _require_optional(plt, "matplotlib")

    months = mdates.MonthLocator()  # every month
    fig, ax = plt.subplots()
    # the series is drawn once; it used to be plotted a second time, unlabelled,
    # after the legend had already been built
    ax.plot(df.index, df["res"], label="Residuals")
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.grid(True)
    fig.autofmt_xdate()
    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('Residual Plot')
    plt.legend()
    plt.show()


def remove_ticker(ticker, array_pairs_to_clean):
    """
    output a new array of tuples with specific ticker removed
    args:
        ticker: ticker to remove, type string
        array_pairs_to_clean: array of tuples
    returns:
        array of tuples, every pair that does not contain ticker, in the original order
    """
    return [pair for pair in array_pairs_to_clean if ticker not in pair]


def write_dict_text(f_name, dict_):
    """
    write dictionary info to text file.
    each line in text file = key, value
    value is stripped of brackets "(" and ")", single quotes "'" and spaces
    args:
        f_name: file_name to create (".txt" is appended), type string
        dict_: python dictionary, key = sector, value = iterable of elements
    returns:
        NoneType
    """
    f_name = f_name + ".txt"

    # "with" makes sure the data is flushed and the handle released
    with open(f_name, 'w') as file_to_write:
        for sector, ticker_arr in dict_.items():
            for ele in ticker_arr:
                new_str = (sector + "," + str(ele)).replace("(","").replace(")","").replace("'","").replace(" ","")
                file_to_write.write("%s\n" % (new_str,))

    print("{0} file created.".format(f_name))


def write_results_text_file(f_name, sub_array):
    """
    write an array to text file, one element per line, in the current working directory.
    args:
        f_name: name of our file to be written (".txt" is appended), type string
        sub_array: array of our data
    returns:
        None
    """
    # lets write elements of array to a file
    f_name = f_name + ".txt"

    with open(f_name, 'w') as file_to_write:
        for ele in sub_array:
            file_to_write.write("%s\n" % (ele,))