├── README.md
├── momentum_identifier.py
├── factor_momentum_annual_results.txt
└── common_methods.py


/README.md:
--------------------------------------------------------------------------------
1 | # factor_momentum
2 | An equity analysis on momentum factor investing.
3 | 


--------------------------------------------------------------------------------
/momentum_identifier.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat Oct 27 11:14:49 2018
  4 | 
  5 | @author: antonio constandinou
  6 | """
  7 | 
  8 | 
  9 | import psycopg2
 10 | import datetime
 11 | import seaborn
 12 | import pandas as pd
 13 | import os
 14 | import functools
 15 | import matplotlib.pyplot as plt
 16 | 
 17 | import common_methods as cm
 18 | 
 19 | def backtest_momentum(ticker_dict_by_year, conn):
 20 |     """
 21 |     return a dictionary where each key is our year, value is list of average returns of stocks
 22 |     args:
 23 |         ticker_dict: dictionary of keys [year] and values [list of tickers to hold in portfolio]
 24 |         conn: a Postgres DB connection object
 25 |     returns:
 26 |         dictionary where each key is our year, value is list of average returns of stocks
 27 |     """    
 28 |     annual_collector = {}
 29 |     
 30 |     for key, value in ticker_dict_by_year.items():
 31 |         # find the last trading day for our years range
 32 |         year_start = key - 1
 33 |         year = key
 34 |         print("Working on {} momentum portfolio".format(year))
 35 |         last_tr_day_start = cm.fetch_last_day_mth(year_start, conn)
 36 |         last_tr_day_end = cm.fetch_last_day_mth(year, conn)
 37 |         mth = 12
 38 |          
 39 |         trd_start_dt = datetime.date(year_start,mth,last_tr_day_start)
 40 |         trd_end_dt = datetime.date(year,mth,last_tr_day_end)
 41 |         # need to convert list of tickers to tuple of tickers
 42 |         tuple_ticker_values = tuple(value)
 43 |         year_data = cm.load_df_stock_data_array(tuple_ticker_values, trd_start_dt, trd_end_dt, conn)
 44 |         
 45 |         
 46 |         for ticker_data in year_data:
 47 |             ticker = ticker_data.columns[1]
 48 |             # annual return
 49 |             """DEBUG THIS HERE"""
 50 |             annual_return = [(ticker_data[ticker].iloc[-1] - ticker_data[ticker].iloc[0]) / ticker_data[ticker].iloc[0]]
 51 |             print('Ticker {} annual return {}'.format(ticker, annual_return))
 52 | 
 53 |             if year not in annual_collector:
 54 |                 annual_collector[year] = annual_return
 55 |             else:
 56 |                 annual_collector[year] = annual_collector[year] + annual_return
 57 |     
 58 |     return annual_collector
 59 | 
 60 | 
 61 | def main():
 62 |     # main function to isolate pairs
 63 |     skip_etfs = True
 64 |     db_credential_info_p = "\\" + "database_info.txt"
 65 |     
 66 |     # create our instance variables for host, username, password and database name
 67 |     db_host, db_user, db_password, db_name = cm.load_db_credential_info(db_credential_info_p)
 68 |     conn = psycopg2.connect(host=db_host,database=db_name, user=db_user, password=db_password)
 69 |     
 70 |     # original
 71 |     year_array = list(range(2004, 2015))
 72 |     # year_array = list(range(2004, 2006))
 73 |     # collect each year's stocks to hold. key = year, values = list of tickers
 74 |     ticker_dict_by_year = {}
 75 |     
 76 |     for yr in year_array:
 77 |         # create a pairs file for each one year chunk in our range
 78 |         year = yr
 79 |         end_year = year + 1
 80 |         # find the last trading day for our years range
 81 |         last_tr_day_start = cm.fetch_last_day_mth(year, conn)
 82 |         last_tr_day_end = cm.fetch_last_day_mth(end_year, conn)
 83 |         
 84 |         # date range to pull data from
 85 |         start_dt = datetime.date(year,12,last_tr_day_start)
 86 |         end_dt = datetime.date(end_year,12,last_tr_day_end)
 87 |         start_dt_str = start_dt.strftime("%Y%m%d")
 88 |         end_dt_str = end_dt.strftime("%Y%m%d")
 89 |         
 90 |         # list of stocks and their sector
 91 |         list_of_stocks = cm.load_db_tickers_sectors(start_dt, conn)
 92 |         # dict: key = sector with values = array of all tickers pertaining to a sector
 93 |         sector_dict = cm.build_dict_of_arrays(list_of_stocks)
 94 |         
 95 |         for sector, ticker_arr in sector_dict.items():
 96 |             if skip_etfs and sector != "ETF":
 97 |                 # for next_year's portfolio
 98 |                 next_year = end_year + 1
 99 |                 data_array_of_dfs = cm.load_df_stock_data_array(ticker_arr, start_dt, end_dt, conn)
100 |                 merged_data = cm.data_array_merge(data_array_of_dfs)
101 |                 return_data_series = (merged_data.iloc[-1] - merged_data.iloc[0]) / merged_data.iloc[0]
102 |                 top_five = return_data_series.nlargest(5).index.tolist()
103 |                 if next_year not in ticker_dict_by_year:
104 |                     ticker_dict_by_year[next_year] = top_five
105 |                 else:
106 |                     ticker_dict_by_year[next_year] = ticker_dict_by_year[next_year] + top_five
107 |                 print("Done {}: {}".format(end_year, sector))
108 |     
109 |     # annual returns of all stocks per year
110 |     portfolio_performance = backtest_momentum(ticker_dict_by_year, conn)
111 |     # file name to output
112 |     f_name = "factor_momentum_annual_results" + ".txt"
113 |     # let's start outputting data
114 |     file_to_write = open(f_name, 'w')
115 |     
116 |     for year, returns_arr in portfolio_performance.items():
117 |         str_rtns_list = ','.join(str(e) for e in returns_arr)
118 |         file_to_write.write('{},{}\n'.format(year,str_rtns_list))
119 |             
120 |     print("LETS CHECK PERFORMANCE")
121 |     
# run the momentum backtest only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/factor_momentum_annual_results.txt:
--------------------------------------------------------------------------------
 1 | 2006,0.4712041884816756,0.05407058312307132,0.23368401959005985,0.181567833506207,0.1857609248313344,-0.15085527881851574,0.4434427746604128,-0.0354941180279659,0.10181630960556066,0.4106742522406564,0.6246984081041967,0.19450249239446396,0.17917833549284182,0.06636695133089265,0.24053814513044924,0.21351280021113742,0.3221323570565369,-0.17748166143315888,0.0555885883462802,-0.16476713175115693,0.13498323395352502,-0.007293703574621507,-0.18003562139164425,0.05801527358018234,0.05801528364593704,0.09953288956733554,0.33484652947922894,0.600639975639215,0.33321463805154494,-0.20360478097075385,0.04262567130679135,0.14776626651207722,0.06587700542177116,0.1680920474800461,0.24640549414916588,0.49294245385450586,0.5630704085622135,0.20074352080577781,0.3655681236940924,-0.01688475753629272,-0.1718479622719152,0.29357039671682633,0.34121856630824376,0.773154507628294,-0.17540020730162392,-0.0534520907956912,0.6988935150779719,-0.2042422876911159,0.01971598724209373,-0.24510264679702204,0.5178754703328793,0.3272743934374464,0.3418496472744304
 2 | 2007,0.16151824132193016,0.36951983298538615,0.3038761310072942,1.043181468803325,0.3166934473534427,0.6634977590560817,0.3616727795691875,0.07850843028475372,-0.1344307908629298,0.3509124443927148,0.3057194755730823,-0.08518415650958577,0.4672150851362904,-0.05815625894986535,-0.05439656279904416,1.6122355785254583,-0.28558509676252486,0.220832152374788,-0.061652593199918775,0.7556349792876264,-0.33550992894180914,0.41435684492683267,0.17062119701179895,-0.02380105203997745,-0.1270578912490613,0.15795410051748152,3.195341375366545,0.10627000593387084,0.0017728566109112016,0.4656843678270479,0.13033962390309217,0.14600821512416237,-0.025068585420018226,0.3158560993963015,0.26365335920155913,0.34080681978394534,-0.2816891848896042,0.14410726605022808,-0.15928328816650605,-0.0005137826924821088,0.5428273366310857,0.24943820224719102,0.8475744890421079,-0.19213286713286715,0.10604982206405682,0.1115687623911301,0.04541991184972153,0.4567919627921535,0.9204063332544815,0.28514087775768254,0.23369455891068133,0.2068478985880142,-0.04645125246249255
 3 | 2008,-0.5021734616889125,-0.5607235124415249,-0.5669844098837429,-0.4367013372465727,-0.3717111036032976,-0.555579403211276,-0.48766315781083797,-0.36596126074726687,-0.24719675297581173,-0.0025711145837693052,-0.35262840554595215,-0.013145916610007871,-0.22181287237990507,-0.21527597290715053,-0.2234831763092553,-0.36089896495832985,-0.577418689267317,-0.46722078961038965,-0.44502617801047123,-0.7822085277795466,-0.5619482350233509,-0.31432850719257766,-0.01445538452011526,-0.3091404584875021,-0.4689781021897811,-0.6341322078251413,-0.546563725659823,-0.7508176754027015,-0.3099221499793586,-0.15680051668944409,-0.3724976060806745,-0.4551612039986315,-0.28597325420030734,-0.2675321583041933,-0.2181443987488131,-0.31133408688692266,-0.46065715285586645,-0.5618145084804228,-0.5075437389120017,-0.11631130101176096,-0.6079950303440356,-0.24839373210133342,-0.04396890358539335,-0.04145156713274731,-0.6202404809619239,-0.6756898712344795,-0.4547317922100605,-0.5613733135641608,-0.457748459426024,-0.37080897388070266,-0.27070873713660426,-0.1687726496325131,-0.2843890327878433
 4 | 2009,1.0188849820143886,-0.1454257193949044,0.04459945138310749,0.3363502139415925,0.4052379980894622,0.16796184227054745,0.07217799705515007,0.04271821302496283,0.22675146530774115,0.6586292477309699,0.03967376920077719,-0.07327599479447759,-0.025945837162839193,-0.07814878564147124,-0.13737804641629428,-0.04495150652589024,0.24841883723642239,0.12380181427472975,-0.20009074611773978,-0.06836553291164654,0.14863263489334771,0.4503793217785571,0.12449312889342035,0.11626808217498483,0.014177139027500707,0.8443253483654052,-0.023280889186837406,0.030541427596341042,0.2687020543392674,0.1024111608266978,0.05470774874331466,0.25304550014652627,-0.21906221452614497,0.2521002578266506,-0.003205419240940265,-0.06107753090686237,0.19852306714039658,0.24798401858638863,0.09885514381654834,0.1839452053303773,0.11418972364545808,0.19519549226674696,0.25931169748446525,0.4850408739718395,0.6628804299816592,0.396220156525899,-0.04102387824735582,0.5511698517592426,-0.014338855927107319,-0.17075494035929406,0.019661260514743657,0.014042831241755193,0.3942770507749793
 5 | 2010,0.7053707963392083,0.4445112958538179,0.5010976289137582,0.8499683445867977,0.6117206161099717,0.3115132611169966,0.2708682613829513,0.4576923936936277,0.2006043926184135,0.40978829510274684,0.19015538255997433,0.06783079063125184,-0.019206309739540836,-0.11314119138614936,0.01698813997602389,0.6962290607866939,0.08335088258486517,0.06472834816678304,0.022645136832081483,0.32834820967890666,0.6332684495662843,-0.022438692946477816,0.3294411592994161,0.7840238235034526,0.1411903264926384,-0.15670103092783502,-0.2608294930875576,-0.20434099226053168,-0.2509943711361594,0.93184890783629,0.485554202474892,0.019249185740313483,0.43829012905308035,0.19445230578698125,0.5042270656267893,0.31461072299496795,0.22582616584138848,-0.1089975916787976,-0.03211345899483283,0.09069482054401139,0.7400218637981302,0.45529240358436407,0.10653867943694847,0.38881352680439335,0.16406031640945787,0.7431577894736839,-0.16523627509455283,0.12424350892338953,0.4947507580311666,0.05621627027027028,0.3629267271904205,0.22246489848872822,0.0945477593487744
 6 | 2011,0.05443702375719159,0.013414954414568397,-0.3088540947616079,0.18307163817183172,0.22882251375307336,-0.20184306546703015,0.29208560157037144,-0.5239436800968353,-0.24910458611637815,-0.13105477028315465,-0.5181847007532504,-0.21309563258676179,-0.3518431841207638,-0.13928727712802075,-0.11923179332205343,0.4232514155193923,0.19926221200476382,0.2679628137253204,-0.04349832838434164,0.35348319585848387,0.5102624086602332,-0.13113404631252262,0.29188411013136595,0.4460700291960448,0.4473172221741459,-0.6116249279097308,-0.21000520047636126,-0.4512855120417369,0.05744614603328597,-0.33112307630016896,0.09862000779478387,-0.36317224192620434,-0.06611099838342753,-0.04622937286828423,0.08356766646705933,0.2953467554376865,0.5013827785399905,0.16545745065667505,0.24120912035179493,0.22858561899566562,-0.017606422671084067,-0.11112656487958276,0.11692700024498098,-0.18706790215798105,0.3909212862966836,-0.5262666786612793,-0.10788648580441643,-0.09140435835351089,0.7582687382695501,0.5325108997797023,-0.14012131555027924,0.16078541293050339,0.07958313641157072
 7 | 2012,0.30053004659461935,0.027243969217174875,0.2691478307944936,0.013711754130038657,0.8423063283559189,0.09674606892843264,0.22818576286848008,0.10698152782281951,0.587838273106492,0.1653827017347038,0.20102648635621087,0.611650033564141,0.4543138952250765,0.28123023916379847,0.08128694988421167,0.142486440162455,0.07806596082496156,0.10498265495724786,0.22513102102322974,0.21560909420917723,0.5500698257065936,0.26517836239712694,0.2869127205941522,0.16549799497186882,0.34711781820103704,0.3750662457495949,0.5186630026285509,0.4087193921172295,0.03762417293821098,0.21730098541094095,0.25503471620834123,0.036203093369073805,0.3777027718873334,0.3756788293605641,0.3492720230400449,0.18680775341676226,0.0081672304017324,-0.04616661332149382,0.1052962913825182,0.027716291676168642,0.5330487283398241,0.1067951305494164,0.0663981276228786,0.2580586639779384,0.16390900281976503,-0.2635616438356165,0.32832645600113375,0.03955821016401109,2.031543629275208,0.28960348017621135,0.14287126394726007,0.169462792108269,0.1271637214820415
 8 | 2013,0.13831008483886428,0.4921916898732941,0.31034961961486207,0.5867303238997269,0.5785023611442142,0.8061515751357499,0.4980099187915478,0.7292387279538067,0.3324422457908218,0.627008912544354,0.29814430842820067,0.36755477453877866,0.432589886694438,0.2716269884781711,0.35251163435271154,0.8738022184728838,0.0782057193117491,0.1856726288852951,0.2592616565883651,0.33476791602308686,0.10463998735952103,0.49122777790659117,0.11212786342034245,-0.003609092844322877,0.5761988963773599,0.8069847625661989,0.13897860491603461,1.12341424429141,0.28971195887129764,0.02388507342633557,0.39646964402673013,0.17633559421335587,0.18447669511603845,0.6705001694044385,0.1861746658922732,0.2815199603883083,0.27516342032455765,0.1343954986954565,0.2558582631169542,0.4150154803270239,-0.17468948006115084,0.23763605111646513,-0.022237219202689056,0.1836228318095151,0.11312481630645517,0.5189845174224809,1.0010656210982602,0.9954889929804761,0.5242812529492836,0.1276733052430245,0.056780385095991044,0.15923840253935853,-0.15492838693511912
 9 | 2014,0.059923891386349286,-0.15643976343599203,0.013143083872359278,-0.22248166497911118,-0.0789695758181791,0.7903471592627346,0.29150635452971974,1.2582648891245827,-0.026748972919030067,1.1254297824520858,0.1751619996039976,0.04173271874171044,0.2408113923697897,0.2246340850395805,0.03475777566319715,0.4178220281834892,0.21780086763828838,0.6827842466840859,0.23494305599003068,0.37924116200000707,-0.01456809794890704,0.6953860133419719,-0.09865075886010913,-0.07231608825859506,-0.004565913770593816,-0.05845872009432739,0.6163433979686055,0.2562089100238786,0.3597185925283928,0.5091773215645969,0.27640606649665433,0.26173910096199987,0.07455016571095024,0.017771913554400446,0.03700682522035293,0.40846556904696807,-0.02125237905460153,0.29194521193236694,-0.039916441108513154,0.25140897118920486,0.2526578206889352,0.2738809974664093,0.31953910791446277,0.4514564286687397,0.38051543960059453,0.4270935194222136,0.3011515609962596,0.1125104953820319,-0.018433971531495282,0.253290654275208,-0.002634204844787334,0.012642931522666845,0.32500128526722566
10 | 2015,0.4229879655752566,-0.6320324443838098,-0.39517415850150395,-0.2266327146408782,0.22406018796992475,0.015502685283759752,-0.20768072271552418,0.040679734828363186,-0.1362676747824947,0.3496371234343232,-0.09779693384134151,-0.1654622078794768,0.038627459704001645,0.060093090017431974,0.020655122819975665,0.33811505116510315,0.37721891066215696,0.4719280266223977,0.04210540644678551,0.2990296938480172,0.2543206621543628,0.08990220256235036,-0.12702601909587558,0.20511287187920466,0.3191400587208366,0.057547985162259625,0.4661831121438683,0.4676657943579713,-0.5925179856115108,0.23985765124555167,-0.0012332507352117458,0.08074466356290606,-0.08219740757384271,-0.2187531090571898,0.05591833508859956,-0.18361447119406019,-0.06965719264403768,-0.23169768115888748,0.2260750824451752,0.007561447448378504,-0.26251870773276537,-0.07367528731124892,-0.0012347275252338672,0.08981234721183984,0.16753443659787653,0.23676795745743867,0.2602450543806,0.0352173393939957,0.022426277728122324,-0.08571045981202652,-0.3185636502235037,0.07424309132356274,0.03222634008277024
11 | 2016,0.017679195953235624,-0.16148829108854537,0.2550356368143786,0.019039746373397275,0.11321503945360659,0.2861996522964991,0.092270650808843,0.14996425873515454,0.26068857551800767,0.15181554863208904,0.1300535174828761,0.20874719040030923,0.18625607053619975,0.203961111745618,0.17411667563803562,-0.08627726063378484,0.1001962983960318,-0.07842597067943836,0.1768316923947948,-0.15036514264512993,0.17720844404478,-0.032769983831196565,-0.05384200603178666,0.40223312691527546,-0.16160477859825365,0.12586398804896323,-0.03232714493969918,2.329196830063018,0.10995672669259049,-0.09386536368786295,0.3637781937194872,0.6459904202827466,0.1730958624219754,0.017691469719621124,0.1866575357673022,0.16635337765848418,0.22405735306128063,0.20422408033752598,0.24812341122411308,0.1845913973835818,-0.08781606857583642,0.23268268328628158,-0.05608598232366001,0.12568348875778315,0.33861433232673077,0.32191462510358115,-0.02951996709252797,0.06053388524801029,-0.089538454625465,0.20233457476375763,0.30101257138098764,0.2161927339225275,0.024146241974332884
12 | 


--------------------------------------------------------------------------------
/common_methods.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Jul 27 10:59:17 2018
  4 | 
  5 | @author: antonio constandinou
  6 | """
  7 | 
  8 | # COMMON METHODS
  9 | 
 10 | import numpy as np
 11 | import pandas as pd
 12 | import statsmodels.tsa.stattools as ts
 13 | import os
 14 | import matplotlib.pyplot as plt
 15 | import matplotlib.dates as mdates
 16 | import functools
 17 | 
 18 | from itertools import combinations
 19 | import statsmodels.api as sm
 20 | 
 21 | def build_dict_of_arrays(list_of_tups):
 22 |     """
 23 |     create a dictionary from list of tuples. key = sector, values = array of tickers
 24 |            pertaining to a given sector
 25 |     args:
 26 |         list_of_tups: list of tickers matched with their sector
 27 |     returns:
 28 |         dictionary
 29 |     """
 30 |     sector_dict = {}
 31 |     
 32 |     for stock_sector in list_of_tups:
 33 |         sector = stock_sector[1]
 34 |         ticker = stock_sector[0]
 35 |         
 36 |         if sector not in sector_dict:
 37 |             sector_dict[sector] = [ticker]
 38 |         else:
 39 |             sector_dict[sector].append(ticker)
 40 |             
 41 |     return sector_dict
 42 | 
 43 | def data_array_merge(data_array):
 44 |     """
 45 |     merge all dfs into one dfs
 46 |     args:
 47 |         data_array: array of pandas df
 48 |     returns:
 49 |         merged_df, single pandas dataframe
 50 |     """
 51 |     merged_df = functools.reduce(lambda left,right: pd.merge(left,right,on='Date'), data_array)
 52 |     merged_df.set_index('Date', inplace=True)
 53 |     return merged_df
 54 | 
 55 | def fetch_last_day_mth(year_, conn):
 56 |     """
 57 |     return date of the last day of data we have for a given year in our Postgres DB. 
 58 |     args:
 59 |         year_: year, type int
 60 |         conn: a Postgres DB connection object
 61 |     return:
 62 |         integer, last trading day of year that we have data for
 63 |     """  
 64 |     cur = conn.cursor()
 65 |     SQL =   """
 66 |             SELECT MAX(date_part('day', date_price)) FROM daily_data
 67 |             WHERE date_price BETWEEN '%s-12-01' AND '%s-12-31'
 68 |             """
 69 |     cur.execute(SQL, [year_,year_])        
 70 |     data = cur.fetchall()
 71 |     cur.close()
 72 |     last_day = int(data[0][0])
 73 |     return last_day
 74 | 
 75 | def fetch_last_day_any_mth(year_, mth_, conn):
 76 |     """
 77 |     return date of the last day of data we have for a given month and year in our Postgres DB. 
 78 |     args:
 79 |         year_: year, type int
 80 |         mth_: month, type int
 81 |         conn: a Postgres DB connection object
 82 |     return:
 83 |         integer, last trading day of year that we have data for
 84 |     """  
 85 |     cur = conn.cursor()
 86 |     SQL =   """
 87 |             SELECT MAX(date_part('day', date_price)) FROM daily_data
 88 |             WHERE date_price BETWEEN '%s-%s-01' AND '%s-%s-30'
 89 |             """
 90 |     cur.execute(SQL, [year_,mth_, year_, mth_])        
 91 |     data = cur.fetchall()
 92 |     cur.close()
 93 |     last_day = int(data[0][0])
 94 |     return last_day
 95 | 
 96 | def find_cointegrated_pairs(data, p_value=0.01):
 97 |     """
 98 |     statsmodels.tsa.stattools coint method for identifying pairs
 99 |     args:
100 |         data: needs to be pd_df where each column = individual ticker Adj_Close
101 |         p_value: threshold for accepting a pairs model (float), default 0.01
102 |     returns:
103 |         score_matrix (np.array), pvalue_matrix (np.array), pairs (array)
104 |     """
105 |     n = data.shape[1]
106 |     score_matrix = np.zeros((n, n))
107 |     pvalue_matrix = np.ones((n, n))
108 |     keys = data.keys()
109 |     pairs = []
110 |     for i in range(n):
111 |         for j in range(i+1, n):
112 |             S1 = data[keys[i]]
113 |             S2 = data[keys[j]]
114 |             result = ts.coint(S1, S2)
115 |             score = result[0]
116 |             pvalue = result[1]
117 |             score_matrix[i, j] = score
118 |             pvalue_matrix[i, j] = pvalue
119 |             if pvalue < p_value:
120 |                 pairs.append((keys[i], keys[j]))
121 |     return score_matrix, pvalue_matrix, pairs
122 |    
def load_db_credential_info(f_name_path):
    """
    load text file holding our database credential info and the database name.
    the first line of the file is skipped (treated as a header); the second line
    is split on commas.
    args:
        f_name_path: name of file preceded with a path separator, type string.
                     it is appended to the current working directory
    returns:
        list of the comma-separated values on the second line of the file
        (callers unpack it as host, user, password, database name)
    raises:
        ValueError: if the file has no line after the header
    """
    cur_path = os.getcwd()
    # lets load our database credentials and info
    with open(cur_path + f_name_path, 'r') as f:
        lines = f.readlines()[1:]

    if not lines:
        raise ValueError("no credential line found after the header in {}".format(f_name_path))

    # drop the line terminator so it does not end up glued to the last value
    return lines[0].rstrip('\r\n').split(',')
137 | 
def load_db_tickers_start_date(start_date, conn):
    """
    return a list of stock tickers that have data on the start_date arg provided
    args:
        start_date: datetime object to be used to query our PostgreSQL database
        conn: a Postgres DB connection object
    returns:
        list of tuples, each tuple holding a single ticker
    """
    # convert start_date to string for our SQL query
    date_string = start_date.strftime("%Y-%m-%d")

    SQL =   """
            SELECT ticker FROM symbol
            WHERE id IN
              (SELECT DISTINCT(stock_id) 
               FROM daily_data
               WHERE date_price = %s)
            """
    cur = conn.cursor()
    try:
        cur.execute(SQL, (date_string,))
        data = cur.fetchall()
    finally:
        # close the cursor like the fetch_last_day_* helpers do, even on failure
        cur.close()
    return data
161 | 
def load_db_tickers_sectors(start_date, conn):
    """
    return a list of tuples. each tuple is a ticker paired with its sector.
    only tickers that have data on the start_date arg provided are returned
    args:
        start_date: datetime object to be used to query our PostgreSQL database
        conn: a Postgres DB connection object
    returns:
        list of tuples (ticker, sector)
    """
    # convert start_date to string for our SQL query
    date_string = start_date.strftime("%Y-%m-%d")

    SQL =   """
            SELECT ticker, sector FROM symbol
            WHERE id IN
              (SELECT DISTINCT(stock_id) 
               FROM daily_data
               WHERE date_price = %s)
            """
    cur = conn.cursor()
    try:
        cur.execute(SQL, (date_string,))
        data = cur.fetchall()
    finally:
        # close the cursor like the fetch_last_day_* helpers do, even on failure
        cur.close()
    return data
184 | 
def load_df_stock_data_array(stocks, start_date, end_date, conn):
    """
    return an array where each element is a dataframe of loaded data
    args:
        stocks: iterable of strings, each string is ticker
        start_date: datetime object to filter our pandas dataframe (exclusive bound)
        end_date: datetime object to filter our pandas dataframe (inclusive bound)
        conn: a Postgres DB connection object
    returns:
        array of pandas dataframe, one per ticker in the order given. each dataframe
        has columns ['Date', <ticker>], is sorted by date, re-indexed from 0 and
        holds only rows with start_date < Date <= end_date
    """
    array_pd_dfs = []

    # NOTE(review): LIKE treats '%' and '_' inside a ticker as wildcards -
    # confirm whether an exact match (=) is what is intended here
    SQL = """
          SELECT date_price, adj_close_price 
          FROM daily_data 
          INNER JOIN symbol ON symbol.id = daily_data.stock_id 
          WHERE symbol.ticker LIKE %s
          """
    cur = conn.cursor()
    try:
        for ticker in stocks:
            # fetch our stock data from our Postgres DB
            cur.execute(SQL, (ticker,))
            results = cur.fetchall()
            # create a pandas dataframe of our results
            stock_data = pd.DataFrame(results, columns=['Date', ticker])
            # ensure our data is in order of date
            stock_data = stock_data.sort_values(by=['Date'], ascending=True)
            # convert our column to float
            stock_data[ticker] = stock_data[ticker].astype(float)
            # filter our column based on a date range
            mask = (stock_data['Date'] > start_date) & (stock_data['Date'] <= end_date)
            # rebuild and re-index our dataframe
            stock_data = stock_data.loc[mask].reset_index(drop=True)
            array_pd_dfs.append(stock_data)
    finally:
        # the cursor was previously never closed; release it even on failure
        cur.close()

    return array_pd_dfs
226 | 
def load_pairs_stock_data(pair, start_date, end_date, conn):
    """
    return an array of dataframes holding the price data of each ticker in a pair
    args:
        pair: tuple of two strings, each string is ticker
        start_date: datetime object to filter our pandas dataframe (exclusive bound)
        end_date: datetime object to filter our pandas dataframe (inclusive bound)
        conn: a Postgres DB connection object
    returns:
        array of pandas dataframe, one per ticker in the order given. each dataframe
        has columns ['Date', 'Adj_Close'], is sorted by date, re-indexed from 0 and
        holds only rows with start_date < Date <= end_date
    """
    array_pd_dfs = []

    # NOTE(review): LIKE treats '%' and '_' inside a ticker as wildcards -
    # confirm whether an exact match (=) is what is intended here
    SQL = """
          SELECT date_price, adj_close_price 
          FROM daily_data 
          INNER JOIN symbol ON symbol.id = daily_data.stock_id 
          WHERE symbol.ticker LIKE %s
          """
    cur = conn.cursor()
    try:
        # for each ticker in our pair
        for ticker in pair:
            # fetch our stock data from our Postgres DB
            cur.execute(SQL, (ticker,))
            results = cur.fetchall()
            # create a pandas dataframe of our results
            stock_data = pd.DataFrame(results, columns=['Date', 'Adj_Close'])
            # ensure our data is in order of date
            stock_data = stock_data.sort_values(by=['Date'], ascending=True)
            # convert our column to float
            stock_data['Adj_Close'] = stock_data['Adj_Close'].astype(float)
            # filter our column based on a date range
            mask = (stock_data['Date'] > start_date) & (stock_data['Date'] <= end_date)
            # rebuild and re-index our dataframe
            stock_data = stock_data.loc[mask].reset_index(drop=True)
            array_pd_dfs.append(stock_data)
    finally:
        # the cursor was previously never closed; release it even on failure
        cur.close()

    return array_pd_dfs
268 | 
def pair_data_verifier(array_df_data, pair_tickers, threshold=10):
    """
    inner-merge the two dataframes of a pair on 'Date' and check how many rows
    each side lost in the merge.
    args:
        array_df_data: array of two pandas dataframes
        pair_tickers: tuple of both tickers, used as the merged column names
        threshold: integer, max number of rows either dataframe may lose in the
                   merge. default = 10 to represent 10 days.
    returns:
        boolean False or new merged pandas dataframe

        False: if either dataframe lost more than threshold rows
        merged pandas dataframe: otherwise, with columns ['Date', ticker 1, ticker 2]
                                 and both price columns rounded to 2 decimals
    """
    first_ticker, second_ticker = pair_tickers[0], pair_tickers[1]
    first_df, second_df = array_df_data[0], array_df_data[1]

    combined = pd.merge(first_df, second_df, how='inner', on='Date')
    combined.columns = ['Date', first_ticker, second_ticker]

    # round both price columns
    for name in (first_ticker, second_ticker):
        combined[name] = combined[name].round(decimals=2)

    # the longer input is the one that lost the most rows in the merge
    rows_kept = len(combined.index)
    largest_loss = max(len(first_df.index), len(second_df.index)) - rows_kept

    if largest_loss > threshold:
        print("This pair {0} and {1} were missing data.".format(first_ticker, second_ticker))
        return False

    return combined
310 | 
311 | """PLOT METHODS"""
def plot_price_series(df, ts1, ts2, start_date, end_date):
    """
    plot two price columns of a dataframe against its index and show the figure
    args:
        df: pandas dataframe holding both columns
        ts1: name of the first column to plot, type string
        ts2: name of the second column to plot, type string
        start_date: not used by the plot
        end_date: not used by the plot
    returns:
        None
    """
    figure, axes = plt.subplots()
    for column in (ts1, ts2):
        axes.plot(df.index, df[column], label=column)

    # one major tick per month, labelled like "Jan 2010"
    axes.xaxis.set_major_locator(mdates.MonthLocator())
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    axes.grid(True)
    figure.autofmt_xdate()

    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()
326 | 
def plot_scatter_series(df, ts1, ts2):
    """
    scatter plot of one price column against another, then show the figure
    args:
        df: pandas dataframe holding both columns
        ts1: name of the column plotted on the x axis, type string
        ts2: name of the column plotted on the y axis, type string
    returns:
        None
    """
    x_caption = '%s Price ($)' % ts1
    y_caption = '%s Price ($)' % ts2
    heading = '%s and %s Price Scatterplot' % (ts1, ts2)

    plt.xlabel(x_caption)
    plt.ylabel(y_caption)
    plt.title(heading)
    plt.scatter(df[ts1], df[ts2])
    plt.show()
333 | 
def plot_residuals(df):
    """
    plot the "res" column of a dataframe against its index and show the figure
    args:
        df: pandas dataframe holding a "res" column
    returns:
        None
    """
    months = mdates.MonthLocator() # every month
    fig, ax = plt.subplots()
    # the series is drawn once, with its legend label; a second unlabeled
    # plt.plot of the same column used to be layered on top of it
    ax.plot(df.index, df["res"], label="Residuals")
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.grid(True)
    fig.autofmt_xdate()
    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('Residual Plot')
    plt.legend()
    plt.show()
348 |     
def remove_ticker(ticker, array_pairs_to_clean):
    """
    build a new array holding every pair that does not contain a given ticker
    args:
        ticker: ticker to remove, type string
        array_pairs_to_clean: array of tuples
    returns:
        new array of tuples in the original order; the input array is not modified
    """
    return [pair for pair in array_pairs_to_clean if ticker not in pair]
366 | 
def write_dict_text(f_name, dict_):
    """
    write dictionary info to text file.
    each line in text file = key, value
        the whole line is stripped of brackets "(" and ")", single quotes "'"
        and spaces
    args:
        f_name: file_name to create (".txt" is appended), type string
        dict_: python dictionary, key = string, value = iterable of elements
    returns:
        NoneType
    """
    f_name = f_name + ".txt"
    # one pass that deletes the same four characters the chained replaces removed
    strip_chars = str.maketrans("", "", "()' ")

    # the context manager closes the file; it was previously left open
    with open(f_name, 'w') as file_to_write:
        for sector, ticker_arr in dict_.items():
            for ele in ticker_arr:
                new_str = (sector + "," + str(ele)).translate(strip_chars)
                file_to_write.write("%s\n" % (new_str,))

    print("{0} file created.".format(f_name))
387 |     
def write_results_text_file(f_name, sub_array):
    """
    write an array to text file, one element per line.
    args:
        f_name: name of our file to be written (".txt" is appended), type string
        sub_array: array of our data
    returns:
        None
    """
    # lets write elements of array to a file
    f_name = f_name + ".txt"

    # the context manager closes the file; it was previously left open
    with open(f_name, 'w') as file_to_write:
        for ele in sub_array:
            file_to_write.write("%s\n" % (ele,))


--------------------------------------------------------------------------------