"""Value-investing stock screener (StockScreeningScript) as one runnable module.

Built with Python using Pandas and Beautiful Soup.

![Image](/assets/sample.png)

The project was originally laid out as separate files; every section below is
labelled with the file it came from:

    ├── .gitignore          ignores ``.DS_Store`` and ``__pycache__/``
    ├── README.md           the two README lines reproduced above
    ├── assets
    │   └── sample.png      https://raw.githubusercontent.com/borisng0112ca/StockScreeningScript/9dcb769202c60b00470282b54402b1479927b779/assets/sample.png
    ├── dataoperations.py   data_cleansing
    ├── earningsyield.py    eyCalculation
    ├── fscore.py           fscoreCalculation
    ├── index6m.py          sixMonthIndex
    ├── main.py             main
    ├── roc.py              rocCalculation
    └── webscraping.py      getFinancialData

``requests``, ``bs4`` and ``yfinance`` are imported inside the functions that
need them so that the pure-pandas stages can be imported (and tested) on a
machine that has no network libraries installed.
"""

import datetime as dt
import math
from urllib.parse import quote

import pandas as pd


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _top_fraction(ranked, percent):
    """Return the leading ``percent`` share (0..1) of ``ranked`` as a list.

    The count is ``floor(len * percent)``, like the ``int(len * percent)`` it
    replaces, but a tiny epsilon is added first so binary float error cannot
    drop an element (``100 * 0.29`` evaluates to ``28.999999999999996``).
    The count is clamped to ``0..len`` so a negative ``percent`` selects
    nothing instead of slicing from the end.
    """
    ranked = list(ranked)
    keep = math.floor(len(ranked) * percent + 1e-9)
    return ranked[:max(0, min(len(ranked), keep))]


def _explain(err):
    """Print the exception behind a stage failure so it can be diagnosed."""
    print(f"    ({type(err).__name__}: {err})")


# ---------------------------------------------------------------------------
# /dataoperations.py
# ---------------------------------------------------------------------------

# (pattern, replacement) pairs applied in this order to every scraped string.
# Thousands separators are removed; magnitude suffixes and '%' become
# exponents that pd.to_numeric can parse.
# NOTE(review): M -> E+03 (not E+06) presumably because the statement figures
# are quoted in thousands - confirm against the scraped pages.
_SUFFIX_REPLACEMENTS = (
    (',', ''),
    ('M', 'E+03'),
    ('B', 'E+06'),
    ('T', 'E+09'),
    ('%', 'E-2'),
)


def _clean_frame(raw, tickers):
    """Build a numeric DataFrame (rows = line items, columns = tickers) from ``raw``.

    Only tickers that were actually scraped are rewritten: selecting a column
    that is absent would raise KeyError and lose the whole data set.
    ``dict.fromkeys`` removes duplicate tickers while keeping their order.
    """
    frame = pd.DataFrame(raw)
    present = [t for t in dict.fromkeys(tickers) if t in frame.columns]
    if present:
        cleaned = frame[present]
        for pattern, replacement in _SUFFIX_REPLACEMENTS:
            cleaned = cleaned.replace({pattern: replacement}, regex=True)
        frame[present] = cleaned
    # Anything that still is not a number (e.g. "N/A") becomes NaN.
    return frame.apply(pd.to_numeric, errors='coerce')


def data_cleansing(tickers, currentData, pastData, pastPastData):
    """Convert the three scraped ``{ticker: {item: text}}`` dicts to numeric frames.

    Args:
        tickers: ticker symbols whose columns should be normalised.
        currentData, pastData, pastPastData: scraped values for the most
            recent period and the two periods before it.

    Returns:
        ``(currentCombined, pastCombined, pastPastCombined)`` DataFrames.  If
        cleansing fails, the error is printed and three empty DataFrames are
        returned, so the caller's three-way unpacking keeps working and each
        later filter reports its own failure.
    """
    try:
        return tuple(_clean_frame(raw, tickers)
                     for raw in (currentData, pastData, pastPastData))
    except Exception as err:  # best-effort stage boundary
        print("\nError in data cleansing...")
        _explain(err)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()


# ---------------------------------------------------------------------------
# /earningsyield.py
# ---------------------------------------------------------------------------

_EY_ROWS = [
    "EBITDA",
    "Depreciation & amortization",
    "Market Cap (intraday)",
    "Long Term Debt",
    "Total Current Assets",
    "Total Current Liabilities",
]


def eyCalculation(currentCombined, percent):
    """Rank tickers by earnings yield and return the top ``percent`` share.

    Earnings yield is EBIT / enterprise value, where
    EBIT = EBITDA - depreciation & amortization and
    enterprise value = market cap + long-term debt
    - (current assets - current liabilities).
    Tickers with a missing input are left out of the ranking.

    Returns:
        Ticker list, best yield first; ``[]`` if the calculation fails
        (for example when a required row is absent).
    """
    try:
        # .loc with a list raises KeyError when any required row is missing.
        inputs = currentCombined.loc[_EY_ROWS].T.dropna()

        ebit = inputs["EBITDA"] - inputs["Depreciation & amortization"]
        working_capital = inputs["Total Current Assets"] - inputs["Total Current Liabilities"]
        enterprise_value = inputs["Market Cap (intraday)"] + inputs["Long Term Debt"] - working_capital

        magic_df = pd.DataFrame({"EBIT": ebit, "Enterprise Value": enterprise_value})
        magic_df["Earnings Yield"] = magic_df["EBIT"] / magic_df["Enterprise Value"]
        magic_df["EY Rank"] = magic_df["Earnings Yield"].rank(ascending=False, na_option='bottom')
        # A stable sort keeps tied tickers in a reproducible order.
        magic_df = magic_df.sort_values(by="EY Rank", kind="mergesort")
        print('\n', magic_df.loc[:, ["EY Rank", "Earnings Yield"]])

        return _top_fraction(magic_df.index, percent)

    except Exception as err:  # best-effort stage boundary
        print("\nError in Earnings Yield calculation...")
        _explain(err)
        return []


# ---------------------------------------------------------------------------
# /fscore.py
# ---------------------------------------------------------------------------

_FSCORE_SIGNALS = ["PosROA", "PosCFO", "ROAChange", "Accruals", "Leverage",
                   "Liquidity", "Dilution", "GM", "ATO"]


def fscoreCalculation(currentCombined, pastCombined, pastPastCombined, originalTickers, percent):
    """Score every ticker on nine 0/1 signals and return the top ``percent`` share.

    Args:
        currentCombined, pastCombined, pastPastCombined: numeric frames
            (rows = line items, columns = tickers) for three periods.
        originalTickers: kept for call compatibility only; the tickers that
            are scored are always the columns of ``currentCombined``.
        percent: share (0..1) of the ranked tickers to return.

    Returns:
        Ticker list ordered by the sum of the nine signals, highest first;
        ``[]`` if the calculation fails (for example a missing row or a
        ticker absent from an earlier period).
    """
    try:
        tickers = currentCombined.columns
        cur = currentCombined
        # Select the same tickers in the same order so every Series below is
        # identically labelled; a ticker missing from an earlier period raises
        # KeyError and fails the whole stage.
        prev = pastCombined.loc[:, tickers]
        prev2 = pastPastCombined.loc[:, tickers]

        net_income = "Net Income available to common shareholders"
        avg_assets = (cur.loc["Total Assets"] + prev.loc["Total Assets"]) / 2
        prev_avg_assets = (prev.loc["Total Assets"] + prev2.loc["Total Assets"]) / 2
        roa = cur.loc[net_income] / avg_assets
        prev_roa = prev.loc[net_income] / prev_avg_assets

        signals = {
            "PosROA": roa > 0,
            "PosCFO": cur.loc["Operating Cash Flow"] > 0,
            "ROAChange": roa > prev_roa,
            # NOTE(review): cash flow is scaled by period-end assets while ROA
            # uses average assets - confirm the mixed denominators are intended.
            "Accruals": cur.loc["Operating Cash Flow"] / cur.loc["Total Assets"] > roa,
            "Leverage": (cur.loc["Long Term Debt"] + cur.loc["Other long-term liabilities"])
                        < (prev.loc["Long Term Debt"] + prev.loc["Other long-term liabilities"]),
            "Liquidity": (cur.loc["Total Current Assets"] / cur.loc["Total Current Liabilities"])
                         > (prev.loc["Total Current Assets"] / prev.loc["Total Current Liabilities"]),
            "Dilution": cur.loc["Common Stock"] <= prev.loc["Common Stock"],
            "GM": (cur.loc["Gross Profit"] / cur.loc["Total Revenue"])
                  > (prev.loc["Gross Profit"] / prev.loc["Total Revenue"]),
            "ATO": (cur.loc["Total Revenue"] / avg_assets)
                   > (prev.loc["Total Revenue"] / prev_avg_assets),
        }
        # A comparison involving NaN is False, so missing data scores 0.
        fscore_df = pd.DataFrame(signals, index=tickers, columns=_FSCORE_SIGNALS).astype(int)
        fscore_df['Sum'] = fscore_df[_FSCORE_SIGNALS].sum(axis=1)
        # A stable sort keeps tied tickers in a reproducible order.
        fscore_df = fscore_df.sort_values(by="Sum", ascending=False, kind="mergesort")
        print('\n', fscore_df)

        return _top_fraction(fscore_df.index, percent)

    except Exception as err:  # best-effort stage boundary
        print("\nError in F-Score calculation...")
        _explain(err)
        return []


# ---------------------------------------------------------------------------
# /index6m.py
# ---------------------------------------------------------------------------

def sixMonthIndex(tickers, percent):
    """Rank tickers by price change over the last 180 days.

    The ratio is last available adjusted close / first available adjusted
    close for each ticker, so tickers whose price series starts or ends on a
    different date than the others still get a value.  Tickers with no price
    at all are left out of the ranking.

    Returns:
        Ticker list, strongest first, cut to the top ``percent`` share;
        ``[]`` for empty input or if the download/calculation fails.
    """
    try:
        tickers = list(dict.fromkeys(tickers))  # drop duplicates, keep order
        if not tickers:
            return []

        import yfinance as yf  # local import: only this stage needs it

        end = dt.datetime.today()
        start = end - dt.timedelta(180)

        print('\n')
        closes = {}
        for ticker in tickers:
            # NOTE(review): relies on the download result exposing an
            # "Adj Close" column - confirm for the installed yfinance version.
            closes[ticker] = yf.download(ticker, start, end, period="6mo")["Adj Close"]

        # Outer-join on date so no ticker is truncated to another's calendar.
        cl_price = pd.concat(closes, axis=1).sort_index()

        first_close = cl_price.bfill().iloc[0]
        last_close = cl_price.ffill().iloc[-1]
        list6m = (last_close / first_close).dropna().sort_values(ascending=False)
        print("\n6 month Index")
        print(list6m)

        return _top_fraction(list6m.index, percent)

    except Exception as err:  # best-effort stage boundary
        print("\nError in 6-month Index calculation...")
        _explain(err)
        return []


# ---------------------------------------------------------------------------
# /roc.py
# ---------------------------------------------------------------------------

_ROC_ROWS = [
    "EBITDA",
    "Depreciation & amortization",
    "Total Current Assets",
    "Total Current Liabilities",
    "Net property, plant and equipment",
]


def rocCalculation(currentCombined, percent):
    """Rank tickers by return on capital and return the top ``percent`` share.

    Return on capital is (EBITDA - depreciation & amortization) /
    (net property, plant and equipment + current assets - current liabilities).
    Tickers with a missing input are left out of the ranking.

    Returns:
        Ticker list, best return first; ``[]`` if the calculation fails
        (for example when a required row is absent).
    """
    try:
        # .loc with a list raises KeyError when any required row is missing.
        inputs = currentCombined.loc[_ROC_ROWS].T.dropna()

        ebit = inputs["EBITDA"] - inputs["Depreciation & amortization"]
        capital = (inputs["Net property, plant and equipment"]
                   + inputs["Total Current Assets"] - inputs["Total Current Liabilities"])

        magic_df = pd.DataFrame({"Returns On Capital": ebit / capital})
        magic_df["ROC Rank"] = magic_df["Returns On Capital"].rank(ascending=False, na_option='bottom')
        # A stable sort keeps tied tickers in a reproducible order.
        magic_df = magic_df.sort_values(by="ROC Rank", kind="mergesort")
        print('\n', magic_df.loc[:, ["ROC Rank", "Returns On Capital"]])

        return _top_fraction(magic_df.index, percent)

    except Exception as err:  # best-effort stage boundary
        print("\nError in ROC calculation...")
        _explain(err)
        return []


# ---------------------------------------------------------------------------
# /webscraping.py
# ---------------------------------------------------------------------------

_YAHOO_URL = 'https://ca.finance.yahoo.com/quote/'
_STATEMENT_TABLE_CLASS = "M(0) Whs(n) BdEnd Bdc($seperatorColor) D(itb)"
_STATEMENT_PAGES = ('balance-sheet', 'financials', 'cash-flow')
_REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks forever


def _fetch_soup(url):
    """Download ``url`` and return its parsed HTML."""
    import requests  # local imports: only the scraper needs them
    from bs4 import BeautifulSoup

    page = requests.get(url, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(page.content, 'html.parser')


def _scrape_statement(url, current, past, past_past):
    """Copy the first three value columns of a statement page into the dicts."""
    soup = _fetch_soup(url)
    for table in soup.find_all("div", {"class": _STATEMENT_TABLE_CLASS}):
        for row in table.find_all("div", {"class": "rw-expnded"}):
            cells = row.get_text(separator='|').split("|")
            if len(cells) < 4:
                # A row without three value columns cannot be used; skipping
                # it keeps the rest of the ticker's data.
                continue
            label = cells[0]
            current[label] = cells[1]
            past[label] = cells[2]
            past_past[label] = cells[3]


def _scrape_key_statistics(url, current):
    """Copy ``label -> last cell`` of every key-statistics row into ``current``."""
    soup = _fetch_soup(url)
    for section in soup.find_all("div", {"class": "Mstart(a) Mend(a)"}):
        for row in section.find_all("tr"):
            cells = row.get_text(separator='|').split("|")
            # A row needs a label and a value; a single-cell row would
            # otherwise be stored with its own label as the value.
            if len(cells) >= 2:
                current[cells[0]] = cells[-1]


def getFinancialData(tickers):
    """Scrape balance sheet, income, cash-flow and key-statistics pages per ticker.

    Returns:
        ``(currentData, pastData, pastPastData)``: three dicts mapping each
        successfully scraped ticker to ``{row label: cell text}`` for the
        first, second and third value column of the statement tables.  Key
        statistics are stored in ``currentData`` only.  A ticker whose
        scraping raises is reported and left out of all three dicts.
    """
    currentData = {}
    pastData = {}
    pastPastData = {}

    print('\n****Value Investing Stock Screener V1.1****\n')
    for ticker in tickers:
        try:
            current, past, past_past = {}, {}, {}

            print(f"Scraping financial data for {ticker}")
            symbol = quote(str(ticker), safe="")
            for page in _STATEMENT_PAGES:
                _scrape_statement(f"{_YAHOO_URL}{symbol}/{page}?p={symbol}",
                                  current, past, past_past)
            _scrape_key_statistics(f"{_YAHOO_URL}{symbol}/key-statistics?p={symbol}", current)

            # Stored only after every page succeeded, so a ticker is either
            # complete or absent.
            currentData[ticker] = current
            pastData[ticker] = past
            pastPastData[ticker] = past_past

        except Exception as err:  # one bad ticker must not stop the others
            print(f"Error scraping data for {ticker}")
            _explain(err)

    return currentData, pastData, pastPastData


# ---------------------------------------------------------------------------
# /main.py
# ---------------------------------------------------------------------------

def main():
    """Run the four filters on the starting tickers and print the survivors."""
    # Stocks to be filtered.
    # RECOMMENDED: 20+ stocks to work optimally (with only a few tickers the
    # "top 20%" filters round down to zero names).
    startingTickers = ["TSLA", "AAPL", "GOOG"]

    # startingTickers = ["TSLA", "AAPL", "GOOG", "GME", "AMC", "PLTR", "AMC", "IBM", "AC.TO", "FB", "NFLX", "DDOG", "SPLK", "GM", "MSFT", "LMND", "CM",
    #                    "RY", "TD", "BMO"]

    # Retrieve and store primitive data.
    # YAHOO FINANCE WEBSCRAPING TEMPORARY BAN ISSUE AWAITING FIX
    currentData, pastData, pastPastData = getFinancialData(startingTickers)

    # Data operations
    currentCombined, pastCombined, pastPastCombined = data_cleansing(
        startingTickers, currentData, pastData, pastPastData)
    del currentData, pastData, pastPastData

    # Filter 1 (ROC top 50%)
    rocTickers = rocCalculation(currentCombined, 0.5)

    # Filter 2 (earnings yield top 20%)
    eyTickers = eyCalculation(currentCombined, 0.2)

    # Filter 3 (F-Score top 40%)
    fscoreTickers = fscoreCalculation(currentCombined, pastCombined, pastPastCombined, startingTickers, 0.4)

    # Filter 4 (6-month momentum top 20%)
    index6mTickers = sixMonthIndex(startingTickers, 0.2)

    # Stocks that passed all 4 filters are the remaining stocks.
    passing = set(rocTickers) & set(eyTickers) & set(index6mTickers) & set(fscoreTickers)
    finalTickers = [ticker for ticker in startingTickers if ticker in passing]

    print("\nStocks remaining after 4 filters:")
    if not finalTickers:
        print("0 stocks passed.\n")
    else:
        print(finalTickers, '\n')


if __name__ == '__main__':
    main()