├── .gitignore
├── README.md
├── assets
    └── sample.png
├── dataoperations.py
├── earningsyield.py
├── fscore.py
├── index6m.py
├── main.py
├── roc.py
└── webscraping.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | __pycache__/
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Built with Python using Pandas and Beautiful Soup.  
2 | 
3 | ![Image](/assets/sample.png)
4 | 


--------------------------------------------------------------------------------
/assets/sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/borisng0112ca/StockScreeningScript/9dcb769202c60b00470282b54402b1479927b779/assets/sample.png


--------------------------------------------------------------------------------
/dataoperations.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | def data_cleansing(tickers, currentData, pastData, pastPastData):
 4 | 
 5 |     try:
 6 |         currentCombined = pd.DataFrame(currentData)
 7 |         pastCombined = pd.DataFrame(pastData)
 8 |         pastPastCombined = pd.DataFrame(pastPastData)
 9 | 
10 |         '''
11 |         for ticker in tickers:
12 |             currentCombined = currentCombined[~currentCombined[ticker].str.contains("[a-z]").fillna(False)]
13 |             pastCombined = pastCombined[~pastCombined[ticker].str.contains("[a-z]").fillna(False)]
14 |             pastPastCombined = pastPastCombined[~pastPastCombined[ticker].str.contains("[a-z]").fillna(False)]
15 |         '''
16 | 
17 |         currentCombined[tickers] = currentCombined[tickers].replace({',': ''}, regex=True)
18 |         currentCombined[tickers] = currentCombined[tickers].replace({'M': 'E+03'}, regex=True)
19 |         currentCombined[tickers] = currentCombined[tickers].replace({'B': 'E+06'}, regex=True)
20 |         currentCombined[tickers] = currentCombined[tickers].replace({'T': 'E+09'}, regex=True)
21 |         currentCombined[tickers] = currentCombined[tickers].replace({'%': 'E-2'}, regex=True)
22 |         pastCombined[tickers] = pastCombined[tickers].replace({',': ''}, regex=True)
23 |         pastCombined[tickers] = pastCombined[tickers].replace({'M': 'E+03'}, regex=True)
24 |         pastCombined[tickers] = pastCombined[tickers].replace({'B': 'E+06'}, regex=True)
25 |         pastCombined[tickers] = pastCombined[tickers].replace({'T': 'E+09'}, regex=True)
26 |         pastCombined[tickers] = pastCombined[tickers].replace({'%': 'E-2'}, regex=True)
27 |         pastPastCombined[tickers] = pastPastCombined[tickers].replace({',': ''}, regex=True)
28 |         pastPastCombined[tickers] = pastPastCombined[tickers].replace({'M': 'E+03'}, regex=True)
29 |         pastPastCombined[tickers] = pastPastCombined[tickers].replace({'B': 'E+06'}, regex=True)
30 |         pastPastCombined[tickers] = pastPastCombined[tickers].replace({'T': 'E+09'}, regex=True)
31 |         pastPastCombined[tickers] = pastPastCombined[tickers].replace({'%': 'E-2'}, regex=True)
32 | 
33 |         for ticker in currentCombined:
34 |             currentCombined[ticker] = pd.to_numeric(currentCombined[ticker].values, errors='coerce')
35 |             pastCombined[ticker] = pd.to_numeric(pastCombined[ticker].values, errors='coerce')
36 |             pastPastCombined[ticker] = pd.to_numeric(pastPastCombined[ticker].values, errors='coerce')
37 | 
38 |         return currentCombined, pastCombined, pastPastCombined
39 | 
40 |     except:
41 |         print("\nError in data cleansing...")
42 | 
43 | 


--------------------------------------------------------------------------------
/earningsyield.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | def eyCalculation(currentCombined, percent):
 4 | 
 5 |     try:
 6 |         magic_df = pd.DataFrame()
 7 |         temp_df = pd.DataFrame()
 8 |         temp_df["EBITDA"] = currentCombined.loc["EBITDA", :]
 9 |         temp_df["Depreciation & amortization"] = currentCombined.loc["Depreciation & amortization", :]
10 |         temp_df["Market Cap (intraday)"] = currentCombined.loc["Market Cap (intraday)"]
11 |         temp_df["Long Term Debt"] = currentCombined.loc["Long Term Debt"]
12 |         temp_df["Total Current Assets"] = currentCombined.loc["Total Current Assets"]
13 |         temp_df["Total Current Liabilities"] = currentCombined.loc["Total Current Liabilities"]
14 |         temp_df = temp_df.dropna()
15 | 
16 |         # Earnings Yield
17 |         magic_df["EBIT"] = temp_df["EBITDA"] - temp_df["Depreciation & amortization"]
18 |         magic_df["Enterprise Value"] = temp_df["Market Cap (intraday)"] + temp_df["Long Term Debt"] \
19 |                                        - (temp_df["Total Current Assets"] - temp_df["Total Current Liabilities"])
20 |         magic_df["Earnings Yield"] = magic_df["EBIT"] / magic_df["Enterprise Value"]
21 |         magic_df["EY Rank"] = magic_df["Earnings Yield"].rank(ascending=False, na_option='bottom')
22 |         magic_df.sort_values(by=["EY Rank"], inplace=True)
23 |         print('\n',magic_df.loc[:, ["EY Rank", "Earnings Yield"]])
24 | 
25 |         tickers = magic_df.index.values
26 |         tickers = tickers[:int(len(tickers)*percent)]
27 | 
28 |         # magic_df.drop(magic_df.index[len(tickers)//5:], inplace=True)
29 | 
30 |         return list(tickers)
31 | 
32 |     except:
33 |         print("\nError in Earnings Yield calculation...")
34 |         return []


--------------------------------------------------------------------------------
/fscore.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | def fscoreCalculation(currentCombined, pastCombined, pastPastCombined, originalTickers, percent):
 4 | 
 5 |     try:
 6 |         originalTickers = currentCombined.columns
 7 | 
 8 |         fscore = {}
 9 |         for ticker in originalTickers:
10 | 
11 |             ROA_FS = int(currentCombined.loc["Net Income available to common shareholders", ticker] / ((currentCombined.loc["Total Assets", ticker] + pastCombined.loc["Total Assets", ticker]) / 2) > 0)
12 |             CFO_FS = int(currentCombined.loc["Operating Cash Flow", ticker] > 0)
13 |             ROA_D_FS = int(
14 |                 currentCombined.loc["Net Income available to common shareholders", ticker] / (currentCombined.loc["Total Assets", ticker] + pastCombined.loc["Total Assets", ticker]) / 2 >
15 |                 pastCombined.loc["Net Income available to common shareholders", ticker] / (pastCombined.loc["Total Assets", ticker] + pastPastCombined.loc["Total Assets", ticker]) / 2)
16 | 
17 |             CFO_ROA_FS = int(
18 |                 currentCombined.loc["Operating Cash Flow", ticker] / currentCombined.loc["Total Assets", ticker] > currentCombined.loc["Net Income available to common shareholders", ticker] / (
19 |                 (currentCombined.loc["Total Assets", ticker] + pastCombined.loc["Total Assets", ticker]) / 2))
20 | 
21 |             LTD_FS = int((currentCombined.loc["Long Term Debt", ticker] + currentCombined.loc["Other long-term liabilities", ticker]) < (
22 |                         pastCombined.loc["Long Term Debt", ticker] + pastCombined.loc["Other long-term liabilities", ticker]))
23 | 
24 | 
25 |             CR_FS = int((currentCombined.loc["Total Current Assets", ticker] / currentCombined.loc["Total Current Liabilities", ticker]) > (
26 |                         pastCombined.loc["Total Current Assets", ticker] / pastCombined.loc["Total Current Liabilities", ticker]))
27 | 
28 |             DILUTION_FS = int(currentCombined.loc["Common Stock", ticker] <= pastCombined.loc["Common Stock", ticker])
29 | 
30 |             GM_FS = int((currentCombined.loc["Gross Profit", ticker] / currentCombined.loc["Total Revenue", ticker]) > (
31 |                         pastCombined.loc["Gross Profit", ticker] / pastCombined.loc["Total Revenue", ticker]))
32 | 
33 |             ATO_FS = int(
34 |                 currentCombined.loc["Total Revenue", ticker] / ((currentCombined.loc["Total Assets", ticker] + pastCombined.loc["Total Assets", ticker]) / 2) >
35 |                 pastCombined.loc["Total Revenue", ticker] / ((pastCombined.loc["Total Assets", ticker] + pastPastCombined.loc["Total Assets", ticker]) / 2))
36 | 
37 |             fscore[ticker] = [ROA_FS, CFO_FS, ROA_D_FS, CFO_ROA_FS, LTD_FS, CR_FS, DILUTION_FS, GM_FS, ATO_FS]
38 | 
39 |         fscore_df = pd.DataFrame(fscore,index=["PosROA", "PosCFO", "ROAChange", "Accruals", "Leverage", "Liquidity", "Dilution","GM", "ATO"])
40 |         fscore_df = fscore_df.transpose()
41 |         fscore_df['Sum'] =  fscore_df[["PosROA", "PosCFO", "ROAChange", "Accruals", "Leverage", "Liquidity", "Dilution", "GM", "ATO"]].sum(axis=1)
42 |         fscore_df.sort_values(by=["Sum"], inplace = True, ascending=False)
43 |         print('\n',fscore_df)
44 | 
45 |         filteredTickers = fscore_df.index
46 |         filteredTickers = filteredTickers[:int(len(filteredTickers)*percent)]
47 | 
48 |         return list(filteredTickers)
49 | 
50 |     except:
51 |         print("\nError in F-Score calculation...")
52 |         return []


--------------------------------------------------------------------------------
/index6m.py:
--------------------------------------------------------------------------------
 1 | import yfinance as yf
 2 | import datetime as dt
 3 | import pandas as pd
 4 | 
 5 | def sixMonthIndex(tickers, percent):
 6 | 
 7 |     try:
 8 |         start = dt.datetime.today() - dt.timedelta(180)
 9 |         end = dt.datetime.today()
10 |         cl_price = pd.DataFrame()
11 |         
12 |         print('\n')
13 |         for ticker in tickers:
14 |             cl_price[ticker]= yf.download(ticker, start, end, period = "6mo")["Adj Close"]
15 | 
16 |         list6m = cl_price.iloc[-1] / cl_price.iloc[0]
17 |         list6m.sort_values(ascending = False, inplace = True)
18 |         print("\n6 month Index")
19 |         print(list6m)
20 | 
21 |         finalList = list(list6m.index.values)
22 |         finalList = finalList[:int(len(finalList)*percent)]
23 | 
24 |         return finalList
25 | 
26 |     except:
27 |         print("\nError in 6-month Index calculation...")
28 |         return []


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import webscraping
 2 | import dataoperations
 3 | import roc
 4 | import earningsyield
 5 | import fscore
 6 | import index6m
 7 | 
 8 | if __name__ == '__main__':
 9 | 
10 |    #stocks to be filtered
11 |    '''RECOMMENDED: 20+ Stocks to work optimally'''
12 | 
13 |    startingTickers = ["TSLA", "AAPL", "GOOG"]
14 | 
15 |    # startingTickers = ["TSLA", "AAPL", "GOOG", "GME", "AMC", "PLTR", "AMC", "IBM", "AC.TO", "FB", "NFLX", "DDOG", "SPLK", "GM", "MSFT", "LMND", "CM",
16 |    #                   "RY", "TD", "BMO"]
17 | 
18 |    #Retreive and store primitive data
19 |    '''YAHOO FINANCE WEBSCRAPING TEMPORORY BAN ISSUE AWAITING FIX'''
20 |    currentData, pastData, pastPastData = webscraping.getFinancialData(startingTickers)
21 | 
22 |    #data operations
23 |    currentCombined, pastCombined, pastPastCombined = dataoperations.data_cleansing(startingTickers, currentData, pastData, pastPastData)
24 |    del currentData, pastData, pastPastData
25 | 
26 |    #filter 1 (ROC Top 50%)
27 |    rocTickers = roc.rocCalculation(currentCombined, 0.5)
28 | 
29 |    #filter 2 (Earnings Yield Top 20%)
30 |    eyTickers = earningsyield.eyCalculation(currentCombined, 0.2)
31 | 
32 |    #filter 3 (F-Score Top 40%)
33 |    fscoreTickers = fscore.fscoreCalculation(currentCombined, pastCombined, pastPastCombined, startingTickers, 0.4)
34 | 
35 |    #filter 4 (Momentum 6m Top 20%)
36 |    index6mTickers = index6m.sixMonthIndex(startingTickers, 0.2)
37 | 
38 |    finalTickers = []
39 | 
40 |    #stocks that passed the 4 filters will be the remaining stocks
41 |    for ticker in startingTickers:
42 |       if (ticker in rocTickers) and (ticker in eyTickers) and (ticker in index6mTickers) and (ticker in fscoreTickers):
43 |           finalTickers.append(ticker)
44 | 
45 |    print("\nStocks remaining after 4 filters:")
46 |    if len(finalTickers) == 0:
47 |       print("0 stocks passed.\n")
48 |    else:
49 |       print(finalTickers,'\n')


--------------------------------------------------------------------------------
/roc.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | def rocCalculation(currentCombined, percent):
 4 | 
 5 |     try:
 6 |         magic_df = pd.DataFrame()
 7 |         temp_df = pd.DataFrame()
 8 |         temp_df["EBITDA"] = currentCombined.loc["EBITDA",:]
 9 |         temp_df["Depreciation & amortization"] = currentCombined.loc["Depreciation & amortization",:]
10 |         temp_df["Total Current Assets"] = currentCombined.loc["Total Current Assets"]
11 |         temp_df["Total Current Liabilities"] = currentCombined.loc["Total Current Liabilities"]
12 |         temp_df["Net property, plant and equipment"] = currentCombined.loc["Net property, plant and equipment"]
13 |         temp_df = temp_df.dropna()
14 | 
15 |         #ROC
16 |         magic_df["Returns On Capital"] = (temp_df["EBITDA"] - temp_df["Depreciation & amortization"]) / (
17 |                 temp_df["Net property, plant and equipment"] + temp_df["Total Current Assets"] - temp_df["Total Current Liabilities"])
18 |         magic_df["ROC Rank"] = magic_df["Returns On Capital"].rank(ascending=False,na_option='bottom')
19 |         magic_df.sort_values(by=["ROC Rank"], inplace = True)
20 |         print('\n',magic_df.loc[:,["ROC Rank", "Returns On Capital"]])
21 | 
22 |         tickers = magic_df.index.values
23 |         tickers = tickers[:int(len(tickers)*percent)]
24 | 
25 |         # magic_df.drop(magic_df.index[len(tickers)//2:], inplace=True)
26 | 
27 |         return list(tickers)
28 | 
29 |     except:
30 |         print("\nError in ROC calculation...")
31 |         return []


--------------------------------------------------------------------------------
/webscraping.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | def getFinancialData(tickers):
 5 | 	
 6 | 	yahooURL = 'https://ca.finance.yahoo.com/quote/'
 7 | 	currentData = {}
 8 | 	pastData = {}
 9 | 	pastPastData = {}
10 | 
11 | 	print('\n****Value Investing Stock Screener V1.1****\n')
12 | 	for ticker in tickers:
13 | 		try:
14 | 			temp_dir1 = {}
15 | 			temp_dir2 = {}
16 | 			temp_dir3 = {}
17 | 
18 | 			print("Scraping financial data for "+ ticker)
19 | 			# getting balance sheet data from yahoo finance for the given ticker
20 | 			url = yahooURL +ticker+'/balance-sheet?p='+ticker
21 | 			page = requests.get(url)
22 | 			page_content = page.content
23 | 			soup = BeautifulSoup(page_content,'html.parser')
24 | 			tabl = soup.find_all("div", {"class" : "M(0) Whs(n) BdEnd Bdc($seperatorColor) D(itb)"})
25 | 			for t in tabl:
26 | 				rows = t.find_all("div", {"class" : "rw-expnded"})
27 | 				for row in rows:
28 | 					temp_dir1[row.get_text(separator='|').split("|")[0]]=row.get_text(separator='|').split("|")[1]
29 | 					temp_dir2[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[2]
30 | 					temp_dir3[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[3]
31 | 
32 | 			#getting income statement data from yahoo finance for the given ticker
33 | 			url = yahooURL +ticker+'/financials?p='+ticker
34 | 			page = requests.get(url)
35 | 			page_content = page.content
36 | 			soup = BeautifulSoup(page_content,'html.parser')
37 | 			tabl = soup.find_all("div", {"class" : "M(0) Whs(n) BdEnd Bdc($seperatorColor) D(itb)"})
38 | 			for t in tabl:
39 | 				rows = t.find_all("div", {"class" : "rw-expnded"})
40 | 				for row in rows:
41 | 					temp_dir1[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[1]
42 | 					temp_dir2[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[2]
43 | 					temp_dir3[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[3]
44 | 
45 | 			#getting cashflow statement data from yahoo finance for the given ticker
46 | 			url = yahooURL +ticker+'/cash-flow?p='+ticker
47 | 			page = requests.get(url)
48 | 			page_content = page.content
49 | 			soup = BeautifulSoup(page_content,'html.parser')
50 | 			tabl = soup.find_all("div", {"class" : "M(0) Whs(n) BdEnd Bdc($seperatorColor) D(itb)"})
51 | 			for t in tabl:
52 | 				rows = t.find_all("div", {"class" : "rw-expnded"})
53 | 				for row in rows:
54 | 					temp_dir1[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[1]
55 | 					temp_dir2[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[2]
56 | 					temp_dir3[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[3]
57 | 
58 | 			#getting key statistics data from yahoo finance for the given ticker
59 | 			url = yahooURL +ticker+'/key-statistics?p='+ticker
60 | 			page = requests.get(url)
61 | 			page_content = page.content
62 | 			soup = BeautifulSoup(page_content,'html.parser')
63 | 			tabl = soup.findAll("div", {"class": "Mstart(a) Mend(a)"})
64 | 			for t in tabl:
65 | 				rows = t.find_all("tr")
66 | 				for row in rows:
67 | 					if len(row.get_text(separator='|').split("|")[0:2])>0:
68 | 						temp_dir1[row.get_text(separator='|').split("|")[0]] = row.get_text(separator='|').split("|")[-1]
69 | 
70 | 			#combining all extracted information with the corresponding ticker
71 | 			currentData[ticker] = temp_dir1
72 | 			pastData[ticker] = temp_dir2
73 | 			pastPastData[ticker] = temp_dir3
74 | 
75 | 		except:
76 | 			print("Error scraping data for " + ticker)
77 | 
78 | 	return currentData, pastData, pastPastData
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------