├── doc
│   └── pot_stocks.ods
├── input
│   ├── sql_cmd
│   │   ├── select_notupdated1.txt
│   │   ├── select_notupdated2.txt
│   │   ├── select_notupdated3.txt
│   │   └── clean.txt
│   ├── ms_investment-types.csv
│   ├── pot_stocks.json
│   ├── api.json
│   ├── api00.json
│   ├── symbols.csv
│   └── ctycodes.csv
├── LICENSE
├── main.py
├── README.md
├── dataframes.py
├── sample_rules_output.csv
├── fetch.py
└── parse.py
/doc/pot_stocks.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Todo/mstables/master/doc/pot_stocks.ods
--------------------------------------------------------------------------------
/input/sql_cmd/select_notupdated1.txt:
--------------------------------------------------------------------------------
1 | SELECT 0, '', Tickers.id, Tickers.ticker FROM Tickers LEFT JOIN (
2 |
3 | SELECT url_id AS UID, ticker_id AS TID, exch_id AS EID FROM Fetched_urls
4 | WHERE UID = {} AND strftime('%Y%W', fetch_date) = strftime('%Y%W', 'now')
5 |
6 | ) on Tickers.id = TID WHERE UID IS NULL
7 |
--------------------------------------------------------------------------------
/input/sql_cmd/select_notupdated2.txt:
--------------------------------------------------------------------------------
1 | SELECT Exchanges.id, Exchanges.exchange_sym, Tickers.id, Tickers.ticker FROM Master LEFT JOIN (
2 |
3 | SELECT url_id AS UID, exch_id AS EID0, ticker_id AS TID0 FROM Fetched_urls
4 | WHERE UID = {} AND strftime('%Y%W', fetch_date) = strftime('%Y%W', 'now')
5 |
6 | ) ON EID0 = Master.exchange_id AND TID0 = Master.ticker_id
7 | JOIN Tickers ON Tickers.id = Master.ticker_id
8 | JOIN Exchanges ON Exchanges.id = Master.exchange_id
9 | WHERE UID IS NULL
10 |
--------------------------------------------------------------------------------
/input/sql_cmd/select_notupdated3.txt:
--------------------------------------------------------------------------------
1 | SELECT Exchanges.id, Exchanges.exchange_sym, Tickers.id, Tickers.ticker FROM Master LEFT JOIN (
2 |
3 | SELECT url_id AS UID, exch_id AS EID0, ticker_id AS TID0 FROM Fetched_urls
4 | WHERE UID = {} AND strftime('%Y%j', fetch_date) = strftime('%Y%j', 'now')
5 |
6 | ) ON EID0 = Master.exchange_id AND TID0 = Master.ticker_id
7 | JOIN Tickers ON Tickers.id = Master.ticker_id
8 | JOIN Exchanges ON Exchanges.id = Master.exchange_id
9 | WHERE UID IS NULL AND Exchanges.exchange_sym in ("XNAS", "XNYS")
10 |
--------------------------------------------------------------------------------
/input/sql_cmd/clean.txt:
--------------------------------------------------------------------------------
1 | delete from Tickers where ticker = '';
2 | delete from Exchanges where exchange_sym = '';
3 | delete from Master where exchange_id in (
4 | select Master.exchange_id
5 | from Master left join Exchanges on Master.exchange_id = Exchanges.id
6 | where Exchanges.exchange_sym is null
7 | );
8 | --UPDATE Fetched_urls SET source_text = NULL WHERE source_text IS NOT NULL;
9 | DELETE FROM Fetched_urls; --WHERE strftime('%Y%W', fetch_date) < strftime('%Y%W', 'now');
10 | /*DELETE FROM Master
11 | WHERE update_date_id in (
12 | SELECT id FROM TimeRefs
13 | WHERE substr(dates, 1, 1) = '2' AND length(dates) = 10 AND strftime('%Y%W', dates) < strftime('%Y%W', 'now')
14 | ORDER BY dates DESC
15 | );*/
16 |
--------------------------------------------------------------------------------
/input/ms_investment-types.csv:
--------------------------------------------------------------------------------
1 |
2 | Code,Investment Type
3 | BK,529 Benchmark
4 | PG,529 Peergroup
5 | CP,529 Plan
6 | CT,529 Portfolio
7 | AG,Aggregate
8 | CA,Category Average
9 | FC,Closed-End Fund
10 | CU,Currency Exchange
11 | SP,Private Fund
12 | UA,Account
13 | EI,Economic Indicators
14 | FE,Exchange-Traded Fund
15 | FG,Euro Fund
16 | F0,Fixed Income
17 | FH,Hedge Fund
18 | H1,HFR Hedge Fund
19 | VS,eVestment Separate Accounts
20 | VH,eVestment Hedge Funds
21 | XI,Index
22 | PS,European Pension/Life Fund Wrappers
23 | FV,Insurance Product Fund
24 | PO,MF Objective
25 | FM,Money Market Fund
26 | FO,Open-End Fund
27 | SA,Separate Account
28 | ST,Stock
29 | V1,UK LP SubAccounts
30 | P1,UK Life and Pension Polices
31 | FI,Unit Investment Trust
32 | VP,VA Policy
33 | VA,VA Subaccount
34 | LP,VL Policy
35 | VL,VL Subaccount
36 | DF,Restricted Investors
37 | IF,Internal Only
38 | S1,UBS Separate Accounts
39 | PI,Special Pooled Funds for Unregistered VA
40 |
--------------------------------------------------------------------------------
/input/pot_stocks.json:
--------------------------------------------------------------------------------
1 | [["ACB" , "CAN"],
2 | ["ACRGF" , "USA"],
3 | ["ACRG.U" , "CAN"],
4 | ["CL" , "CAN"],
5 | ["CNNX" , "CAN"],
6 | ["CURA" , "CAN"],
7 | ["CWEB" , "CAN"],
8 | ["EMH" , "CAN"],
9 | ["FIRE" , "CAN"],
10 | ["GGB" , "CAN"],
11 | ["GLH" , "CAN"],
12 | ["GTII" , "CAN"],
13 | ["HARV" , "CAN"],
14 | ["HEXO" , "CAN"],
15 | ["HIP" , "CAN"],
16 | ["IAN" , "CAN"],
17 | ["ISOL" , "CAN"],
18 | ["LHS" , "CAN"],
19 | ["MJAR" , "CAN"],
20 | ["MMEN" , "CAN"],
21 | ["MPXI" , "CAN"],
22 | ["MPXOF", "USA"],
23 | ["N" , "CAN"],
24 | ["OGI" , "CAN"],
25 | ["OH" , "CAN"],
26 | ["PLTH" , "CAN"],
27 | ["RIV" , "CAN"],
28 | ["SNN" , "CAN"],
29 | ["TER" , "CAN"],
30 | ["TGIF" , "CAN"],
31 | ["TGOD" , "CAN"],
32 | ["TILT" , "CAN"],
33 | ["TRST" , "CAN"],
34 | ["TRUL" , "CAN"],
35 | ["VIVO" , "CAN"],
36 | ["WAYL" , "CAN"],
37 | ["XLY" , "CAN"],
38 | ["APHA" , "USA"],
39 | ["CGC" , "USA"],
40 | ["CRON" , "USA"],
41 | ["CVSI" , "USA"],
42 | ["GRWG" , "USA"],
43 | ["GWPH" , "USA"],
44 | ["IIPR" , "USA"],
45 | ["KSHB" , "USA"],
46 | ["MRMD" , "USA"],
47 | ["TLRY" , "USA"],
48 | ["TRTC" , "USA"]]
49 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Caio Brandao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/input/api.json:
--------------------------------------------------------------------------------
1 | {
2 | "1":"https://www.morningstar.com/api/v2/search/Securities/500/usquote-us/?q={}{}",
3 | "2":"https://www.morningstar.com/api/v2/search/Securities/500/usquote-noneus/?q={}{}",
4 | "3":"https://www.morningstar.com/api/v2/search/securities/500/usquote-v2/?q={}{}",
5 | "4":"http://quotes.morningstar.com/stockq/c-company-profile?t={}:{}",
6 | "5":"http://quotes.morningstar.com/stockq/c-header?t={}:{}",
7 | "6":"http://financials.morningstar.com/valuate/valuation-history.action?type=price-earnings&t={}:{}&culture=en-US&order=asc",
8 | "7":"http://financials.morningstar.com/finan/financials/getKeyStatPart.html?t={}:{}&culture=en-US&order=asc",
9 | "8":"http://financials.morningstar.com/finan/financials/getFinancePart.html?t={}:{}&culture=en-US&order=asc",
10 | "9":"http://performance.morningstar.com/perform/Performance/stock/exportStockPrice.action?t={}:{}&pd=1yr&freq=d&pg=0&culture=en-US",
11 | "10":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=is&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
12 | "11":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=is&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
13 | "12":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=cf&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
14 | "13":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=cf&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
15 | "14":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=bs&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
16 | "15":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=bs&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
17 | "16":"http://insiders.morningstar.com/insiders/trading/insider-activity-data2.action?&t={}:{}&region=usa&culture=en-US&cur=&yc=1&tc=&pageSize=100&_=1556547256995"
18 | }
19 |
--------------------------------------------------------------------------------
/input/api00.json:
--------------------------------------------------------------------------------
1 | {
2 | "0":"https://finance.yahoo.com/quote/{}{}",
3 | "1":"https://www.morningstar.com/api/v2/search/Securities/500/usquote-us/?q={}{}",
4 | "2":"https://www.morningstar.com/api/v2/search/Securities/500/usquote-noneus/?q={}{}",
5 | "3":"https://www.morningstar.com/api/v2/search/securities/500/usquote-v2/?q={}{}",
6 | "4":"http://quotes.morningstar.com/stockq/c-company-profile?t={}:{}",
7 | "5":"http://quotes.morningstar.com/stockq/c-header?t={}:{}",
8 | "6":"http://financials.morningstar.com/valuate/valuation-history.action?type=price-earnings&t={}:{}&culture=en-US&order=asc",
9 | "7":"http://financials.morningstar.com/finan/financials/getKeyStatPart.html?t={}:{}&culture=en-US&order=asc",
10 | "8":"http://financials.morningstar.com/finan/financials/getFinancePart.html?t={}:{}&culture=en-US&order=asc",
11 | "9":"http://performance.morningstar.com/perform/Performance/stock/exportStockPrice.action?t={}:{}&pd=1yr&freq=d&pg=0&culture=en-US",
12 | "10":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=is&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
13 | "11":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=is&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
14 | "12":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=cf&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
15 | "13":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=cf&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
16 | "14":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=bs&period=12&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
17 | "15":"http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html?t={}:{}&region=usa&culture=en-US&reportType=bs&period=3&dataType=A&order=asc&columnYear=5&curYearPart=1st5year&rounding=3&number=1",
18 | "16":"http://insiders.morningstar.com/insiders/trading/insider-activity-data2.action?&t={}:{}&region=usa&culture=en-US&cur=&yc=1&tc=&pageSize=100&_=1556547256995"
19 | }
20 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from shutil import copyfile
4 | from datetime import datetime
5 | from importlib import reload
6 | import fetch, time, os, re, sqlite3
7 |
8 | __author__ = "Caio Brandao"
9 | __copyright__ = "Copyright 2019+, Caio Brandao"
10 | __license__ = "MIT"
11 | __version__ = "0.0"
12 | __maintainer__ = "Caio Brandao"
13 | __email__ = "caiobran88@gmail.com"
14 |
15 |
16 | # Create back-up file under /db/backup
17 | def backup_db(file):
18 | #today = datetime.today().strftime('%Y%m%d%H')
19 | new_file = db_file['db_backup'].format(
20 | input('Enter back-up file name:\n'))
21 | fetch.print_('Please wait while the database file is backed-up ...')
22 | copyfile(db_file['path'], new_file)
23 | return '\n~ Back-up file saved\t{}'.format(new_file)
24 |
25 |
26 | # Change variable for .sqlite file name based on user input
27 | def change_name(old_name):
28 | msg = 'Existing database files in directory \'db/\': {}\n'
29 | msg += 'Enter new name for .sqlite file (current = \'{}\'):\n'
30 | fname = lambda x: re.sub('.sqlite', '', x)
31 | files = [fname(f) for f in os.listdir('db/') if '.sqlite' in f]
32 | return input(msg.format(files, old_name))
33 |
34 |
35 | # Print options menu
36 | def print_menu(names):
37 | gap = 22
38 | dash = '='
39 | banner = ' Welcome to msTables '
40 | file = '\'{}.sqlite\''.format(db_file['name'])
41 | menu = {
42 | '0' : 'Change database file name (current name = {})'.format(file),
43 | '1' : 'Create database tables and import latest symbols',
44 | '2' : 'Download Morningstar data into database',
45 | '3' : 'Erase all records from database tables',
46 | '4' : 'Delete all database tables',
47 | '5' : 'Erase all downloaded history from \'Fetched_urls\' table',
48 | #'X' : 'Parse (FOR TESTING PURPOSES)',
49 | '6' : 'Create a database back-up file'
50 | }
51 |
52 | print(dash * (len(banner) + gap * 2))
53 | print('{}{}{}'.format(dash * gap, banner, dash * gap))
54 | print('\nAvailable actions:\n')
55 | for k, v in menu.items():
56 | print(k, '-', v)
57 | print('\n' + dash * (len(banner) + gap * 2))
58 |
59 | return menu
60 |
61 |
62 | # Print command line menu for user input
63 | def main(file):
64 | while True:
65 |
66 | # Print menu and capture user selection
67 | ops = print_menu(file)
68 | while True:
69 | try:
70 | inp0 = input('Enter action no.:\n').strip()
71 | break
72 | except KeyboardInterrupt:
73 | print('\nGoodbye!')
74 | exit()
75 | if inp0 not in ops.keys(): break
76 | reload(fetch) #Comment out after development
77 | start = time.time()
78 | inp = int(inp0)
79 | ans = 'y'
80 |
81 | # Ask user to confirm selection if input > 2
82 | if inp > 2:
83 | msg = '\nAre you sure you would like to {}? (Y/n):\n'
84 | ans = input(msg.format(ops[inp0].upper())).lower()
85 |
86 | # Call function according to user input
87 | if ans == 'y':
88 | print()
89 | try:
90 | # Change db file name
91 | if inp == 0:
92 | db_file['name'] = change_name(db_file['name'])
93 | start = time.time()
94 | db_file['path'] = db_file['npath'].format(db_file['name'])
95 | msg = ('~ Database file \'{}\' selected'
96 | .format(db_file['name']))
97 |
98 | # Create database tables
99 | elif inp == 1:
100 | msg = fetch.create_tables(db_file['path'])
101 |
102 | # Download data from urls listed in api.json
103 | elif inp == 2:
104 | start = fetch.fetch(db_file['path'])
105 | msg = '\n~ Database updated successfully'
106 |
107 | # Erase records from all tables
108 | elif inp == 3:
109 | msg = fetch.erase_tables(db_file['path'])
110 |
111 | # Delete all tables
112 | elif inp == 4:
113 | msg = fetch.delete_tables(db_file['path'])
114 |
115 | # Delete Fetched_urls table records
116 | elif inp == 5:
117 | msg = fetch.del_fetch_history(db_file['path'])
118 |
119 | # Back-up database file
120 | elif inp == int(list(ops.keys())[-1]):
121 | msg = backup_db(db_file)
122 |
123 | # TESTING
124 | elif inp == 99:
125 | fetch.parse.parse(db_file['path'])
126 | msg = 'FINISHED'
127 | # except sqlite3.OperationalError as S:
128 | # msg = '### Error message - {}'.format(S) + \
129 | # '\n### Scroll up for more details. If table does not ' + \
130 | # 'exist, make sure to execute action 1 before choosing' + \
131 | # ' other actions.'
132 | # pass
133 | # except KeyboardInterrupt:
134 | # print('\nGoodbye!')
135 | # exit()
136 | except Exception as e:
137 | print('\a')
138 | #print('\n\n### Error @ main.py:\n {}\n'.format(e))
139 | raise
140 |
141 | # Print output message
142 | #os.system('clear')
143 | print(msg)
144 |
145 | # Calculate and print execution time
146 | end = time.time()
147 | print('\n~ Execution Time\t{:.2f} sec\n'.format(end - start))
148 | else:
149 | os.system('clear')
150 |
151 |
152 | # Define database (db) file and menu text variables
153 | db_file = dict()
154 | db_file['npath'] = 'db/{}.sqlite'
155 | db_file['name'] = 'mstables'
156 | db_file['path'] = db_file['npath'].format(db_file['name'])
157 | db_file['db_backup'] = 'db/backup/{}.sqlite'
158 |
159 | if __name__ == '__main__':
160 | os.system('clear')
161 | main(db_file)
162 | print('Goodbye!\n\n')
163 |
--------------------------------------------------------------------------------
/input/symbols.csv:
--------------------------------------------------------------------------------
1 | "Country and Currency","Currency Code","Graphic Image ","Font: Code2000","Font: Arial Unicode MS","Unicode: Decimal","Unicode: Hex"," "
2 | "Albania Lek","ALL","","Lek","Lek","76, 101, 107","4c, 65, 6b"," "
3 | "Afghanistan Afghani","AFN","","؋","؋","1547","60b"," "
4 | "Argentina Peso","ARS","","$","$","36","24"," info"
5 | "Aruba Guilder","AWG","","ƒ","ƒ","402","192"," "
6 | "Australia Dollar","AUD","","$","$","36","24"," "
7 | "Azerbaijan Manat","AZN","","₼","₼","8380","20bc"," "
8 | "Bahamas Dollar","BSD","","$","$","36","24"," "
9 | "Barbados Dollar","BBD","","$","$","36","24"," "
10 | "Belarus Ruble","BYN","","Br","Br","66, 114","42, 72"," "
11 | "Belize Dollar","BZD","","BZ$","BZ$","66, 90, 36","42, 5a, 24"," "
12 | "Bermuda Dollar","BMD","","$","$","36","24"," "
13 | "Bolivia Bolíviano","BOB","","$b","$b","36, 98","24, 62"," "
14 | "Bosnia and Herzegovina Convertible Marka","BAM","","KM","KM","75, 77","4b, 4d"," "
15 | "Botswana Pula","BWP","","P","P","80","50"," "
16 | "Bulgaria Lev","BGN","","лв","лв","1083, 1074","43b, 432"," "
17 | "Brazil Real","BRL","","R$","R$","82, 36","52, 24"," info"
18 | "Brunei Darussalam Dollar","BND","","$","$","36","24"," "
19 | "Cambodia Riel","KHR","","៛","៛","6107","17db"," "
20 | "Canada Dollar","CAD","","$","$","36","24"," "
21 | "Cayman Islands Dollar","KYD","","$","$","36","24"," "
22 | "Chile Peso","CLP","","$","$","36","24"," info"
23 | "China Yuan Renminbi","CNY","","¥","¥","165","a5"," info"
24 | "China Yuan Renminbi","RMB","","¥","¥","165","a5"," info"
25 | "Colombia Peso","COP","","$","$","36","24"," "
26 | "Costa Rica Colon","CRC","","₡","₡","8353","20a1"," "
27 | "Croatia Kuna","HRK","","kn","kn","107, 110","6b, 6e"," "
28 | "Cuba Peso","CUP","","₱","₱","8369","20b1"," "
29 | "Czech Republic Koruna","CZK","","Kč","Kč","75, 269","4b, 10d"," "
30 | "Denmark Krone","DKK","","kr","kr","107, 114","6b, 72"," info"
31 | "Dominican Republic Peso","DOP","","RD$","RD$","82, 68, 36","52, 44, 24"," "
32 | "East Caribbean Dollar","XCD","","$","$","36","24"," "
33 | "Egypt Pound","EGP","","£","£","163","a3"," "
34 | "El Salvador Colon","SVC","","$","$","36","24"," "
35 | "Euro Member Countries","EUR","","€","€","8364","20ac"," "
36 | "Falkland Islands (Malvinas) Pound","FKP","","£","£","163","a3"," "
37 | "Fiji Dollar","FJD","","$","$","36","24"," "
38 | "Ghana Cedi","GHS","","¢","¢","162","a2"," "
39 | "Gibraltar Pound","GIP","","£","£","163","a3"," "
40 | "Guatemala Quetzal","GTQ","","Q","Q","81","51"," "
41 | "Guernsey Pound","GGP","","£","£","163","a3"," "
42 | "Guyana Dollar","GYD","","$","$","36","24"," "
43 | "Honduras Lempira","HNL","","L","L","76","4c"," "
44 | "Hong Kong Dollar","HKD","","$","$","36","24"," info"
45 | "Hungary Forint","HUF","","Ft","Ft","70, 116","46, 74"," "
46 | "Iceland Krona","ISK","","kr","kr","107, 114","6b, 72"," "
47 | "India Rupee","INR","","","","",""," info"
48 | "Indonesia Rupiah","IDR","","Rp","Rp","82, 112","52, 70"," "
49 | "Iran Rial","IRR","","﷼","﷼","65020","fdfc"," "
50 | "Isle of Man Pound","IMP","","£","£","163","a3"," "
51 | "Israel Shekel","ILS","","₪","₪","8362","20aa"," "
52 | "Jamaica Dollar","JMD","","J$","J$","74, 36","4a, 24"," "
53 | "Japan Yen","JPY","","¥","¥","165","a5"," info"
54 | "Jersey Pound","JEP","","£","£","163","a3"," "
55 | "Kazakhstan Tenge","KZT","","лв","лв","1083, 1074","43b, 432"," "
56 | "Korea (North) Won","KPW","","₩","₩","8361","20a9"," "
57 | "Korea (South) Won","KRW","","₩","₩","8361","20a9"," "
58 | "Kyrgyzstan Som","KGS","","лв","лв","1083, 1074","43b, 432"," "
59 | "Laos Kip","LAK","","₭","₭","8365","20ad"," "
60 | "Lebanon Pound","LBP","","£","£","163","a3"," "
61 | "Liberia Dollar","LRD","","$","$","36","24"," "
62 | "Macedonia Denar","MKD","","ден","ден","1076, 1077, 1085","434, 435, 43d"," "
63 | "Malaysia Ringgit","MYR","","RM","RM","82, 77","52, 4d"," "
64 | "Mauritius Rupee","MUR","","₨","₨","8360","20a8"," "
65 | "Mexico Peso","MXN","","$","$","36","24"," info"
66 | "Mongolia Tughrik","MNT","","₮","₮","8366","20ae"," "
67 | "Mozambique Metical","MZN","","MT","MT","77, 84","4d, 54"," "
68 | "Namibia Dollar","NAD","","$","$","36","24"," "
69 | "Nepal Rupee","NPR","","₨","₨","8360","20a8"," "
70 | "Netherlands Antilles Guilder","ANG","","ƒ","ƒ","402","192"," "
71 | "New Zealand Dollar","NZD","","$","$","36","24"," "
72 | "Nicaragua Cordoba","NIO","","C$","C$","67, 36","43, 24"," "
73 | "Nigeria Naira","NGN","","₦","₦","8358","20a6"," "
74 | "Norway Krone","NOK","","kr","kr","107, 114","6b, 72"," "
75 | "Oman Rial","OMR","","﷼","﷼","65020","fdfc"," "
76 | "Pakistan Rupee","PKR","","₨","₨","8360","20a8"," "
77 | "Panama Balboa","PAB","","B/.","B/.","66, 47, 46","42, 2f, 2e"," "
78 | "Paraguay Guarani","PYG","","Gs","Gs","71, 115","47, 73"," "
79 | "Peru Sol","PEN","","S/.","S/.","83, 47, 46","53, 2f, 2e"," info"
80 | "Philippines Peso","PHP","","₱","₱","8369","20b1"," "
81 | "Poland Zloty","PLN","","zł","zł","122, 322","7a, 142"," "
82 | "Qatar Riyal","QAR","","﷼","﷼","65020","fdfc"," "
83 | "Romania Leu","RON","","lei","lei","108, 101, 105","6c, 65, 69"," "
84 | "Russia Ruble","RUB","","₽","₽","8381","20bd"," "
85 | "Saint Helena Pound","SHP","","£","£","163","a3"," "
86 | "Saudi Arabia Riyal","SAR","","﷼","﷼","65020","fdfc"," "
87 | "Serbia Dinar","RSD","","Дин.","Дин.","1044, 1080, 1085, 46","414, 438, 43d, 2e"," "
88 | "Seychelles Rupee","SCR","","₨","₨","8360","20a8"," "
89 | "Singapore Dollar","SGD","","$","$","36","24"," "
90 | "Solomon Islands Dollar","SBD","","$","$","36","24"," "
91 | "Somalia Shilling","SOS","","S","S","83","53"," "
92 | "South Africa Rand","ZAR","","R","R","82","52"," "
93 | "Sri Lanka Rupee","LKR","","₨","₨","8360","20a8"," "
94 | "Sweden Krona","SEK","","kr","kr","107, 114","6b, 72"," info"
95 | "Switzerland Franc","CHF","","CHF","CHF","67, 72, 70","43, 48, 46"," "
96 | "Suriname Dollar","SRD","","$","$","36","24"," "
97 | "Syria Pound","SYP","","£","£","163","a3"," "
98 | "Taiwan New Dollar","TWD","","NT$","NT$","78, 84, 36","4e, 54, 24"," info"
99 | "Thailand Baht","THB","","฿","฿","3647","e3f"," "
100 | "Trinidad and Tobago Dollar","TTD","","TT$","TT$","84, 84, 36","54, 54, 24"," "
101 | "Turkey Lira","TRY","","","","",""," info"
102 | "Tuvalu Dollar","TVD","","$","$","36","24"," "
103 | "Ukraine Hryvnia","UAH","","₴","₴","8372","20b4"," "
104 | "United Kingdom Pound","GBP","","£","£","163","a3"," "
105 | "United States Dollar","USD","","$","$","36","24"," "
106 | "Uruguay Peso","UYU","","$U","$U","36, 85","24, 55"," "
107 | "Uzbekistan Som","UZS","","лв","лв","1083, 1074","43b, 432"," "
108 | "Venezuela Bolívar","VEF","","Bs","Bs","66, 115","42, 73"," "
109 | "Viet Nam Dong","VND","","₫","₫","8363","20ab"," "
110 | "Yemen Rial","YER","","﷼","﷼","65020","fdfc"," "
111 | "Zimbabwe Dollar","ZWD","","Z$","Z$","90, 36","5a, 24"," "
112 |
--------------------------------------------------------------------------------
/input/ctycodes.csv:
--------------------------------------------------------------------------------
1 | "COUNTRY","A2 (ISO)","A3 (UN)","NUM (UN)","DIALING CODE"
2 | "Afghanistan","AF","AFG","4","93"
3 | "Albania","AL","ALB","8","355"
4 | "Algeria","DZ","DZA","12","213"
5 | "American Samoa","AS","ASM","16","1-684"
6 | "Andorra","AD","AND","20","376"
7 | "Angola","AO","AGO","24","244"
8 | "Anguilla","AI","AIA","660","1-264"
9 | "Antarctica","AQ","ATA","10","672"
10 | "Antigua and Barbuda","AG","ATG","28","1-268"
11 | "Argentina","AR","ARG","32","54"
12 | "Armenia","AM","ARM","51","374"
13 | "Aruba","AW","ABW","533","297"
14 | "Australia","AU","AUS","36","61"
15 | "Austria","AT","AUT","40","43"
16 | "Azerbaijan","AZ","AZE","31","994"
17 | "Bahamas","BS","BHS","44","1-242"
18 | "Bahrain","BH","BHR","48","973"
19 | "Bangladesh","BD","BGD","50","880"
20 | "Barbados","BB","BRB","52","1-246"
21 | "Belarus","BY","BLR","112","375"
22 | "Belgium","BE","BEL","56","32"
23 | "Belize","BZ","BLZ","84","501"
24 | "Benin","BJ","BEN","204","229"
25 | "Bermuda","BM","BMU","60","1-441"
26 | "Bhutan","BT","BTN","64","975"
27 | "Bolivia","BO","BOL","68","591"
28 | "Bonaire","BQ","BES","535","599"
29 | "Bosnia and Herzegovina","BA","BIH","70","387"
30 | "Botswana","BW","BWA","72","267"
31 | "Bouvet Island","BV","BVT","74","47"
32 | "Brazil","BR","BRA","76","55"
33 | "British Indian Ocean Territory","IO","IOT","86","246"
34 | "Brunei Darussalam","BN","BRN","96","673"
35 | "Bulgaria","BG","BGR","100","359"
36 | "Burkina Faso","BF","BFA","854","226"
37 | "Burundi","BI","BDI","108","257"
38 | "Cambodia","KH","KHM","116","855"
39 | "Cameroon","CM","CMR","120","237"
40 | "Canada","CA","CAN","124","1"
41 | "Cape Verde","CV","CPV","132","238"
42 | "Cayman Islands","KY","CYM","136","1-345"
43 | "Central African Republic","CF","CAF","140","236"
44 | "Chad","TD","TCD","148","235"
45 | "Chile","CL","CHL","152","56"
46 | "China","CN","CHN","156","86"
47 | "Christmas Island","CX","CXR","162","61"
48 | "Cocos (Keeling) Islands","CC","CCK","166","61"
49 | "Colombia","CO","COL","170","57"
50 | "Comoros","KM","COM","174","269"
51 | "Congo","CG","COG","178","242"
52 | "Democratic Republic of the Congo","CD","COD","180","243"
53 | "Cook Islands","CK","COK","184","682"
54 | "Costa Rica","CR","CRI","188","506"
55 | "Croatia","HR","HRV","191","385"
56 | "Cuba","CU","CUB","192","53"
57 | "Curacao","CW","CUW","531","599"
58 | "Cyprus","CY","CYP","196","357"
59 | "Czech Republic","CZ","CZE","203","420"
60 | "Cote d'Ivoire","CI","CIV","384","225"
61 | "Denmark","DK","DNK","208","45"
62 | "Djibouti","DJ","DJI","262","253"
63 | "Dominica","DM","DMA","212","1-767"
64 | "Dominican Republic","DO","DOM","214","1-809,1-829,1-849"
65 | "Ecuador","EC","ECU","218","593"
66 | "Egypt","EG","EGY","818","20"
67 | "El Salvador","SV","SLV","222","503"
68 | "Equatorial Guinea","GQ","GNQ","226","240"
69 | "Eritrea","ER","ERI","232","291"
70 | "Estonia","EE","EST","233","372"
71 | "Ethiopia","ET","ETH","231","251"
72 | "Falkland Islands (Malvinas)","FK","FLK","238","500"
73 | "Faroe Islands","FO","FRO","234","298"
74 | "Fiji","FJ","FJI","242","679"
75 | "Finland","FI","FIN","246","358"
76 | "France","FR","FRA","250","33"
77 | "French Guiana","GF","GUF","254","594"
78 | "French Polynesia","PF","PYF","258","689"
79 | "French Southern Territories","TF","ATF","260","262"
80 | "Gabon","GA","GAB","266","241"
81 | "Gambia","GM","GMB","270","220"
82 | "Georgia","GE","GEO","268","995"
83 | "Germany","DE","DEU","276","49"
84 | "Ghana","GH","GHA","288","233"
85 | "Gibraltar","GI","GIB","292","350"
86 | "Greece","GR","GRC","300","30"
87 | "Greenland","GL","GRL","304","299"
88 | "Grenada","GD","GRD","308","1-473"
89 | "Guadeloupe","GP","GLP","312","590"
90 | "Guam","GU","GUM","316","1-671"
91 | "Guatemala","GT","GTM","320","502"
92 | "Guernsey","GG","GGY","831","44"
93 | "Guinea","GN","GIN","324","224"
94 | "Guinea-Bissau","GW","GNB","624","245"
95 | "Guyana","GY","GUY","328","592"
96 | "Haiti","HT","HTI","332","509"
97 | "Heard Island and McDonald Islands","HM","HMD","334","672"
98 | "Holy See (Vatican City State)","VA","VAT","336","379"
99 | "Honduras","HN","HND","340","504"
100 | "Hong Kong","HK","HKG","344","852"
101 | "Hungary","HU","HUN","348","36"
102 | "Iceland","IS","ISL","352","354"
103 | "India","IN","IND","356","91"
104 | "Indonesia","ID","IDN","360","62"
105 | "Iran, Islamic Republic of","IR","IRN","364","98"
106 | "Iraq","IQ","IRQ","368","964"
107 | "Ireland","IE","IRL","372","353"
108 | "Isle of Man","IM","IMN","833","44"
109 | "Israel","IL","ISR","376","972"
110 | "Italy","IT","ITA","380","39"
111 | "Jamaica","JM","JAM","388","1-876"
112 | "Japan","JP","JPN","392","81"
113 | "Jersey","JE","JEY","832","44"
114 | "Jordan","JO","JOR","400","962"
115 | "Kazakhstan","KZ","KAZ","398","7"
116 | "Kenya","KE","KEN","404","254"
117 | "Kiribati","KI","KIR","296","686"
118 | "Korea, Democratic People's Republic of","KP","PRK","408","850"
119 | "Korea, Republic of","KR","KOR","410","82"
120 | "Kuwait","KW","KWT","414","965"
121 | "Kyrgyzstan","KG","KGZ","417","996"
122 | "Lao People's Democratic Republic","LA","LAO","418","856"
123 | "Latvia","LV","LVA","428","371"
124 | "Lebanon","LB","LBN","422","961"
125 | "Lesotho","LS","LSO","426","266"
126 | "Liberia","LR","LBR","430","231"
127 | "Libya","LY","LBY","434","218"
128 | "Liechtenstein","LI","LIE","438","423"
129 | "Lithuania","LT","LTU","440","370"
130 | "Luxembourg","LU","LUX","442","352"
131 | "Macao","MO","MAC","446","853"
132 | "Macedonia, the Former Yugoslav Republic of","MK","MKD","807","389"
133 | "Madagascar","MG","MDG","450","261"
134 | "Malawi","MW","MWI","454","265"
135 | "Malaysia","MY","MYS","458","60"
136 | "Maldives","MV","MDV","462","960"
137 | "Mali","ML","MLI","466","223"
138 | "Malta","MT","MLT","470","356"
139 | "Marshall Islands","MH","MHL","584","692"
140 | "Martinique","MQ","MTQ","474","596"
141 | "Mauritania","MR","MRT","478","222"
142 | "Mauritius","MU","MUS","480","230"
143 | "Mayotte","YT","MYT","175","262"
144 | "Mexico","MX","MEX","484","52"
145 | "Micronesia, Federated States of","FM","FSM","583","691"
146 | "Moldova, Republic of","MD","MDA","498","373"
147 | "Monaco","MC","MCO","492","377"
148 | "Mongolia","MN","MNG","496","976"
149 | "Montenegro","ME","MNE","499","382"
150 | "Montserrat","MS","MSR","500","1-664"
151 | "Morocco","MA","MAR","504","212"
152 | "Mozambique","MZ","MOZ","508","258"
153 | "Myanmar","MM","MMR","104","95"
154 | "Namibia","NA","NAM","516","264"
155 | "Nauru","NR","NRU","520","674"
156 | "Nepal","NP","NPL","524","977"
157 | "Netherlands","NL","NLD","528","31"
158 | "New Caledonia","NC","NCL","540","687"
159 | "New Zealand","NZ","NZL","554","64"
160 | "Nicaragua","NI","NIC","558","505"
161 | "Niger","NE","NER","562","227"
162 | "Nigeria","NG","NGA","566","234"
163 | "Niue","NU","NIU","570","683"
164 | "Norfolk Island","NF","NFK","574","672"
165 | "Northern Mariana Islands","MP","MNP","580","1-670"
166 | "Norway","NO","NOR","578","47"
167 | "Oman","OM","OMN","512","968"
168 | "Pakistan","PK","PAK","586","92"
169 | "Palau","PW","PLW","585","680"
170 | "Palestine, State of","PS","PSE","275","970"
171 | "Panama","PA","PAN","591","507"
172 | "Papua New Guinea","PG","PNG","598","675"
173 | "Paraguay","PY","PRY","600","595"
174 | "Peru","PE","PER","604","51"
175 | "Philippines","PH","PHL","608","63"
176 | "Pitcairn","PN","PCN","612","870"
177 | "Poland","PL","POL","616","48"
178 | "Portugal","PT","PRT","620","351"
179 | "Puerto Rico","PR","PRI","630","1"
180 | "Qatar","QA","QAT","634","974"
181 | "Romania","RO","ROU","642","40"
182 | "Russian Federation","RU","RUS","643","7"
183 | "Rwanda","RW","RWA","646","250"
184 | "Reunion","RE","REU","638","262"
185 | "Saint Barthelemy","BL","BLM","652","590"
186 | "Saint Helena","SH","SHN","654","290"
187 | "Saint Kitts and Nevis","KN","KNA","659","1-869"
188 | "Saint Lucia","LC","LCA","662","1-758"
189 | "Saint Martin (French part)","MF","MAF","663","590"
190 | "Saint Pierre and Miquelon","PM","SPM","666","508"
191 | "Saint Vincent and the Grenadines","VC","VCT","670","1-784"
192 | "Samoa","WS","WSM","882","685"
193 | "San Marino","SM","SMR","674","378"
194 | "Sao Tome and Principe","ST","STP","678","239"
195 | "Saudi Arabia","SA","SAU","682","966"
196 | "Senegal","SN","SEN","686","221"
197 | "Serbia","RS","SRB","688","381"
198 | "Seychelles","SC","SYC","690","248"
199 | "Sierra Leone","SL","SLE","694","232"
200 | "Singapore","SG","SGP","702","65"
201 | "Sint Maarten (Dutch part)","SX","SXM","534","1-721"
202 | "Slovakia","SK","SVK","703","421"
203 | "Slovenia","SI","SVN","705","386"
204 | "Solomon Islands","SB","SLB","90","677"
205 | "Somalia","SO","SOM","706","252"
206 | "South Africa","ZA","ZAF","710","27"
207 | "South Georgia and the South Sandwich Islands","GS","SGS","239","500"
208 | "South Sudan","SS","SSD","728","211"
209 | "Spain","ES","ESP","724","34"
210 | "Sri Lanka","LK","LKA","144","94"
211 | "Sudan","SD","SDN","729","249"
212 | "Suriname","SR","SUR","740","597"
213 | "Svalbard and Jan Mayen","SJ","SJM","744","47"
214 | "Swaziland","SZ","SWZ","748","268"
215 | "Sweden","SE","SWE","752","46"
216 | "Switzerland","CH","CHE","756","41"
217 | "Syrian Arab Republic","SY","SYR","760","963"
218 | "Taiwan","TW","TWN","158","886"
219 | "Tajikistan","TJ","TJK","762","992"
220 | "United Republic of Tanzania","TZ","TZA","834","255"
221 | "Thailand","TH","THA","764","66"
222 | "Timor-Leste","TL","TLS","626","670"
223 | "Togo","TG","TGO","768","228"
224 | "Tokelau","TK","TKL","772","690"
225 | "Tonga","TO","TON","776","676"
226 | "Trinidad and Tobago","TT","TTO","780","1-868"
227 | "Tunisia","TN","TUN","788","216"
228 | "Turkey","TR","TUR","792","90"
229 | "Turkmenistan","TM","TKM","795","993"
230 | "Turks and Caicos Islands","TC","TCA","796","1-649"
231 | "Tuvalu","TV","TUV","798","688"
232 | "Uganda","UG","UGA","800","256"
233 | "Ukraine","UA","UKR","804","380"
234 | "United Arab Emirates","AE","ARE","784","971"
235 | "United Kingdom","GB","GBR","826","44"
236 | "United States","US","USA","840","1"
237 | "United States Minor Outlying Islands","UM","UMI","581","1"
238 | "Uruguay","UY","URY","858","598"
239 | "Uzbekistan","UZ","UZB","860","998"
240 | "Vanuatu","VU","VUT","548","678"
241 | "Venezuela","VE","VEN","862","58"
242 | "Viet Nam","VN","VNM","704","84"
243 | "British Virgin Islands","VG","VGB","92","1-284"
244 | "US Virgin Islands","VI","VIR","850","1-340"
245 | "Wallis and Futuna","WF","WLF","876","681"
246 | "Western Sahara","EH","ESH","732","212"
247 | "Yemen","YE","YEM","887","967"
248 | "Zambia","ZM","ZMB","894","260"
249 | "Zimbabwe","ZW","ZWE","716","263"
250 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | mstables
2 | ========
3 |
4 | msTables is a [MorningStar.com](https://www.morningstar.com) scraper written in Python that fetches, parses, and stores financial and market data for over 70k securities in a relational SQLite database. The scraper provides a Command Line Interface (CLI) that gives the user the flexibility to create and manage multiple *.sqlite* files. Once data has been downloaded into the database files, the [dataframes.py](dataframes.py) module can be used to easily create DataFrame objects from the database tables for further analysis.
5 |
6 | The scraper should work as long as the structure of the responses for the URLs used does not change. See [input/api.json](input/api.json) for the complete list of URLs.
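For illustration, each entry in [input/api.json](input/api.json) is a URL template whose `{}` placeholders are filled in with an exchange symbol and ticker. Below is a minimal sketch of that idea; the placeholder order is an assumption based on the `t={}:{}` pattern, and the actual request logic in [fetch.py](fetch.py) may differ:

```python
# Sketch: fill one of the api.json URL templates with an exchange symbol and
# ticker. Placeholder order (exchange first, then ticker) is an assumption.
import json

with open('input/api.json') as f:
    apis = json.load(f)

url = apis['5'].format('XNAS', 'AAPL')  # quote-header URL for a NASDAQ ticker
print(url)  # http://quotes.morningstar.com/stockq/c-header?t=XNAS:AAPL
```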
7 |
8 | IMPORTANT: The Morningstar.com data is protected under "Copyright (c) 2018 Morningstar. All rights reserved." This tool should be used for personal purposes only. See the following links for more information regarding the Morningstar.com terms & conditions:
9 | - [Copyright][2]
10 | - [User Agreement][3]
11 |
12 | ## Motivation
13 | As a fan of [Benjamin Graham](https://en.wikipedia.org/wiki/Benjamin_Graham)'s [value investing](https://en.wikipedia.org/wiki/Value_investing), I have always searched for sources of consolidated financial data that would allow me to identify 'undervalued' companies from a large pool of global public stocks. However, most *(if not all)* financial services that provide such data consolidation are not free and, as a small retail investor, I was not willing to pay their fees. In fact, most of the data I needed was already available for free on various financial websites, just not in a consolidated format. Therefore, I decided to create a web scraper for [MorningStar.com](https://www.morningstar.com), which is the website I found to have the most data available in a standardized and structured format. MS was also one of the only websites that published free financial performance data for the past 10 years, while most sites only provided free data for the last 5 years.
14 |
15 | ## Next steps
16 | - Finalize instructions for the scraper CLI
17 |
18 |
19 | Instructions
20 | ------------
21 |
22 | ### Program Requirements
23 | The scraper should run on any Linux distribution that has Python 3 and the following modules installed:
24 |
25 | - [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/)
26 | - [requests](http://docs.python-requests.org/en/master/)
27 | - [sqlite3](https://docs.python.org/3/library/sqlite3.html)
28 | - [pandas](https://pandas.pydata.org/)
29 | - [numpy](http://www.numpy.org/)
30 | - [multiprocessing](https://docs.python.org/3/library/multiprocessing.html?highlight=multiprocessing#module-multiprocessing)
31 |
32 | To view the [notebook with data visualization examples][1] mentioned in the instructions below, you must also have [Jupyter](https://jupyter.org/) and [matplotlib](https://matplotlib.org/) installed.
33 |
34 | ### Installation
35 | Open a Linux terminal in the desired installation directory and execute `git clone https://github.com/caiobran/msTables.git` to download the project files.
36 |
37 | ### Using the scraper Command Line Interface (CLI)
38 |
39 | Execute `python main.py` from the project root directory to start the scraper CLI. If the program has started correctly, you should see the following interface:
40 |
41 |
42 |
43 | 1. If you are running the scraper for the first time, enter option `1` to create the initial SQLite database tables.
44 | 2. Once that action has been completed, and on subsequent runs, enter option `2` to download the latest data from the MorningStar [URL's](input/api.json).
45 | - You will be prompted to enter the number of records you would like to update. You can enter a large number such as `1000000` if you would like the scraper to update all records. You may also enter smaller quantities if you do not want the scraper to run for a long period of time.
46 | - On average, it has taken about three days to update all records with the current program parameters and an Internet speed > 100 Mbps. The program can be interrupted at any time using Ctrl+C.
47 | - To speed up the scraper, one may want to increase the size of the multiprocessing pool used for URL requests in [main.py](main.py); a minimal sketch of this pattern is shown below. *However, I do not recommend doing that, as the MorningStar servers will not be too happy about receiving many simultaneous GET requests from the same IP address.*
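A minimal sketch of the pool pattern referred to above, assuming an illustrative worker function and pool size (this is not the project's actual implementation, which may batch requests and store responses in the database):

```python
# Hypothetical sketch of a multiprocessing fetch pool; function names and the
# pool size are illustrative only, not the project's actual implementation.
from multiprocessing import Pool
import requests

def fetch_url(url):
    # Download one URL; return (url, text) or (url, None) on failure.
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        return url, resp.text
    except requests.RequestException:
        return url, None

if __name__ == '__main__':
    urls = ['http://quotes.morningstar.com/stockq/c-header?t=XNAS:AAPL']
    with Pool(processes=4) as pool:  # a larger pool risks being throttled
        for url, text in pool.map(fetch_url, urls):
            print(url, 'ok' if text else 'failed')
```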
48 |
49 | *(documentation in progress, to be updated with instructions on remaining actions)*
50 |
51 | ### How to access the SQLite database tables using module _dataframes.py_
52 | The scraper will automatically create a directory *db/* in the root folder to store the generated *.sqlite* database files. The current file name in use will be displayed on the scraper CLI under action `0` (see CLI figure above). Database files will contain a relational database with the following main tables (a short sqlite3 sketch for browsing them follows the list):
53 |
54 | **Database Tables**
55 |
56 | - _**Master**_: Main bridge table with complete list of security and exchange symbol pairs, security name, sector, industry, security type, and FY end dates
57 | - _**MSheader**_: Quote Summary data with day hi, day lo, 52wk hi, 52wk lo, forward P/E, div. yield, volumes, and current P/B, P/S, and P/CF ratios
58 | - _**MSvaluation**_: 10yr stock valuation indicators (P/E, P/S, P/B, P/C)
59 | - _**MSfinancials**_: Key performance ratios for past 10 yrs
60 | - _**MSratio_cashflow**_, _**MSratio_financial**_, _**MSratio_growth**_, _**MSratio_profitability**_, _**MSratio_efficiency**_: Financial performance ratios for past 10 yrs
61 | - _**MSreport_is_yr**_, _**MSreport_is_qt**_: Income Statements for past 5 yrs and 5 qtrs, respectively
62 | - _**MSreport_bs_yr**_, _**MSreport_bs_qt**_: Balance Sheets for past 5 yrs and 5 qtrs, respectively
63 | - _**MSreport_cf_yr**_, _**MSreport_cf_qt**_: Cash Flow Statements for past 5 yrs and 5 qtrs, respectively
64 | - _**MSpricehistory**_: Current 50, 100 and 200 day price averages and 10 year price history (price history is compressed)
65 | - _**InsiderTransactions**_: Insider transactions for the past year from [http://insiders.morningstar.com](http://insiders.morningstar.com) (+600k transactions)
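The generated tables can also be inspected directly with Python's built-in `sqlite3` module. A minimal sketch, assuming the default file name `db/mstables.sqlite` and that actions `1` and `2` have already been run:

```python
# Minimal sketch: list the tables in the generated database and count the rows
# in the Master bridge table. Assumes db/mstables.sqlite has been populated.
import sqlite3

conn = sqlite3.connect('db/mstables.sqlite')
cur = conn.cursor()

cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
print([row[0] for row in cur.fetchall()])  # e.g. ['Exchanges', 'Master', ...]

cur.execute('SELECT COUNT(*) FROM Master')
print('Master rows:', cur.fetchone()[0])

conn.close()
```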
66 |
67 | **How to slice and dice the data using dataframes.py**
68 |
69 | Module _dataframes_ contains a class that can be used to generate pandas DataFrames from the data in the SQLite database file generated by the scraper.
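For example, a typical session might look like the sketch below (the file path and the subset of methods shown are illustrative; see [dataframes.py](dataframes.py) for the full class definition), assuming the database has already been populated through CLI actions `1` and `2`:

```python
# Sketch of typical usage of the DataFrames class from dataframes.py.
# Assumes db/mstables.sqlite has already been populated by the scraper.
from dataframes import DataFrames

df = DataFrames('db/mstables.sqlite')  # builds the merged master DataFrame
master = df.master                     # bridge table: tickers, exchanges, sectors, ...
val = df.valuation()                   # 10yr price ratios (P/E, P/S, P/B, P/C)
keyr = df.keyratios()                  # key financial ratios
trades = df.insider_trades()           # insider transactions for the past year
print(master.shape, val.shape)
```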
70 |
71 | See Jupyter notebook [data_overview.ipynb][1] for examples on how to create DataFrame objects to manipulate and visualize the data. Below is a list of all content found in the notebook:
72 |
73 | **Jupyter Notebook Content**
74 |
75 | 1. [Required modules and matplotlib backend][1]
76 | 1. [Creating a master (bridge table) DataFrame instance using the DataFrames class][1]
77 | 1. [Methods for creating DataFrame instances][1]
78 | 1. `quoteheader` - [MorningStar (MS) Quote Header][1]
79 | 1. `valuation` - [MS Valuation table with Price Ratios (P/E, P/S, P/B, P/C) for the past 10 yrs][1]
80 | 1. `keyratios` - [MS Ratio - Key Financial Ratios & Values][1]
81 | 1. `finhealth` - [MS Ratio - Financial Health][1]
82 | 1. `profitability` - [MS Ratio - Profitability][1]
83 | 1. `growth` - [MS Ratio - Growth][1]
84 | 1. `cfhealth` - [MS Ratio - Cash Flow Health][1]
85 | 1. `efficiency` - [MS Ratio - Efficiency][1]
86 | 1. `annualIS` - [MS Annual Income Statements][1]
87 | 1. `quarterlyIS` - [MS Quarterly Income Statements][1]
88 | 1. `annualBS` - [MS Annual Balance Sheets][1]
89 | 1. `quarterlyBS` - [MS Quarterly Balance Sheets][1]
90 | 1. `annualCF` - [MS Annual Cash Flow Statements][1]
91 | 1. `quarterlyCF` - [MS Quarterly Cash Flow Statements][1]
92 | 1. `insider_trades` - [Insider transactions for the past year][1]
93 | 1. [Performing statistical analysis][1]
94 | 1. [Count of database records][1]
95 | 1. [Last updated dates][1]
96 | 1. [Number of records by security type][1]
97 | 1. [Number of records by country, based on exchanges][1]
98 | 1. [Number of records per exchange][1]
99 | 1. [Number of stocks by sector][1]
100 | 1. [Number of stocks by industry][1]
101 | 1. [Mean price ratios (P/E, P/S, P/B, P/CF) of stocks by sectors][1]
102 | 1. [Applying various criteria to filter common stocks][1]
103 | 1. [CAGR > 7% for past 7 years][1]
104 | 1. [No earnings deficit (loss) for past 5 or 7 years][1]
105 | 1. [Uninterrupted and increasing Dividends for past 5 yrs][1]
106 | 1. [P/E Ratio of 25 or less for the past 7 yrs and less than 20 for TTM][1]
107 | 1. [Growth for the past year][1]
108 | 1. [Long-term debt < 50% of total capital][1] *(pending)*
109 | 1. [Stocks with insider buys in the past 3 months][1]
110 |
111 | **Below are sample snippets of code from [data_overview.ipynb][1]:**
112 |
113 | - Count of records downloaded from Morningstar.com by security type:
114 |
115 |
116 | - Plot of average US stocks P/E by sector for the past 10 years:
117 |
118 |
119 | - Applying fundamental rules to screen the list of stocks ([see sample output](https://github.com/caiobran/mstables/blob/master/sample_rules_output.ods)):
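As a rough sketch of this kind of screen, similar rules can be applied to the columns found in [sample_rules_output.csv](sample_rules_output.csv); the thresholds below are illustrative and are not the notebook's exact rule set:

```python
# Illustrative screening sketch over the columns of sample_rules_output.csv;
# the thresholds are examples, not the notebook's exact rules.
import pandas as pd

rules = pd.read_csv('sample_rules_output.csv')

screened = rules[
    (rules['CAGR_Rev'] > 7)        # revenue CAGR above 7%
    & (rules['PE_TTM'] <= 20)      # trailing P/E of 20 or less
    & (rules['Dividend_Y10'] > 0)  # paid a dividend in the latest year
]
print(screened[['company', 'sector', 'PE_TTM', 'CAGR_Rev']])
```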
120 |
121 |
122 |
123 |
124 |
125 | MIT License
126 | -----------
127 |
128 | Copyright (c) 2019 Caio Brandao
129 |
130 | Permission is hereby granted, free of charge, to any person obtaining a copy
131 | of this software and associated documentation files (the "Software"), to deal
132 | in the Software without restriction, including without limitation the rights
133 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
134 | copies of the Software, and to permit persons to whom the Software is
135 | furnished to do so, subject to the following conditions:
136 |
137 | The above copyright notice and this permission notice shall be included in all
138 | copies or substantial portions of the Software.
139 |
140 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
141 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
142 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
143 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
144 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
145 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
146 | SOFTWARE.
147 |
148 | [1]:https://github.com/caiobran/msTables/blob/master/data_overview.ipynb
149 | [2]:https://www.morningstar.com/about/copyright.html
150 | [3]:https://www.morningstar.com/about/user-agreement.html
151 |
--------------------------------------------------------------------------------
/dataframes.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import sqlite3
4 | import json
5 | import sys
6 | import re
7 | import os
8 |
9 |
10 | class DataFrames():
11 |
12 | db_file = 'db/mstables.sqlite' # Standard db file name
13 |
14 | def __init__(self, file = db_file):
15 |
16 | msg = 'Creating initial DataFrames objects from file {}...\n'
17 | print(msg.format(file))
18 |
19 | self.conn = sqlite3.connect(
20 | file, detect_types=sqlite3.PARSE_COLNAMES)
21 | self.cur = self.conn.cursor()
22 |
23 | # Row Headers
24 | colheaders = self.table('ColHeaders', True)
25 | self.colheaders = colheaders.set_index('id')
26 |
27 | # Dates and time references
28 | timerefs = self.table('TimeRefs', True)
29 | self.timerefs = timerefs.set_index('id').replace(['', '—'], None)
30 |
31 | # Reference tables
32 | self.urls = self.table('URLs', True)
33 | self.securitytypes = self.table('SecurityTypes', True)
34 | self.tickers = self.table('Tickers', True)
35 | self.sectors = self.table('Sectors', True)
36 | self.industries = self.table('Industries', True)
37 | self.styles = self.table('StockStyles', True)
38 | self.exchanges = self.table('Exchanges', True)
39 | self.countries = (self.table('Countries', True)
40 | .rename(columns={'a2_iso':'country_c2', 'a3_un':'country_c3'}))
41 | self.companies = self.table('Companies', True)
42 | self.currencies = self.table('Currencies', True)
43 | self.stocktypes = self.table('StockTypes', True)
44 |
45 | #self.fetchedurls = self.table('Fetched_urls', True)
46 |
47 | # Master table
48 | self.master0 = self.table('Master', True)
49 |
50 | # Merge Tables
51 | self.master = (self.master0
52 | # Ticker Symbols
53 | .merge(self.tickers, left_on='ticker_id', right_on='id')
54 | .drop(['id'], axis=1)
55 | # Company / Security Name
56 | .merge(self.companies, left_on='company_id', right_on='id')
57 | .drop(['id', 'company_id'], axis=1)
58 | # Exchanges
59 | .merge(self.exchanges, left_on='exchange_id', right_on='id')
60 | .drop(['id'], axis=1)
61 | # Industries
62 | .merge(self.industries, left_on='industry_id', right_on='id')
63 | .drop(['id', 'industry_id'], axis=1)
64 | # Sectors
65 | .merge(self.sectors, left_on='sector_id', right_on='id')
66 | .drop(['id', 'sector_id'], axis=1)
67 | # Countries
68 | .merge(self.countries, left_on='country_id', right_on='id')
69 | .drop(['id', 'country_id'], axis=1)
70 | # Security Types
71 | .merge(self.securitytypes, left_on='security_type_id', right_on='id')
72 | .drop(['id', 'security_type_id'], axis=1)
73 | # Stock Types
74 | .merge(self.stocktypes, left_on='stock_type_id', right_on='id')
75 | .drop(['id', 'stock_type_id'], axis=1)
76 | # Stock Style Types
77 | .merge(self.styles, left_on='style_id', right_on='id')
78 | .drop(['id', 'style_id'], axis=1)
79 | # Quote Header Info
80 | .merge(self.quoteheader(), on=['ticker_id', 'exchange_id'])
81 | .rename(columns={'fpe':'PE_Forward'})
82 | # Currency
83 | .merge(self.currencies, left_on='currency_id', right_on='id')
84 | .drop(['id', 'currency_id'], axis=1)
85 | # Fiscal Year End
86 | .merge(self.timerefs, left_on='fyend_id', right_on='id')
87 | .drop(['fyend_id'], axis=1)
88 | .rename(columns={'dates':'fy_end'})
89 | )
90 | # Change date columns to TimeFrames
91 | self.master['fy_end'] = pd.to_datetime(self.master['fy_end'])
92 | self.master['update_date'] = pd.to_datetime(self.master['update_date'])
93 | self.master['lastdate'] = pd.to_datetime(self.master['lastdate'])
94 | self.master['_52wk_hi'] = self.master['_52wk_hi'].astype('float')
95 | self.master['_52wk_lo'] = self.master['_52wk_lo'].astype('float')
96 | self.master['lastprice'] = self.master['lastprice'].astype('float')
97 | self.master['openprice'] = self.master['openprice'].astype('float')
98 |
99 | print('\nInitial DataFrames created successfully.')
100 |
101 |
102 | def quoteheader(self):
103 | return self.table('MSheader')
104 |
105 |
106 | def valuation(self):
107 | # Create DataFrame
108 | val = self.table('MSvaluation')
109 |
110 | # Rename column headers with actual year values
111 | yrs = val.iloc[0, 2:13].replace(self.timerefs['dates']).to_dict()
112 | cols = val.columns[:13].values.tolist() + list(map(
113 | lambda col: ''.join([col[:3], yrs[col[3:]]]), val.columns[13:]))
114 | val.columns = cols
115 |
116 | # Resize and reorder columns
117 | val = val.set_index(['exchange_id', 'ticker_id']).iloc[:, 11:]
118 |
119 | return val
120 |
121 |
122 | def keyratios(self):
123 | keyr = self.table('MSfinancials')
124 | yr_cols = ['Y0', 'Y1', 'Y2', 'Y3', 'Y4', 'Y5', 'Y6',
125 | 'Y7', 'Y8', 'Y9', 'Y10']
126 | keyr = self.get_yrcolumns(keyr, yr_cols)
127 | keyr[yr_cols[:-1]] = keyr[yr_cols[:-1]].astype('datetime64')
128 |
129 | return keyr
130 |
131 |
132 | def finhealth(self):
133 | finan = self.table('MSratio_financial')
134 | yr_cols = [col for col in finan.columns if col.startswith('fh_Y')]
135 | finan = self.get_yrcolumns(finan, yr_cols)
136 | finan[yr_cols[:-1]] = finan[yr_cols[:-1]].astype('datetime64')
137 |
138 | return finan
139 |
140 |
141 | def profitability(self):
142 | profit= self.table('MSratio_profitability')
143 | yr_cols = [col for col in profit.columns if col.startswith('pr_Y')]
144 | profit = self.get_yrcolumns(profit, yr_cols)
145 | profit[yr_cols[:-1]] = profit[yr_cols[:-1]].astype('datetime64')
146 |
147 | return profit
148 |
149 |
150 | def growth(self):
151 | growth = self.table('MSratio_growth')
152 | yr_cols = [col for col in growth.columns if col.startswith('gr_Y')]
153 | growth = self.get_yrcolumns(growth, yr_cols)
154 | growth[yr_cols[:-1]] = growth[yr_cols[:-1]].astype('datetime64')
155 |
156 | return growth
157 |
158 |
159 | def cfhealth(self):
160 | cfhealth = self.table('MSratio_cashflow')
161 | yr_cols = [col for col in cfhealth.columns if col.startswith('cf_Y')]
162 | cfhealth = self.get_yrcolumns(cfhealth, yr_cols)
163 | cfhealth[yr_cols[:-1]] = cfhealth[yr_cols[:-1]].astype('datetime64')
164 |
165 | return cfhealth
166 |
167 |
168 | def efficiency(self):
169 | effic = self.table('MSratio_efficiency')
170 | yr_cols = [col for col in effic.columns if col.startswith('ef_Y')]
171 | effic = self.get_yrcolumns(effic, yr_cols)
172 | effic[yr_cols[:-1]] = effic[yr_cols[:-1]].astype('datetime64')
173 |
174 | return effic
175 |
176 | # Income Statement - Annual
177 | def annualIS(self):
178 | rep_is_yr = self.table('MSreport_is_yr')
179 | yr_cols = [col for col in rep_is_yr.columns
180 | if col.startswith('Year_Y')]
181 | rep_is_yr = self.get_yrcolumns(rep_is_yr, yr_cols)
182 | rep_is_yr[yr_cols[:-1]] = rep_is_yr[yr_cols[:-1]].astype('datetime64')
183 |
184 | return rep_is_yr
185 |
186 | # Income Statement - Quarterly
187 | def quarterlyIS(self):
188 | rep_is_qt = self.table('MSreport_is_qt')
189 | yr_cols = [col for col in rep_is_qt.columns
190 | if col.startswith('Year_Y')]
191 | rep_is_qt = self.get_yrcolumns(rep_is_qt, yr_cols)
192 | rep_is_qt[yr_cols[:-1]] = rep_is_qt[yr_cols[:-1]].astype('datetime64')
193 |
194 | return rep_is_qt
195 |
196 | # Balance Sheet - Annual
197 | def annualBS(self):
198 | rep_bs_yr = self.table('MSreport_bs_yr')
199 | yr_cols = [col for col in rep_bs_yr.columns
200 | if col.startswith('Year_Y')]
201 | rep_bs_yr = self.get_yrcolumns(rep_bs_yr, yr_cols)
202 | rep_bs_yr[yr_cols[:-1]] = rep_bs_yr[yr_cols[:-1]].astype('datetime64')
203 |
204 | return rep_bs_yr
205 |
206 | # Balance Sheet - Quarterly
207 | def quarterlyBS(self):
208 | rep_bs_qt = self.table('MSreport_bs_qt')
209 | yr_cols = [col for col in rep_bs_qt.columns
210 | if col.startswith('Year_Y')]
211 | rep_bs_qt = self.get_yrcolumns(rep_bs_qt, yr_cols)
212 | rep_bs_qt[yr_cols[:-1]] = rep_bs_qt[yr_cols[:-1]].astype('datetime64')
213 |
214 | return rep_bs_qt
215 |
216 | # Cashflow Statement - Annual
217 | def annualCF(self):
218 | rep_cf_yr = self.table('MSreport_cf_yr')
219 | yr_cols = [col for col in rep_cf_yr.columns
220 | if col.startswith('Year_Y')]
221 | rep_cf_yr = self.get_yrcolumns(rep_cf_yr, yr_cols)
222 | rep_cf_yr[yr_cols[:-1]] = rep_cf_yr[yr_cols[:-1]].astype('datetime64')
223 |
224 | return rep_cf_yr
225 |
226 | # Cashflow Statement - Quarterly
227 | def quarterlyCF(self):
228 | rep_cf_qt = self.table('MSreport_cf_qt')
229 | yr_cols = [col for col in rep_cf_qt.columns
230 | if col.startswith('Year_Y')]
231 | rep_cf_qt = self.get_yrcolumns(rep_cf_qt, yr_cols)
232 | rep_cf_qt[yr_cols[:-1]] = rep_cf_qt[yr_cols[:-1]].astype('datetime64')
233 |
234 | return rep_cf_qt
235 |
236 | # 10yr Price History
237 | def priceHistory(self):
238 |
239 | return self.table('MSpricehistory')
240 |
241 |
242 | def insider_trades(self):
243 | df_insiders = self.table('Insiders', False)
244 | df_tradetypes = self.table('TransactionType', False)
245 | df_trades = self.table('InsiderTransactions', False)
246 | df_trades['date'] = pd.to_datetime(df_trades['date'])
247 | df = (df_trades
248 | .merge(df_insiders, left_on='name_id', right_on='id')
249 | .drop(['id', 'name_id'], axis=1)
250 | .merge(df_tradetypes, left_on='transaction_id', right_on='id')
251 | .drop(['id', 'transaction_id'], axis=1)
252 | )
253 | return df
254 |
255 |
256 | def get_yrcolumns(self, df, cols):
257 | for yr in cols:
258 | df = (df.merge(self.timerefs, left_on=yr, right_on='id')
259 | .drop(yr, axis=1).rename(columns={'dates':yr}))
260 |
261 | return df
262 |
263 |
264 | def table(self, tbl, prnt = False):
265 | self.cur.execute('SELECT * FROM {}'.format(tbl))
266 | cols = list(zip(*self.cur.description))[0]
267 |
268 | try:
269 | if prnt == True:
270 | msg = '\t- DataFrame \'df.{}\' ...'
271 | print(msg.format(tbl.lower()))
272 | return pd.DataFrame(self.cur.fetchall(), columns=cols)
273 | except:
274 | raise
275 |
276 |
277 | def __del__(self):
278 | self.cur.close()
279 | self.conn.close()
280 |
--------------------------------------------------------------------------------
/sample_rules_output.csv:
--------------------------------------------------------------------------------
1 | company,sector,industry,openprice,yield,avevol,CAGR_Rev,CAGR_OpeInc,CAGR_OpeCF,CAGR_FreeCF,Dividend_Y10,Rev_Growth_Y9,OpeInc_Growth_Y9,NetInc_Growth_Y9,PE_TTM,PB_TTM,PS_TTM,PC_TTM
2 | AB Sagax A,Real Estate,Real Estate Services,16.0,1.1,,19.6,18.1,31.0,31.0,1.8,20.2,17.2,6.8,12.7,2.8,17.8,25.2
3 | AF Poyry AB B,Industrials,Engineering & Construction,96.5,2.7,11383.0,10.9,16.4,15.5,15.7,4.7,10.4,20.7,14.2,18.4,2.7,1.1,18.1
4 | AVI Ltd,Consumer Defensive,Packaged Foods,5.8,4.5,,7.8,10.8,12.7,24.6,4.4,1.9,7.0,7.9,18.8,6.6,2.3,15.0
5 | Adler Real Estate AG,Real Estate,Real Estate Services,12.8,0.3,136030.0,86.2,91.3,64.1,62.9,0.0,41.9,49.1,109.5,3.6,0.7,2.3,7.6
6 | Ado Properties SA,Real Estate,Real Estate Services,46.5,1.3,54560.0,46.5,43.1,47.3,47.0,0.6,20.2,17.4,8.7,5.3,1.0,13.2,19.8
7 | Alimentation Couche-Tard Inc Class B,Consumer Defensive,Grocery Stores,52.9,0.5,,7.7,19.0,13.3,9.7,0.3,35.6,21.1,38.4,17.0,3.7,0.6,10.6
8 | Apple Inc,Technology,Consumer Electronics,184.9,1.4,30578092.1,9.2,7.7,7.6,7.5,2.9,15.9,15.6,23.1,16.4,8.0,3.8,12.9
9 | Armanino Foods of Distinction Inc,Consumer Defensive,Packaged Foods,3.3,2.8,19498.0,7.7,9.9,18.5,10.8,0.1,7.3,12.9,23.2,17.0,5.9,2.6,16.0
10 | Ascendas India Trust,Real Estate,Real Estate - General,0.9,,29988.0,8.3,12.2,12.6,12.4,0.0,20.1,22.8,37.5,6.0,1.4,6.4,8.0
11 | BOC Aviation Ltd,Industrials,Airports & Air Services,22.6,3.6,15000000.0,14.6,18.7,15.5,7.8,0.3,23.5,27.0,5.8,9.6,1.4,3.7,3.4
12 | Barratt Developments PLC,Consumer Cyclical,Residential Construction,7.1,4.5,,13.3,27.9,25.4,25.3,0.3,4.8,7.9,9.1,8.6,1.4,1.2,9.2
13 | Barratt Developments PLC ADR,Consumer Cyclical,Residential Construction,15.3,7.6,430.0,13.3,27.9,25.4,25.3,0.9,4.8,7.9,9.1,8.3,1.3,1.2,8.9
14 | Beijing North Star Co Ltd Class H,Real Estate,Real Estate - General,1.0,4.4,18000000.0,26.5,22.8,17.4,19.9,0.1,15.6,48.7,4.3,4.8,0.5,0.4,3.3
15 | Bellway PLC,Consumer Cyclical,Residential Construction,652.6,4.5,339100.0,21.6,34.0,33.9,34.4,1.4,15.6,14.2,14.5,7.1,1.4,1.2,13.4
16 | Billington Holdings PLC,Industrials,Engineering & Construction,290.1,3.9,16678.0,14.9,38.0,20.1,24.6,0.1,6.0,12.9,15.6,8.9,1.6,0.5,7.8
17 | Build King Holdings Ltd,Industrials,Engineering & Construction,0.8,2.0,36000000.0,23.5,98.3,24.0,29.6,0.0,5.3,111.7,123.7,4.4,2.0,0.3,2.8
18 | CK Infrastructure Holdings Ltd,Industrials,Infrastructure Operations,18.5,3.7,21000000.0,7.3,36.6,7.1,8.2,2.4,18.8,55.0,1.8,14.1,1.4,23.2,42.8
19 | CK Infrastructure Holdings Ltd ADR,Industrials,Infrastructure Operations,40.0,3.8,926.0,7.3,36.6,7.1,8.2,12.0,18.8,55.0,1.8,14.2,1.4,23.4,43.1
20 | CPL Resources PLC,Industrials,Staffing & Outsourcing Services,565.0,2.3,702.0,9.6,8.4,38.0,44.7,0.1,14.8,16.2,20.1,10.0,1.8,0.3,7.9
21 | Canadian Apartment Properties Real Estate Investment Trust,Real Estate,REIT - Residential,47.9,2.8,376599.0,7.6,7.9,10.6,17.4,1.3,7.8,10.2,45.5,5.7,1.2,10.0,16.0
22 | Castellum AB,Real Estate,Real Estate - General,47.8,2.1,122351.0,11.4,13.1,12.6,12.6,5.7,7.6,10.9,26.8,5.8,1.2,8.3,16.2
23 | Central Asia Metals PLC,Basic Materials,Copper,2.6,6.4,,30.4,26.9,19.1,20.3,0.2,87.6,65.4,32.0,9.5,1.6,2.7,6.3
24 | Central China Land Media Co Ltd,Basic Materials,Chemicals,8.0,2.2,41000000.0,25.6,25.4,37.3,43.2,0.2,10.1,15.7,5.9,11.1,1.1,0.9,14.4
25 | Chengdu Fusen Noble-House Industrial Co Ltd A,Consumer Cyclical,Home Furnishings & Fixtures,25.0,4.4,26000000.0,14.1,15.8,7.9,11.6,0.6,12.9,11.4,12.9,8.8,1.4,4.6,6.8
26 | China Lesso Group Holdings Ltd,Basic Materials,Building Materials,0.6,4.9,,12.7,12.7,24.0,59.9,0.2,16.6,22.4,8.7,5.8,1.0,0.6,3.3
27 | China Maple Leaf Educational Systems Ltd Shs Unitary 144A/Reg S,Consumer Defensive,Education & Training Services,0.4,2.5,151.3,23.3,28.3,24.6,60.7,0.1,23.8,20.2,32.2,16.4,2.6,6.6,11.8
28 | China National Building Material Co Ltd Class H ADR,Basic Materials,Building Materials,47.0,1.7,2065.0,13.2,9.6,33.0,98.5,5.2,71.6,73.4,133.4,6.6,0.6,0.2,1.1
29 | China Resources Land Ltd,Real Estate,Real Estate - General,9.9,2.8,67500038.0,16.8,28.4,13.9,13.6,0.8,22.3,33.7,27.5,8.3,1.4,1.7,7.4
30 | China Resources Land Ltd ADR,Real Estate,Real Estate - General,43.6,2.9,477.0,16.8,28.4,13.9,13.6,8.5,22.3,33.7,27.5,8.4,1.4,1.7,7.5
31 | China Resources Pharmaceutical Group Ltd,Healthcare,Drug Manufacturers - Specialty & Generic,3.7,0.9,22000000.0,10.2,8.0,15.0,41.5,0.1,9.9,14.9,15.9,17.4,1.8,0.4,8.5
32 | China Sunsine Chemical Holdings Ltd,Consumer Cyclical,Rubber & Plastics,0.8,2.1,0.0,14.1,41.0,33.6,101.8,0.1,19.9,37.2,87.9,4.7,1.2,0.9,3.5
33 | Citic Telecom International Holdings Ltd,Communication Services,Telecom Services,1.3,5.2,84000000.0,9.5,27.8,11.1,15.0,0.2,27.0,11.7,7.9,12.0,1.3,1.2,6.3
34 | Daiwa House Industry Co Ltd,Real Estate,Real Estate - General,24.7,3.5,0.0,13.6,22.1,18.4,13.9,110.2,8.1,12.0,17.2,8.4,1.3,0.5,6.6
35 | Daiwa House Industry Co Ltd ADR,Real Estate,Real Estate - General,28.0,3.6,25515.0,13.6,22.1,18.4,13.9,109.6,8.1,12.0,17.2,8.5,1.3,0.5,6.7
36 | Dream Global Real Estate Investment Trust,Real Estate,REIT - Office,9.0,5.8,7122.8,10.0,9.8,13.2,13.2,0.7,34.1,40.1,97.1,4.5,0.9,7.3,16.2
37 | Elanders AB Class B,Industrials,Business Services,8.6,3.2,,38.7,29.6,28.9,50.5,2.6,15.0,45.1,54.4,11.9,1.1,0.3,3.8
38 | Envea SA,Technology,Scientific & Technical Instruments,70.0,0.9,936.0,11.2,19.1,17.6,18.5,0.6,14.3,58.2,80.0,11.8,1.7,1.2,13.1
39 | Faes Farma SA,Healthcare,Medical Instruments & Supplies,4.0,3.5,48.0,12.5,16.8,13.0,11.7,0.1,18.1,23.3,22.2,19.7,3.0,3.3,18.3
40 | Fuyao Glass Industry Group Co Ltd,Consumer Cyclical,Auto Parts,14.1,6.4,184000000.0,12.0,15.0,15.4,18.4,1.5,8.1,22.3,30.9,14.6,2.9,3.0,11.2
41 | Gima TT SpA Ordinary Shares,Industrials,Diversified Industrials,6.9,6.0,337693.0,56.2,78.3,14.9,11.8,0.4,20.9,17.8,17.5,12.0,11.0,3.3,34.4
42 | HIRATA Corp,Industrials,Diversified Industrials,59.7,3.0,,19.8,83.5,24.6,20.6,100.0,16.9,13.6,13.2,16.0,1.7,1.0,29.1
43 | Hexpol AB B,Basic Materials,Specialty Chemicals,6.8,2.6,125.0,11.4,11.4,8.1,8.2,2.0,12.6,8.3,7.8,15.2,2.7,1.8,14.1
44 | Ho Bee Land Ltd,Real Estate,Real Estate Services,1.6,3.1,,7.2,39.3,26.2,27.9,0.1,19.6,34.2,8.3,6.2,0.5,8.5,7.5
45 | Hon Kwok Land Investment Co Ltd,Real Estate,Real Estate - General,4.1,3.0,87512.0,49.6,9.2,22.6,22.4,0.1,13.1,35.6,409.5,3.3,0.3,1.5,
46 | Howden Joinery Group PLC,Consumer Cyclical,Home Furnishings & Fixtures,509.0,2.2,21000000.0,9.6,10.9,13.4,13.6,0.1,7.7,2.4,2.9,16.3,5.4,2.1,19.0
47 | Howden Joinery Group PLC ADR,Consumer Cyclical,Home Furnishings & Fixtures,27.0,2.2,4948.0,9.6,10.9,13.4,13.6,0.4,7.7,2.4,2.9,16.7,5.6,2.1,19.5
48 | ISDN Holdings Ltd,Industrials,Engineering & Construction,0.6,2.7,109310.0,12.9,23.6,11.4,14.9,0.0,3.4,41.0,14.6,8.0,0.6,0.3,7.0
49 | Intrum AB,Financial Services,Credit Services,77.7,4.0,75566.0,24.0,26.2,21.7,21.8,9.5,41.5,42.5,29.0,14.2,1.3,2.2,5.2
50 | Intrum AB ADR,Financial Services,Credit Services,25.9,8.2,742.0,24.0,26.2,21.7,21.8,9.3,41.5,42.5,29.0,14.6,1.3,2.3,5.3
51 | Jones Lang LaSalle Inc,Real Estate,Real Estate Services,146.2,0.5,307288.0,29.6,14.0,15.6,19.4,0.8,105.7,31.4,90.6,14.6,1.9,0.4,11.7
52 | Klovern AB B,Real Estate,Real Estate Services,1.2,0.9,,8.7,8.8,13.7,13.5,0.4,11.4,5.7,28.1,3.6,0.6,3.1,13.2
53 | Knowit AB,Technology,Information Technology Services,88.3,2.5,569.0,9.3,27.1,20.9,21.0,4.8,12.8,12.3,14.7,19.0,3.9,1.4,15.5
54 | Kruk SA,Financial Services,Credit Services,172.5,2.0,2587.0,21.1,30.9,61.1,56.4,5.0,0.2,17.6,11.8,14.7,2.6,4.6,
55 | Link Real Estate Investment Trust,Real Estate,REIT - Retail,27.0,2.8,23500107.0,9.0,10.5,9.1,9.1,2.5,8.3,8.9,169.7,4.4,1.1,19.7,33.3
56 | Loomis AB B,Industrials,Business Services,96.4,2.6,10011.0,11.0,14.6,16.8,19.9,9.0,11.3,3.9,7.7,16.4,3.1,1.3,10.0
57 | Macfarlane Group PLC,Consumer Cyclical,Packaging & Containers,99.8,2.1,200301.0,8.5,14.9,32.0,27.2,0.0,10.9,19.2,17.7,18.6,2.6,0.7,13.7
58 | Marine Products Corp,Consumer Cyclical,Recreational Vehicles,13.8,2.6,,12.2,28.5,18.1,18.5,0.4,11.7,18.9,47.6,18.6,7.1,1.8,23.5
59 | Midea Group Co Ltd Class A,Technology,Consumer Electronics,51.4,2.3,390000000.0,16.5,29.5,22.6,22.9,1.2,7.9,64.0,17.0,17.2,4.2,1.3,12.5
60 | Morguard Corp,Real Estate,Real Estate Services,193.8,0.3,1157.0,17.8,16.3,10.8,9.7,0.6,5.5,2.3,3.1,6.9,0.6,1.9,7.6
61 | NSD Co Ltd,Technology,Software - Application,21.8,4.0,,9.7,12.0,14.2,15.6,52.0,5.2,10.4,18.2,19.9,2.5,1.9,20.7
62 | Nobility Homes Inc,Consumer Cyclical,Residential Construction,20.4,4.4,,17.7,43.1,21.7,21.7,0.2,14.0,31.4,50.0,16.1,1.7,2.0,10.7
63 | Nolato AB B,Technology,Communication Equipment,42.5,2.8,,12.4,18.1,15.3,10.4,12.5,20.6,21.8,26.2,16.1,4.5,1.4,11.1
64 | Nordic Waterproofing Holding A/S,Basic Materials,Building Materials,8.4,4.5,,9.9,9.4,16.8,16.2,3.8,22.6,8.4,11.1,14.2,2.0,0.8,11.7
65 | Northview Apartment Real Estate Investment Trust,Real Estate,REIT - Residential,28.2,5.8,127433.0,15.8,15.6,13.4,33.6,1.6,9.7,11.7,36.4,6.3,1.1,5.0,13.1
66 | OEM International AB B,Industrials,Diversified Industrials,19.2,2.9,,13.3,15.2,10.1,12.6,6.0,13.6,14.6,16.1,19.6,5.2,1.6,22.4
67 | PRO Real Estate Investment Trust,Real Estate,REIT - Diversified,2.3,9.1,100178.0,83.0,87.2,69.5,69.5,0.2,38.0,48.3,86.7,10.5,1.1,4.8,14.3
68 | Packaging Corp of America,Consumer Cyclical,Packaging & Containers,99.2,3.2,925768.0,13.9,15.8,14.2,11.0,3.0,8.8,16.8,10.4,12.0,3.5,1.3,7.9
69 | RPC Group PLC,Consumer Cyclical,Packaging & Containers,9.0,3.5,,28.9,40.9,35.0,50.7,0.3,36.4,76.5,92.3,12.6,1.6,0.8,9.0
70 | Sabra Health Care REIT Inc,Real Estate,REIT - Healthcare Facilities,17.1,9.1,,35.8,29.3,42.1,42.1,1.8,53.7,36.8,76.2,12.8,1.1,5.5,9.6
71 | SalMar ASA,Consumer Defensive,Farm Products,127.8,5.0,20675.0,12.7,19.8,20.3,17.9,19.0,5.1,71.3,56.9,12.3,4.8,3.9,15.8
72 | Shanghai Environment Group Ltd,Industrials,Waste Management,14.6,0.5,73000000.0,39.2,62.4,28.0,63.3,0.1,0.7,11.9,14.2,13.8,1.3,3.1,8.0
73 | Shanghai Pharmaceuticals Holding Co Ltd A,Healthcare,Medical Distribution,19.6,1.9,260000000.0,15.3,20.3,26.4,77.3,0.4,21.6,46.6,10.2,14.0,1.4,0.3,17.5
74 | Shanghai Pharmaceuticals Holding Co Ltd Class H,Healthcare,Medical Distribution,6.7,2.7,43000000.0,15.3,20.3,26.4,77.3,0.7,21.6,46.6,10.2,9.9,1.0,0.2,12.4
75 | Singapore Shipping Corp Ltd,Industrials,Shipping & Ports,0.2,3.5,,18.1,13.2,12.1,12.1,0.0,6.4,18.4,22.0,8.8,1.0,1.9,3.5
76 | Slate Office REIT,Real Estate,REIT - Office,4.3,12.1,9934.0,49.6,45.0,43.7,32.0,0.8,38.0,48.4,55.2,5.4,0.7,2.0,8.5
77 | Softronic AB B,Technology,Information Technology Services,1.8,4.2,,11.3,51.9,16.7,18.3,0.8,43.7,312.5,1.1,18.6,3.6,1.0,14.8
78 | Somero Enterprises Inc Shs Reg-S,Industrials,Farm & Construction Equipment,4.0,4.3,,15.9,37.0,24.6,26.9,0.2,9.8,14.7,17.0,11.5,4.5,2.6,10.4
79 | Stanley Electric Co Ltd,Consumer Cyclical,Auto Parts,23.8,1.7,,10.9,14.1,19.9,65.3,49.0,13.8,25.7,25.5,12.0,1.4,1.1,6.8
80 | Stellus Capital Investment Corp,Financial Services,Asset Management,14.3,9.5,138403.0,12.8,15.5,8.1,8.1,1.4,34.4,33.0,15.8,4.7,1.2,4.3,
81 | Suzuki Motor Corp,Consumer Cyclical,Auto Manufacturers,40.8,1.5,1175.0,7.8,20.9,18.6,55.7,81.0,18.5,40.3,34.9,9.6,1.6,0.6,5.3
82 | TDC Soft Inc,Technology,Software - Infrastructure,6.8,2.6,0.0,7.0,14.7,17.1,22.8,17.5,4.2,12.5,7.2,13.2,1.9,0.8,15.9
83 | TK Group (Holdings) Ltd,Consumer Cyclical,Rubber & Plastics,5.3,3.4,404392.0,13.9,18.0,21.8,21.7,0.2,23.5,18.0,16.0,12.6,4.1,1.9,9.0
84 | Takara Leben Co Ltd,Real Estate,Real Estate - General,2.7,4.7,,11.3,14.6,62.4,95.5,16.0,7.0,21.7,20.6,4.8,0.9,0.3,2.7
85 | Thor Industries Inc,Consumer Cyclical,Recreational Vehicles,62.8,2.3,799712.0,20.8,23.6,26.4,22.1,1.5,14.9,12.9,14.9,15.0,1.9,0.5,6.4
86 | Trelleborg AB B,Industrials,Diversified Industrials,51.2,3.0,37690.0,9.6,12.1,16.5,17.0,8.1,7.7,10.2,11.0,13.6,1.4,1.2,11.0
87 | UnitedHealth Group Inc,Healthcare,Health Care Plans,236.2,1.5,2069.0,13.1,12.5,17.6,19.2,3.4,12.4,12.6,13.5,18.1,4.1,1.0,21.6
88 | Vidrala SA,Consumer Cyclical,Packaging & Containers,82.0,1.3,,15.0,14.6,18.7,12.5,0.3,16.1,28.6,30.1,18.3,3.5,2.2,10.5
89 | Walgreens Boots Alliance Inc,Consumer Defensive,Pharmaceutical Retailers,47.7,3.1,0.0,12.7,11.7,14.0,17.4,1.7,11.3,14.8,23.2,10.0,2.0,0.4,8.2
90 | Warehouses De Pauw,Real Estate,REIT - Industrial,135.2,3.5,20803.0,15.5,15.1,14.6,15.6,4.5,20.6,20.0,39.8,9.3,1.9,15.2,17.0
91 | Winnebago Industries Inc,Consumer Cyclical,Recreational Vehicles,33.8,1.2,581877.0,20.2,29.9,52.7,55.8,0.4,30.4,30.0,43.5,10.5,2.0,0.6,9.2
92 | Wuxi Little Swan Co Ltd,Technology,Consumer Electronics,51.5,2.4,436489.0,22.0,41.0,23.7,22.6,1.0,10.5,50.2,23.6,14.7,3.2,1.2,11.4
93 | Yamaya Corp,Consumer Defensive,Beverages - Brewers,16.7,2.1,,7.1,15.8,38.8,74.1,44.0,1.0,41.0,93.7,6.5,0.7,0.1,3.7
94 | Yuzhou Properties Co Ltd,Real Estate,Real Estate - General,1.2,7.7,49500000.0,26.6,27.1,8.0,61.3,0.3,12.0,0.2,25.6,4.6,0.9,0.6,4.9
95 | ZENKOKU HOSHO Co Ltd,Financial Services,Credit Services,30.3,4.3,,9.6,23.8,14.0,14.1,80.0,10.2,10.8,12.9,11.3,2.3,6.3,8.0
96 | ZENKOKU HOSHO Co Ltd ADR,Financial Services,Credit Services,11.6,2.0,1436.0,9.6,23.8,14.0,14.1,25.6,10.2,10.8,12.9,12.4,2.5,6.9,8.8
97 | Zhejiang Hangmin Co Ltd,Consumer Cyclical,Textile Manufacturing,10.7,2.6,87000000.0,20.8,14.2,15.8,12.9,0.3,115.5,50.8,15.4,11.2,1.8,1.0,6.7
98 |
--------------------------------------------------------------------------------
/fetch.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | import multiprocessing as mp
3 | from datetime import datetime
4 | from importlib import reload #Comment out after development
5 | from csv import reader
6 | import numpy as np
7 | import pandas as pd
8 | import requests, sqlite3, time, json, zlib, re, os, parse
9 |
10 |
11 | def create_tables(db_file):
12 |
13 | def mssitemap():
14 | urls1 = []
15 | xml_files = [
16 | 'sal-quote-stock-sitemap.xml', 'sal-quote-cefs-sitemap.xml',
17 | 'sal-quote-funds-sitemap.xml', 'sal-quote-etfs-sitemap.xml'
18 | ]
19 | url = 'https://www.morningstar.com/sitemaps/individual/{}'
20 |
21 | for xml_file in xml_files:
22 | type = re.findall('sal-quote-(.+?)-sitemap', xml_file)[0]
23 |
24 | print('\nFetching list of {} from MorningStar.com'.format(type))
25 | xml = requests.get(url.format(xml_file)).text
26 |
27 | print('Parsing list of {}'.format(type))
28 | tree = ET.fromstring(xml)
29 | url_tag = '{http://www.sitemaps.org/schemas/sitemap/0.9}url'
30 | loc_tag = '{http://www.sitemaps.org/schemas/sitemap/0.9}loc'
31 | urls2 = tree.findall('{}/{}'.format(url_tag, loc_tag))
32 |
33 | print('List of {} length = {}'.format(type, len(urls2)))
34 |
35 | def get_ticker(u, typ):
36 | while True:
37 | try:
38 | x = re.findall('/{}/(.+)/'.format(typ),
39 | u)[0].split('/')[1].upper()
40 | if x.find(' ') > 0:
41 | x = ''
42 | break
43 | except IndexError:
44 | typ = 'stocks'
45 | except:
46 | raise
47 |
48 | return x
49 |
50 | urls1 += [(get_ticker(url.text, type),) for url in urls2]
51 | #urls1 += [(url.text,) for url in urls2]
52 |
53 | print('\nTotal length = {}'.format(len(urls1)))
54 | return urls1
55 |
56 | # Create database connection
57 | print('\nPlease wait, database tables are being created ...')
58 | conn = sqlite3.connect(db_file)
59 | conn.execute('pragma auto_vacuum = 1')
60 | cur = conn.cursor()
61 |
62 | # Create database tables based on tables.json
63 | for table in tbl_names:
64 | columns = ' '.join(['{} {}'.format(k, v) for k, v in tbl_js[table].items()])
65 | sql = 'CREATE TABLE IF NOT EXISTS {} ({})'.format(table, columns)
66 | db_execute(cur, sql)
67 |
68 | std_list = [
69 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
70 | 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
71 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
72 | ]
73 | sql = 'INSERT OR IGNORE INTO Tickers (ticker) VALUES (?)'
74 | cur.executemany(sql, mssitemap() + [(s,) for s in std_list])
75 |
76 | # Insert list of tickers and exchanges previously retrieved into database
77 | file = 'ticker_exch.json'
78 | if file in os.listdir(fd_input):
79 | with open(fd_input + file) as fi:
80 | tbls = json.load(fi)
81 | for tbl in tbls:
82 | if tbl == 'Tickers':
83 | col = '(id, ticker)'
84 | val = '(?, ?)'
85 | elif tbl == 'Exchanges':
86 | col = '(id, exchange, exchange_sym, country_id)'
87 | val = '(?, ?, ?, ?)'
88 | elif tbl == 'Master':
89 | col = '(ticker_id, exchange_id)'
90 | val = '(?, ?)'
91 | sql = 'INSERT OR IGNORE INTO {} {} VALUES {}'.format(tbl, col, val)
92 | cur.executemany(sql, tbls[tbl])
93 |
94 | # Insert list of countries into Countries table
95 | sql = '''INSERT OR IGNORE INTO Countries
96 | (country, a2_iso, a3_un) VALUES (?, ?, ?)'''
97 | cur.executemany(sql, csv_content('input/ctycodes.csv', 3))
98 |
99 | # Insert list of currencies into Currencies table
100 | sql = '''INSERT OR IGNORE INTO Currencies (currency, currency_code)
101 | VALUES (?, ?)'''
102 | cur.executemany(sql, csv_content('input/symbols.csv', 2))
103 |
104 | # Insert list of types into SecurityTypes table
105 | sql = '''INSERT OR IGNORE INTO SecurityTypes
106 | (security_type_code, security_type) VALUES (?, ?)'''
107 | cur.executemany(sql, csv_content('input/ms_investment-types.csv', 2))
108 |
109 | # Insert list of api URLs into URLs table
110 | for k, v in apis.items():
111 | sql = sql_insert('URLs', '(id, url)', (k, v))
112 | db_execute(cur, sql)
113 |
114 | save_db(conn)
115 | cur.close()
116 | conn.close()
117 |
118 | msg = '\n~ The following {} database tables were successfully created:\n'
119 | tbls = json.dumps(sorted(tbl_names), indent=2)
120 | tbls = re.sub('\[|\]|",|"\n', '', tbls)
121 | msg += re.sub('"', '- ', tbls)
122 | return msg.format(len(tbl_names))
123 |
124 |
125 | def csv_content(file, columns, header=False):
126 | with open(file) as csvfile:
127 | info = reader(csvfile)#, delimiter=',', quotechar='"')
128 | if header == True:
129 | return [row[:columns] for row in info]
130 | return [row[:columns] for row in info][1:]
131 |
132 |
133 | def db_execute(cur, sql):
134 | x = 0
135 | while x < 100:
136 | try:
137 | sql = re.sub('\'Null\'|\'null\'|None', 'NULL', sql)
138 | return cur.execute(sql)
139 | except KeyboardInterrupt:
140 | print('\nGoodbye!')
141 | exit()
142 | except Exception as e:
143 | if x == 99:
144 | msg = '\n\n### Error occurred while executing SQL cmd:'
145 | msg += '\n\n \'{}\'\n'
146 | print(msg.format(sql))
147 | print('### Error type - {}'.format(type(e)))
148 | raise
149 | x += 1
150 |
151 |
152 | def db_execute_tpl(cur, sql, tpl):
153 | while True:
154 | try:
155 | return cur.execute(sql,tpl)
156 | except sqlite3.OperationalError as S:
157 | print_('')
158 | print('\tError - sqlite3 error: {}'.format(S))
159 | except KeyboardInterrupt:
160 | print('\nGoodbye!')
161 | exit()
162 | except:
163 | print('\n\nSQL cmd = \'{}\'\n{}\n'.format(sql, tpl))
164 | raise
165 |
166 |
167 | def delete_tables(db_file):
168 | print_('\nPlease wait, database tables are being deleted ...')
169 |
170 | # Create database connection
171 | conn = sqlite3.connect(db_file)
172 | cur = conn.cursor()
173 |
174 | # Drop tables and commit database
175 | for table in tbl_names:
176 | print_('Deleting database table {} ...'.format(table))
177 | sql = 'DROP TABLE IF EXISTS ' + table
178 | db_execute(cur, sql)
179 | save_db(conn)
180 |
181 | cur.close()
182 | conn.close()
183 |
184 | msg = '\n~ {} database tables were successfully deleted.'
185 | return msg.format(len(tbl_names))
186 |
187 |
188 | def del_fetch_history(db_file):
189 | print_('\nPlease wait, download history is being erased ...')
190 |
191 | # Create database connection
192 | conn = sqlite3.connect(db_file)
193 | cur = conn.cursor()
194 |
195 | # Drop tables and commit database
196 | table = 'Fetched_urls'
197 | sql = 'DELETE FROM ' + table
198 | db_execute(cur, sql)
199 | save_db(conn)
200 |
201 | cur.close()
202 | conn.close()
203 |
204 | return '\n~ Download history (table Fetched_urls) erased.'
205 |
206 |
207 | def erase_tables(db_file):
208 | print_('\nPlease wait, database tables are being erased ...')
209 |
210 | # Create database connection
211 | conn = sqlite3.connect(db_file)
212 | cur = conn.cursor()
213 | for table in tbl_names:
214 | print_('Erasing database table {} ...'.format(table))
215 | sql = 'DELETE FROM ' + table
216 | db_execute(cur, sql)
217 | save_db(conn)
218 | cur.close()
219 | conn.close()
220 |
221 | msg = '\n~ Records from {} database tables were successfully erased.'
222 | return msg.format(len(tbl_names))
223 |
224 |
225 | def fetch(db_file):
226 | div = 150
227 | pool_size = 50
228 |
229 | # Get user input for stp (no. of tickers to update)
230 | while True:
231 | # User input for number of tickers to update
232 | try:
233 | msg = 'Qty. of records to be updated:\n'
234 | stp = int(input(msg))
235 | except KeyboardInterrupt:
236 | print('\nGoodbye!')
237 | exit()
238 | except Exception:
239 | continue
240 | start = time.time()
241 | break
242 |
243 | # Fetch data for each API for tickers qty. = 'stp'
244 | dividend = max(stp, div)
245 | runs = dividend // div
246 | div = min(stp, div)
247 | tickers = []
248 | for i in range(runs):
249 | t0 = time.time()
250 |
251 | # Create db connection
252 | conn = sqlite3.connect(db_file)
253 | cur = conn.cursor()
254 |
255 | # Get list of URL's to be retrieved and print current run info
256 | msg = '\nRun {} / {}'
257 | if i == 0:
258 | try:
259 | urls = get_url_list(cur)
260 | except KeyboardInterrupt:
261 | print('\nGoodbye!')
262 | exit()
263 | except:
264 | raise
265 | msg0 = '\nTotal URL requests pending =\t{:9,.0f}\n'
266 | msg0 += 'Total URL requests planned =\t{:9,.0f}\n'
267 | print(msg0.format(len(urls),
268 | min(len(urls), stp * len(apis))))
269 | msg0 = '\t({} requests per API per run = {} requests per run)'
270 | msg += msg0.format(div, div*len(apis))
271 |
272 | j = i * div * len(apis)
273 | items = urls[j:j + div * len(apis)]
274 | items_ct = len(items)
275 | sort0 = lambda x: (x[0], x[2], x[3])
276 | #items = sorted(items, key=sort0)
277 | print(msg.format(i+1, '{:.0f}'
278 | .format(min(len(urls), stp * len(apis))/(div*len(apis)))))
279 |
280 | # Execute sql clean.txt and exit loop if no records remain to update
281 | if items_ct == 0:
282 | #with open(sql_cmds.format('clean.txt')) as file:
283 | # cur.executescript(file.read().strip())
284 | break
285 |
286 | # Fetch data from API's using multiprocessing.Pool
287 | results = []
288 | while True:
289 | try:
290 | with mp.Pool(pool_size) as p:
291 | #r = p.imap_unordered(fetch_api, items)
292 | #r = p.map(fetch_api, items)
293 | r = p.imap(fetch_api, items)
294 | for turn in range(len(items)):
295 | try:
296 | results.append(r.next(timeout=5))
297 | except mp.context.TimeoutError:
298 | pass
299 | break
300 | except KeyboardInterrupt:
301 | print('\nGoodbye!')
302 | exit()
303 | except:
304 | raise
305 |
306 | # Fetch data from API's without multiprocessing.Pool
307 | '''for item in url_info:
308 | results.append(fetch_api(item))'''
309 |
310 | # Enter URL data into Fetched_urls
311 | if results != []:
312 | try:
313 | results = list(filter(lambda x: x is not None, results))
314 | except:
315 | # DELETE
316 | print('\n\n\n{}\n\n'.format(results[:1]))
317 | raise
318 |
319 | msg = ' - Success rate:\t{:,.0f} out of {:,.0f} ({:.1%})'
320 | totreq = min(stp, div)*len(apis)
321 | srate = len(results)/totreq
322 | print_('')
323 | print(msg.format(len(results), totreq, srate))
324 |
325 | # Insert new data
326 | msg = 'Storing source data into database table \'Fetched_urls\'...'
327 | print_(msg)
328 | cols = 'url_id, ticker_id, exch_id, fetch_date, ' + \
329 | 'status_code, source_text'
330 | sql = 'INSERT OR IGNORE INTO Fetched_urls ({}) VALUES ({})'
331 | sql = sql.format(cols, '?, ?, ?, date(?), ?, ?')
332 | #print('\n\nSQL = {}'.format(sql))
333 | cur.executemany(sql, results)
334 |
335 | # Export new ticker and exchange lists to input folder
336 | output = {}
337 | with open(fd_input + 'ticker_exch.json', 'w') as file:
338 | sql = 'SELECT * FROM Tickers'
339 | ticks = cur.execute(sql).fetchall()
340 | output['Tickers'] = ticks
341 | sql = 'SELECT * FROM Exchanges'
342 | exchs = cur.execute(sql).fetchall()
343 | output['Exchanges'] = exchs
344 | sql = 'SELECT ticker_id, exchange_id FROM Master'
345 | fetched = cur.execute(sql).fetchall()
346 | output['Master'] = fetched
347 | file.write(json.dumps(output, indent=2))
348 |
349 | # Save (commit) changes and close db
350 | save_db(conn)
351 | cur.close()
352 | conn.close()
353 |
354 | # Call parsing module from parse.py
355 | t1 = time.time()
356 | print_(' - Fetch Duration:\t{:.2f} sec\n'.format(t1-t0))
357 | parse.parse(db_file)
358 | t1 = time.time()
359 | print_(' - Total Duration:\t{:.2f} sec\n'.format(t1-t0))
360 | print(' - Speed:\t\t{:.2f} records/sec'.format(
361 | len(results)/(t1-t0)))
362 |
363 | return start
364 |
365 |
366 | def fetch_api(url_info):
367 | t0 = time.time()
368 |
369 | # Unpack variables
370 | url_id, url, ticker_id, exch_id = url_info
371 | num = ticker_list[url_id]['{}:{}'.format(exch_id, ticker_id)]
372 | ct = ticker_count[url_id]
373 | print_progress(url_id, num, ct)
374 |
375 | # Fetch URL data
376 | x = 0
377 | while True:
378 | try:
379 | page = requests.get(url)
380 | status_code = page.status_code
381 | data = re.sub('\'', '', page.text)
382 | #msg = '\n\nurl = {}\ntext = {}\n'
383 | #print(msg.format(url, page.text))
384 | if data == '' or status_code != 200:
385 | return
386 | data = zlib.compress(data.encode())
387 | break
388 | except requests.exceptions.ConnectionError:
389 | if x > 9:
390 | print_('')
391 | print('\n\tError: requests.exceptions.ConnectionError')
392 | msg = 'Ticker: {}, Exch: {}, URL: {}\n'
393 | print(msg.format(ticker_id, exch_id, url))
394 | return
395 | except requests.exceptions.ChunkedEncodingError:
396 | print_('')
397 | print('\n\tError: requests.exceptions.ChunkedEncodingError')
398 | msg = 'Ticker: {}, Exch: {}, URL: {}\n'
399 | print(msg.format(ticker_id, exch_id, url))
400 | time.sleep(4)
401 | return
402 | except KeyboardInterrupt:
403 | print('\nGoodbye!')
404 | exit()
405 | except:
406 | raise
407 | x += 1
408 |
409 | # Timer to attempt to slow down and 'align' Pool requests to every sec
410 | if False:
411 | time.sleep((1 - (time.time() % 1)))
412 |
413 | return (url_id, ticker_id, exch_id, today, status_code, data)
414 |
415 |
416 | def get_url_list(cur):
417 |
418 | urls = []
419 | api = [(int(k), v) for k, v in apis.items()]
420 | with open(sql_cmds.format('select_notupdated1.txt')) as file:
421 | sql_cmd1 = file.read().strip()
422 | with open(sql_cmds.format('select_notupdated2.txt')) as file:
423 | sql_cmd2 = file.read().strip()
424 | with open(sql_cmds.format('select_notupdated3.txt')) as file:
425 | sql_cmd3 = file.read().strip()
426 |
427 | for url_id, url0 in api:
428 |
429 | # Select list of tickers not yet updated for current API
430 | print_('Creating URL list for API {} ...'.format(url_id))
431 | if url_id < 4:
432 | sql = sql_cmd1.format(url_id)
433 | elif url_id == 9:
434 | sql = sql_cmd3.format(url_id)
435 | else:
436 | sql = sql_cmd2.format(url_id)
437 | tickers = db_execute(cur, sql).fetchall()
438 | ticker_count[url_id] = len(tickers)
439 | ticker_list[url_id] = {}
440 |
441 | # Create list of URL's for each ticker
442 | def url_list(ct, tick):
443 | exch_id, exch_sym = tick[0], tick[1]
444 | sym_id, symbol = tick[2], tick[3]
445 | url = url0.format(exch_sym, symbol)
446 | ticker_list[url_id]['{}:{}'.format(exch_id, sym_id)] = ct
447 | return (url_id, url, sym_id, exch_id)
448 |
449 | urls = urls + [url_list(c, ticker)
450 | for c, ticker in enumerate(tickers)]
451 |
452 | # Print API list and no. of tickers to be updated for each
453 | msg = '\nQty. of records pending update per API no.:\n\n'
454 | print_(msg)
455 | df_tickct = pd.DataFrame([(k, '{:8,.0f}'.format(v))
456 | for k, v in ticker_count.items()])
457 | print(df_tickct.rename(columns={0:'API', 1:'Pending'})
458 | .set_index('API'))
459 | df_tickct = None
460 |
461 | #urls = sorted(urls, key=lambda x: (x[2], x[3], x[0]))
462 | urls = sorted(urls, key=lambda x: (x[0], x[2], x[3]))
463 |
464 | return urls
465 |
466 | # Overwrite the current terminal line (echo -en "\r\e[K...") for progress output
467 | def print_(msg):
468 | msg = 'echo -en "\\r\\e[K{}"'.format(msg)
469 | os.system(msg)
470 |
471 |
472 | def print_progress(api, num, ct):
473 | msg = 'Fetching API {:.0f}... {:7,.0f} / {:7,.0f} ({:.2%})'
474 | msg = msg.format(api, num+1, ct, (num+1)/ct)
475 | msg = 'echo -en "\\r\\e[K{}"'.format(msg)
476 | os.system(msg)
477 |
478 |
479 | def save_db(conn):
480 | err = None
481 | while True:
482 | try:
483 | conn.commit()
484 | except sqlite3.OperationalError as err1:
485 | if err != err1:
486 | err = err1
487 | print(err)
488 | continue
489 | except Exception as errs:
490 | print(errs)
491 | raise
492 | break
493 |
494 |
495 | def sql_insert(table, columns, values):
496 | if len(values) == 1:
497 | values = '(\'{}\')'.format(values[0])
498 |
499 | sql = 'INSERT OR IGNORE INTO {} {} VALUES {}'
500 | sql = sql.format(table, columns, values)
501 | return sql
502 |
503 |
504 | def sql_insert_one_get_id(cur, tbl, col, val):
505 |
506 | # Insert value into db table
507 | column = '({})'.format(col)
508 | sql1 = sql_insert(tbl, column, (val,))
509 | sql2 = sql_record_id(tbl, column, val)
510 |
511 | # Select ID from table for value
512 | try:
513 | db_execute(cur, sql1)
514 | id = db_execute(cur, sql2).fetchone()[0]
515 | except:
516 | print('\n\n\t# Error @ SQL1 =', sql1, '\n\nSQL2 =', sql2, '\n\n')
517 | raise
518 |
519 | return id
520 |
521 |
522 | def sql_record_id(table, column, value):
523 | if type(value) is str:
524 | sql = 'SELECT id FROM {} WHERE {} =\'{}\''
525 | else:
526 | sql = 'SELECT id FROM {} WHERE {} ={}'
527 | return sql.format(table, column, value)
528 |
529 | # Build an UPDATE statement: SET clause from dict1, AND-joined WHERE clause from dict2
530 | def sql_update_record(table, dict1, dict2):
531 | updates = str(dict1).replace('{\'', '').replace(', \'', ', ')
532 | updates = updates.replace('}', '').replace('\':', ' =')
533 | conds = str(dict2).replace('{\'', '(')
534 | conds = conds.replace('}', ')').replace('\':', ' =')
535 | conds = conds.replace(', \'', ' AND ')
536 | sql = 'UPDATE OR IGNORE ' + table + ' SET ' + updates + ' WHERE ' + conds
537 | sql = re.sub('\'null\'', 'null', sql)
538 | return sql
539 |
540 |
541 | reload(parse) #Comment out after development
542 |
543 | # Reference variables
544 | ticker_list = {}
545 | ticker_count = {}
546 | fd_input = 'input/'
547 | today = datetime.today().strftime('%Y-%m-%d')
548 | sql_cmds = '{}sql_cmd/{}'.format(fd_input, '{}')
549 | with open('{}/api.json'.format(fd_input)) as file:
550 | apis = json.load(file)
551 | with open('{}/tables.json'.format(fd_input)) as file:
552 | tbl_js = json.load(file)
553 | tbl_names = list(tbl_js.keys())
554 |
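555 | # Illustrative sketch of the SQL built by the helpers above (the table, columns
556 | # and values here are made-up examples; the output strings are what sql_insert()
557 | # and sql_update_record() produce as written):
558 | #
559 | #   sql_insert('Master', '(ticker_id, exchange_id)', (12, 3))
560 | #     -> "INSERT OR IGNORE INTO Master (ticker_id, exchange_id) VALUES (12, 3)"
561 | #
562 | #   sql_update_record('Master', {'company_id': 5, 'update_date': '2019-01-01'},
563 | #                     {'ticker_id': 12, 'exchange_id': 3})
564 | #     -> "UPDATE OR IGNORE Master SET company_id = 5, update_date = '2019-01-01'
565 | #        WHERE (ticker_id = 12 AND exchange_id = 3)"
566 |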
--------------------------------------------------------------------------------
/parse.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup as bs
2 | from importlib import reload #Comment out after development
3 | import datetime as DT
4 | from io import StringIO
5 | import pandas as pd
6 | import numpy as np
7 | import fetch, sqlite3, time, json, zlib, csv, sys, re
8 |
9 |
10 | # Manage database connection and fetch data to be parsed
11 | def parse(db_file):
12 | start = time.time()
13 |
14 | # Create db connection
15 | fetch.print_('Please wait while the database is being queried ...')
16 |
17 | while True:
18 | try:
19 | conn = sqlite3.connect(db_file)
20 | cur = conn.cursor()
21 | except sqlite3.OperationalError as S:
22 | fetch.print_('')
23 | print('\tError - sqlite3 error: {}'.format(S))
24 | continue
25 | except KeyboardInterrupt:
26 | print('\nGoodbye!')
27 | exit()
28 | except:
29 | raise
30 | break
31 |
32 | # Get list of fetched urls from Fetched_urls
33 | cols = 'url_id, ticker_id, exch_id, fetch_date, source_text'
34 | sql = '''SELECT {} FROM Fetched_urls
35 | WHERE status_code = 200 AND source_text IS NOT NULL
36 | ORDER BY ticker_id asc, url_id desc'''
37 | sql = sql.format(cols)
38 | fetched = fetch.db_execute(cur, sql).fetchall()
39 |
40 | # Call parsing methods
41 | parsing(conn, cur, fetched)
42 |
43 | # Save db and close db connection
44 | fetch.save_db(conn)
45 | cur.close()
46 | conn.close()
47 | fetched = None
48 |
49 |
50 | # Parse data fetched from database table 'Fetched_urls'
51 | def parsing(conn, cur, items):
52 | stp = len(items)
53 | spds = []
54 | if stp > 0:
55 | for i in range(stp):
56 |
57 | # Unpack record from Fetched_urls
58 | api = items[i][0]
59 | ticker_id = items[i][1]
60 | exch_id = items[i][2]
61 | fetch_date = items[i][3]
62 | source_text = items[i][4]
63 | parse = True
64 |
65 | # Decompress and check data integrity before parsing
66 | try:
67 | if source_text is not None:
68 | source_text = zlib.decompress(source_text).decode()
69 | except KeyboardInterrupt:
70 | print('\nGoodbye!')
71 | exit()
72 | except Exception as E:
73 | print('\n\nE - {}'.format(str(E)))
74 | raise
75 | except BaseException as B:
76 | print('\n\nB - {}'.format(str(B)))
77 | raise
78 |
79 | if (source_text is None or len(source_text) == 0 or
80 | 'Morningstar.com Error Page' in source_text or
81 | 'This page is temporarily unavailable' in source_text):
82 | parse = False
83 | code = 0
84 |
85 | # Print progress message
86 | msg = 'Parsing results into database...'
87 | msg += '\t{:6,.0f} / {:6,.0f}\t({:6.1%} )'
88 | ct = i + 1
89 | pct = (i + 1) / stp
90 | fetch.print_(msg.format(ct, stp, pct))
91 |
92 | # Invoke parsing function based on API number
93 | if parse == True:
94 | if api in [1, 2, 3]:
95 | code = parse_1(cur, source_text, api)
96 | elif api == 4:
97 | code = parse_2(cur, ticker_id, exch_id, source_text)
98 | elif api == 5:
99 | code = parse_3(cur, ticker_id, exch_id, source_text)
100 | elif api == 6:
101 | code = parse_4(cur, ticker_id, exch_id, source_text)
102 | elif api == 7:
103 | code = parse_5(cur, ticker_id, exch_id, source_text)
104 | elif api == 8:
105 | code = parse_6(cur, ticker_id, exch_id, source_text)
106 | elif api == 9:
107 | code = parse_7(cur, ticker_id, exch_id, source_text)
108 | elif api in [10, 11, 12, 13, 14, 15]:
109 | code = parse_8(cur, api, ticker_id, exch_id, source_text)
110 | elif api == 16:
111 | code = parse_9(cur, ticker_id, exch_id, source_text)
112 | elif api == 0:
113 | code = parse_10(cur, ticker_id, source_text)
114 | source_text = None
115 |
116 | # Update record in Fetched_urls with results from parsing
117 | if True:
118 | dict1 = {
119 | 'status_code':code,
120 | 'source_text':source_text
121 | }
122 | dict2 = {
123 | 'url_id':api,
124 | 'ticker_id':ticker_id,
125 | 'exch_id':exch_id,
126 | 'fetch_date':fetch_date
127 | }
128 | sql = fetch.sql_update_record('Fetched_urls', dict1, dict2)
129 |
130 | # DELETE
131 | if dict1['source_text'] == '':
132 | print('\n\n\n{}\n\n'.format(sql))
133 | raise
134 |
135 | fetch.db_execute(cur, sql)
136 |
137 | #print('\n{} SQL = {}\n\n'.format(api, sql))
138 |
139 | if i % 1000 == 0 and i + 1 != stp:
140 | fetch.save_db(conn)
141 |
142 |
143 | # Parse table(s) from source html code
144 | def get_html_table(sp):
145 |
146 | tr_tags = sp.find_all('tr')
147 | table = []
148 | for tr in tr_tags:
149 | td_tags = tr.find_all(['th', 'td'])
150 | if len(td_tags) > 1:
151 | table.append([tag.text for tag in td_tags])
152 |
153 | return table
154 |
155 |
156 | # https://www.morningstar.com/api/v2/search/securities/5/usquote-v2/
157 | def parse_1(cur, data, api):
158 |
159 | results = []
160 | try:
161 | js = json.loads(data)
162 | if js['m'][0]['n'] != 0:
163 | results = js['m'][0]['r']
164 | except KeyError:
165 | fetch.print_('')
166 | print('\tError: KeyError at Parse_1\n')
167 | return 1
168 | except KeyboardInterrupt:
169 | print('\nGoodbye!')
170 | exit()
171 | except:
172 | print('Data = {} {}\n'.format(data, len(data)))
173 | raise
174 |
175 | if results == []:
176 | return 1
177 |
178 | for result in results:
179 | # Read data from current result
180 | exch = result['OS01X']
181 | symbol = result['OS001']
182 | exch_sym = result['LS01Z']
183 | country = result['XI018']
184 | type = result['OS010']
185 | comp = result['OS01W']
186 | curr = result['OS05M']
187 |
188 | if exch_sym == '' or symbol == '':
189 | continue
190 |
191 | # Fetch id's for data from db and update tables
192 |
193 | # Tickers
194 | ticker_id = int(fetch.sql_insert_one_get_id(
195 | cur, 'Tickers', 'ticker', symbol))
196 | # Currencies
197 | curr_id = int(fetch.sql_insert_one_get_id(
198 | cur, 'Currencies', 'currency_code', curr))
199 | # Companies
200 | comp_id = int(fetch.sql_insert_one_get_id(
201 | cur, 'Companies', 'company', comp))
202 | # SecurityTypes
203 | type_id = int(fetch.sql_insert_one_get_id(
204 | cur, 'SecurityTypes', 'security_type_code', type))
205 | # Countries
206 | country_id = int(fetch.sql_insert_one_get_id(cur,
207 | 'Countries', 'a3_un', country))
208 | # Exchanges
209 | exch_id = int(fetch.sql_insert_one_get_id(cur,
210 | 'Exchanges', 'exchange_sym', exch_sym))
211 | dict1 = {
212 | 'exchange':exch,
213 | 'exchange_sym':exch_sym,
214 | 'country_id':country_id
215 | }
216 | sql = fetch.sql_update_record('Exchanges', dict1, {'id':exch_id})
217 | fetch.db_execute(cur, sql)
218 | # Master Table
219 | columns = '(ticker_id, exchange_id)'
220 | sql = fetch.sql_insert('Master', columns, (ticker_id, exch_id))
221 | fetch.db_execute(cur, sql)
222 | dict1 = {
223 | 'company_id':comp_id,
224 | 'security_type_id':type_id,
225 | 'update_date':DT.date.today().strftime('%Y-%m-%d')
226 | }
227 | dict2 = {
228 | 'ticker_id':ticker_id,
229 | 'exchange_id':exch_id
230 | }
231 | sql = fetch.sql_update_record('Master', dict1, dict2)
232 | fetch.db_execute(cur, sql)
233 |
234 | return 200
235 |
236 |
237 | # http://quotes.morningstar.com/stockq/c-company-profile
238 | def parse_2(cur, ticker_id, exch_id, data):
239 |
240 | soup = bs(data, 'html.parser')
241 | tags = soup.find_all('span')
242 |
243 | try:
244 | sector = tags[2].text.strip()
245 | industry = tags[4].text.strip()
246 | stype = tags[6].text.strip()
247 | fyend = tags[10].text.strip()
248 | style = tags[12].text.strip()
249 | except KeyboardInterrupt:
250 | print('\nGoodbye!')
251 | exit()
252 | except:
253 | print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
254 | print('Data = {} {}\n'.format(data, len(data)))
255 | raise
256 |
257 | # Insert sector into Sectors
258 | sector_id = fetch.sql_insert_one_get_id(cur, 'Sectors', 'Sector', sector)
259 |
260 | # Insert industry into Industries
261 | sql = fetch.sql_insert('Industries',
262 | '(industry, sector_id)', (industry, sector_id))
263 | fetch.db_execute(cur, sql)
264 | sql = fetch.sql_record_id('Industries', '(industry)', industry)
265 | industry_id = fetch.db_execute(cur, sql).fetchone()[0]
266 |
267 | # Insert stock_type into StockTypes
268 | stype_id = fetch.sql_insert_one_get_id(
269 | cur, 'StockTypes', 'stock_type', stype)
270 |
271 | # Insert fyend into TimeRefs
272 | fyend_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', fyend)
273 |
274 | # Insert style into StockStyles
275 | style_id = fetch.sql_insert_one_get_id(cur, 'StockStyles', 'style', style)
276 |
277 | # Update Master table with parsed data
278 | sql = fetch.sql_update_record('Master', {'industry_id':industry_id,
279 | 'stock_type_id':stype_id, 'fyend_id':fyend_id, 'style_id':style_id},
280 | {'ticker_id':ticker_id, 'exchange_id':exch_id})
281 | fetch.db_execute(cur, sql)
282 |
283 | return 200
284 |
285 |
286 | # http://quotes.morningstar.com/stockq/c-header
287 | # API No. 5
288 | def parse_3(cur, ticker_id, exch_id, data):
289 |
290 | soup = bs(data, 'html.parser')
291 | tags = soup.find_all('span') + soup.find_all('div')
292 |
293 | # Parse data into info dictionary
294 | info = {}
295 | noise = ['', '-', '—', '— mil', '— bil']
296 | for count, tag in enumerate(tags):
297 |
298 | attrs = tag.attrs
299 | text = re.sub('[\n\t]', '', tag.text.strip())
300 | text = re.sub('\s\s*', ' ', text)
301 |
302 | try:
303 | if attrs.get('vkey') == 'Currency':
304 | if text in noise:
305 | info['currency_id'] = 'null'
306 | else:
307 | val = fetch.sql_insert_one_get_id(
308 | cur, 'Currencies', 'currency_code', text)
309 | info['currency_id'] = val
310 |
311 | elif attrs.get('vkey') == 'LastDate':
312 | if text == '':
313 | info['lastdate'] = 'null'
314 | else:
315 | info['lastdate'] = pd.to_datetime(
316 | text).strftime('%Y-%m-%d')
317 | elif attrs.get('vkey') == 'DayRange':
318 | text = re.sub('^-0.00', '0.00', text)
319 | vals = text.split('-')
320 | if '-' not in text or text in noise or '' in vals:
321 | info['day_lo'] = 'null'
322 | info['day_hi'] = 'null'
323 | else:
324 | info['day_lo'] = float(re.sub(',', '', vals[0]))
325 | info['day_hi'] = float(re.sub(',', '', vals[1]))
326 | elif attrs.get('vkey') == '_52Week':
327 | text = re.sub('^-0.00', '0.00', text)
328 | vals = text.split('-')
329 | if '-' not in text or text in noise or '' in vals:
330 | info['_52wk_lo'] = 'null'
331 | info['_52wk_hi'] = 'null'
332 | else:
333 | info['_52wk_lo'] = float(re.sub(',', '', vals[0]))
334 | info['_52wk_hi'] = float(re.sub(',', '', vals[1]))
335 | elif attrs.get('vkey') == 'Volume':
336 | if text in noise:
337 | info['lastvol'] = 'null'
338 | else:
339 | text = re.sub(',', '', text)
340 | unit = 1
341 | if ' mil' in text:
342 | unit = 1E6
343 | text = text.replace(' mil', '')
344 | elif ' bil' in text:
345 | unit = 1E9
346 | text = text.replace(' bil', '')
347 | elif ' tri' in text:
348 | unit = 1E12
349 | text = text.replace(' tri', '')
350 | info['lastvol'] = float(text) * unit
351 | elif attrs.get('vkey') == 'AverageVolume':
352 | if text in noise:
353 | info['avevol'] = 'null'
354 | else:
355 | text = re.sub(',', '', text)
356 | unit = 1
357 | if ' mil' in text:
358 | unit = 1E6
359 | text = text.replace(' mil', '')
360 | elif ' bil' in text:
361 | unit = 1E9
362 | text = text.replace(' bil', '')
363 | elif ' tri' in text:
364 | unit = 1E12
365 | text = text.replace(' tri', '')
366 | info['avevol'] = float(text) * unit
367 | elif attrs.get('gkey') == 'Forward':
368 | fpe = text
369 | elif attrs.get('vkey') == 'OpenPrice':
370 | if text in noise:
371 | info['openprice'] = 'null'
372 | else:
373 | info['openprice'] = float(re.sub(',', '', text))
374 | elif attrs.get('vkey') == 'LastPrice':
375 | if text in noise:
376 | info['lastprice'] = 'null'
377 | else:
378 | info['lastprice'] = float(re.sub(',', '', text))
379 | elif attrs.get('vkey') == 'ProjectedYield':
380 | if text in noise:
381 | info['yield'] = 'null'
382 | else:
383 | info['yield'] = float(re.sub('[%,]', '', text))
384 | elif attrs.get('vkey') == 'PE':
385 | if text in noise:
386 | info['fpe'] = 'null'
387 | else:
388 | info['fpe'] = float(re.sub(',', '', text))
389 | elif attrs.get('vkey') == 'PB':
390 | if text in noise:
391 | info['pb'] = 'null'
392 | else:
393 | info['pb'] = float(re.sub(',', '', text))
394 | elif attrs.get('vkey') == 'PS':
395 | if text in noise:
396 | info['ps'] = 'null'
397 | else:
398 | info['ps'] = float(re.sub(',', '', text))
399 | elif attrs.get('vkey') == 'PC':
400 | if text in noise:
401 | info['pc'] = 'null'
402 | else:
403 | info['pc'] = float(re.sub(',', '', text))
404 | except:
405 | print('\n\n{' + text + '}\n')
406 | raise
407 |
408 | # Check if parsing was successful
409 | if info == {}:
410 | return 3
411 |
412 | if 'fpe' in locals() and fpe != 'Forward' and 'fpe' in info:
413 | del info['fpe']
414 |
415 | # Remove 'empty' string values
416 | for k, v in info.items():
417 | if v == '' or v == ' ':
418 | info[k] = 'null'
419 |
420 | # Insert data into MSheader table
421 | table = 'MSheader'
422 | # Update
423 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
424 | sql = fetch.sql_update_record(table, info, dict0)
425 | fetch.db_execute(cur, sql)
426 | # Insert
427 | if cur.rowcount == 0:
428 | info['ticker_id'] = ticker_id
429 | info['exchange_id'] = exch_id
430 | sql = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values()))
431 | fetch.db_execute(cur, sql)
432 |
433 | return 200
434 |
435 |
436 | # http://financials.morningstar.com/valuate/valuation-history.action
437 | def parse_4(cur, ticker_id, exch_id, data):
438 |
439 | info = {}
440 | def clean_val(h, v):
441 | if v != '—':
442 | info[h] = v
443 |
444 | soup = bs(data, 'html.parser')
445 | table = get_html_table(soup)
446 | script = soup.find('script').text
447 | script = re.sub('[ \n\t]|\\n|\\t', '', script)
448 | script = re.findall('\[\[.+?\]\]', script)[0]
449 | columns = json.loads(script)
450 |
451 | # Parse Yr Columns
452 | for year, column in enumerate(columns):
453 | if column[0] % 2 == 0:
454 | yr = column[1]
455 | yr_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', yr)
456 | header = 'Y{}'.format(int((year-1)/2))
457 | info[header] = yr_id
458 |
459 | # Parse 'Price/Earnings'
460 | for yr, val in enumerate(table[1][1:]):
461 | header = 'PE_Y{}'.format(yr)
462 | clean_val(header, val)
463 |
464 | # Parse 'Price/Book'
465 | for yr, val in enumerate(table[4][1:]):
466 | header = 'PB_Y{}'.format(yr)
467 | clean_val(header, val)
468 |
469 | # Parse 'Price/Sales'
470 | for yr, val in enumerate(table[7][1:]):
471 | header = 'PS_Y{}'.format(yr)
472 | clean_val(header, val)
473 |
474 | # Parse 'Price/Cash Flow'
475 | for yr, val in enumerate(table[10][1:]):
476 | header = 'PC_Y{}'.format(yr)
477 | clean_val(header, val)
478 |
479 | # Check if parsing was successful
480 | if info == {}:
481 | return 4
482 |
483 | # Insert data into MSvaluation table
484 | table = 'MSvaluation'
485 | # Update
486 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
487 | sql1 = fetch.sql_update_record(table, info, dict0)
488 | # Insert
489 | info['ticker_id'] = ticker_id
490 | info['exchange_id'] = exch_id
491 | sql2 = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values()))
492 | fetch.db_execute(cur, sql1)
493 | if cur.rowcount == 0:
494 | fetch.db_execute(cur, sql2)
495 |
496 | return 200
497 |
498 |
499 | # http://financials.morningstar.com/finan/financials/getKeyStatPart.html
500 | # API No. 7
501 | def parse_5(cur, ticker_id, exch_id, data):
502 |
503 | # Check if source data has correct information
504 | try:
505 | js = json.loads(data)['componentData']
506 | if js is None:
507 | return 5
508 | soup = bs(js, 'html.parser')
509 | except KeyboardInterrupt:
510 | print('\nGoodbye!')
511 | exit()
512 | except:
513 | print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
514 | print('Data = {} {}\n'.format(data, len(data)))
515 | raise
516 |
517 | # Parse table
518 | tables = {}
519 | trows = soup.find_all('tr')
520 | tname = ''
521 | for trow in trows:
522 | div_id = trow.parent.parent.parent.attrs['id']
523 | tname0 = re.sub('tab-', 'MSratio_', div_id)
524 | if tname != tname0:
525 | tname = tname0
526 | tables[tname] = {}
527 |
528 | row_tags = trow.find_all(['th', 'td'])
529 | for i, row_tag in enumerate(row_tags):
530 | if 'id' in row_tag.attrs:
531 | text = row_tag.text
532 | id = re.sub('-', '_', row_tag.attrs['id'])
533 | if i != 0:
534 | text_id = fetch.sql_insert_one_get_id(
535 | cur, 'TimeRefs', 'dates', text)
536 | else:
537 | text_id = fetch.sql_insert_one_get_id(
538 | cur, 'ColHeaders', 'header', text)
539 | tables[tname][id] = text_id
540 | elif 'headers' in row_tag.attrs:
541 | headers = row_tag.attrs['headers']
542 | header = '_'.join([headers[2], headers[0]])
543 | header = re.sub('-', '_', header)
544 | val = re.sub(',', '', row_tag.text)
545 | if val == '—':
546 | val = None
547 | else:
548 | try:
549 | val = float(val)
550 | except:
551 | val = None
552 | tables[tname][header] = val
553 |
554 | # Check if parsing was successful
555 | if tables == {}:
556 | return 5
557 |
558 | # Insert data into tables
559 | for table in tables:
560 | # Update
561 | info = tables[table]
562 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
563 | sql = fetch.sql_update_record(table, info, dict0)
564 | fetch.db_execute(cur, sql)
565 | # Insert
566 | if cur.rowcount == 0:
567 | tables[table]['ticker_id'] = ticker_id
568 | tables[table]['exchange_id'] = exch_id
569 | info = tables[table]
570 | sql = fetch.sql_insert(
571 | table, tuple(info.keys()), tuple(info.values()))
572 | fetch.db_execute(cur, sql)
573 |
574 | return 200
575 |
576 |
577 | # http://financials.morningstar.com/finan/financials/getFinancePart.html
578 | # API No. 8
579 | def parse_6(cur, ticker_id, exch_id, data):
580 |
581 | # Check if source data has correct information
582 | try:
583 | js = json.loads(data)['componentData']
584 | if js is None:
585 | return 6
586 | soup = bs(js, 'html.parser')
587 | except KeyboardInterrupt:
588 | print('\nGoodbye!')
589 | exit()
590 | except:
591 | print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
592 | print('Data = {} {}\n'.format(data, len(data)))
593 | raise
594 |
595 | # Parse table
596 | table = {}
597 | trows = soup.find_all('tr')
598 | for trow in trows:
599 | row_tags = trow.find_all(['th', 'td'])
600 | for i, row_tag in enumerate(row_tags):
601 | if 'id' in row_tag.attrs:
602 | text = row_tag.text
603 | if i != 0:
604 | text_id = fetch.sql_insert_one_get_id(
605 | cur, 'TimeRefs', 'dates', text)
606 | else:
607 | text_id = fetch.sql_insert_one_get_id(
608 | cur, 'ColHeaders', 'header', text)
609 | table[row_tag.attrs['id']] = text_id
610 | elif 'headers' in row_tag.attrs:
611 | headers = row_tag.attrs['headers']
612 | headers.reverse()
613 | val = re.sub(',', '', row_tag.text)
614 | if val == '—':
615 | val = None
616 | else:
617 | val = float(val)
618 | table['_'.join(headers)] = val
619 |
620 | if table == {}:
621 | return 6
622 |
623 | # Insert data into tables
624 | tname = 'MSfinancials'
625 | # Update
626 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
627 | sql = fetch.sql_update_record(tname, table, dict0)
628 | ct = fetch.db_execute(cur, sql)
629 | # Insert
630 | if cur.rowcount == 0:
631 | table['ticker_id'] = ticker_id
632 | table['exchange_id'] = exch_id
633 | sql = fetch.sql_insert(
634 | tname, tuple(table.keys()), tuple(table.values()))
635 | fetch.db_execute(cur, sql)
636 |
637 | return 200
638 |
639 |
640 | # http://performance.mor.../perform/Performance/stock/exportStockPrice.action
641 | # API No. 9
642 | def parse_7(cur, ticker_id, exch_id, data):
643 |
644 | tbl = pd.read_csv(StringIO(data), sep=',', header=1)
645 | tbl = tbl.where(tbl['Volume'] != '???').dropna(axis=0, how='all')
646 | tbl['diff'] = 100 * tbl['Close'].diff(-1) / tbl['Close'].shift(-1)
647 |
648 | if len(tbl) <= 1:
649 | return 99
650 |
651 | last_open0 = tbl.iloc[0, 4]
652 | last_open1 = tbl.iloc[1, 4]
653 |
654 | if last_open0 <= 0.0:
655 | return 99
656 |
657 | info = dict()
658 | info['last_open'] = last_open0
659 | info['last_close'] = tbl.iloc[0, 4]
660 | info['lastday_var'] = 100*(last_open0-last_open1)/last_open1
661 | info['ave_10d'] = tbl.iloc[:9, 4].mean()
662 | info['ave_50d'] = tbl.iloc[:49, 4].mean()
663 | info['ave_100d'] = tbl.iloc[:99, 4].mean()
664 | info['ave_200d'] = tbl.iloc[:199, 4].mean()
665 | for i in [5, 10, 30, 50, 100, 200]:
666 | info['max_var{}'.format(i)] = tbl['diff'].iloc[:i-1].max()
667 | info['max_var{}_date'.format(i)] = (DT.
668 | datetime.strptime(tbl[tbl['diff'] == info['max_var{}'.format(i)]]
669 | .iloc[0, 0],'%m/%d/%Y').strftime('%Y-%m-%d'))
670 | info['min_var{}'.format(i)] = tbl['diff'].iloc[:i-1].min()
671 | info['min_var{}_date'.format(i)] = (DT.
672 | datetime.strptime(tbl[tbl['diff'] == info['min_var{}'.format(i)]]
673 | .iloc[0, 0],'%m/%d/%Y').strftime('%Y-%m-%d'))
674 |
675 | nonan = lambda x: (str(x[1]) != 'nan') and (str(x[1]) != 'inf')
676 | info = dict(filter(nonan, info.items()))
677 |
678 | # Insert data into tables
679 | # Update
680 | table = 'MSpricehistory'
681 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
682 | sql = fetch.sql_update_record(table, info, dict0)
683 | fetch.db_execute(cur, sql)
684 | # Insert
685 | if cur.rowcount == 0:
686 | info['ticker_id'] = ticker_id
687 | info['exchange_id'] = exch_id
688 | sql = fetch.sql_insert(
689 | table, tuple(info.keys()), tuple(info.values()))
690 | fetch.db_execute(cur, sql)
691 |
692 | return 200
693 |
694 |
695 | # http://financials.morningstar.com/ajax/ReportProcess4HtmlAjax.html
696 | def parse_8(cur, api, ticker_id, exch_id, data):
697 |
698 | # Check if source data has correct information
699 | msg = 'There is no available information in our database to display.'
700 | if msg in data:
701 | return 8
702 |
703 | # Parse source data with JSON and BeautifulSoup
704 | try:
705 | js = json.loads(data)
706 | html = js['result']
707 | soup = bs(html, 'html.parser')
708 | tags = soup.find_all('div')
709 | except KeyboardInterrupt:
710 | print('\nGoodbye!')
711 | exit()
712 | except:
713 | print('\n\n', data)
714 | raise
715 |
716 | info = {}
717 | info0 = {}
718 | type = 'MSreport'
719 |
720 | if api in [10, 11]:
721 | type += '_is'
722 | elif api in [12, 13]:
723 | type += '_cf'
724 | elif api in [14, 15]:
725 | type += '_bs'
726 | if api in [10, 12, 14]:
727 | type += '_yr'
728 | elif api in [11, 13, 15]:
729 | type += '_qt'
730 | #fname = 'test/{}.json'.format(type)
731 |
732 | '''with open(fname) as file:
733 | info0 = json.load(file)'''
734 |
735 | # Parse data into info dictionary
736 | for tag in tags:
737 | attrs = tag.attrs
738 |
739 | if 'id' in attrs:
740 | tag_id = tag['id']
741 | value = tag.text
742 |
743 | # Parse currency and FY End month number
744 | if tag_id == 'unitsAndFiscalYear':
745 | info['fye_month'] = int(tag['fyenumber'])
746 | curr_id = fetch.sql_insert_one_get_id(
747 | cur, 'Currencies', 'currency_code', tag['currency'])
748 | info['currency_id'] = curr_id
749 |
750 | # Parse Yrly or Qtrly values
751 | elif tag_id[:2] == 'Y_':
752 | parent = tag.parent['id']
753 | key = '{}_{}'.format(parent, tag_id)
754 |
755 | if 'rawvalue' in attrs:
756 | if tag['rawvalue'] in ['—', 'nbsp']:
757 | continue
758 | info[key] = float(re.sub(',', '', tag['rawvalue']))
759 | #info0[key] = 'REAL,'
760 | else:
761 | if 'title' in attrs:
762 | value = tag['title']
763 | value_id = fetch.sql_insert_one_get_id(
764 | cur, 'TimeRefs', 'dates', value)
765 | info[key] = value_id
766 | #info0[key] = 'INTEGER,'
767 |
768 | # Parse labels
769 | elif tag_id[:3] == 'lab' and 'padding' not in tag_id:
770 | value_id = fetch.sql_insert_one_get_id(
771 | cur, 'ColHeaders', 'header', value)
772 | info[tag_id] = value_id
773 | #info0[tag_id] = 'INTEGER,'
774 |
775 | # Check if parsing was successful
776 | if info == {} and info0 == {}:
777 | return 8
778 |
779 | # Insert data into tables
780 | # Update
781 | dict0 = {'ticker_id':ticker_id, 'exchange_id':exch_id}
782 | sql = fetch.sql_update_record(type, info, dict0)
783 | fetch.db_execute(cur, sql)
784 | # Insert
785 | if cur.rowcount == 0:
786 | info['ticker_id'] = ticker_id
787 | info['exchange_id'] = exch_id
788 | sql = fetch.sql_insert(type, tuple(info.keys()), tuple(info.values()))
789 | fetch.db_execute(cur, sql)
790 |
791 | return 200
792 |
793 |
794 | # http://insiders.mor.../insiders/trading/insider-activity-data2.action
795 | # API No. 16
796 | def parse_9(cur, ticker_id, exch_id, data):
797 |
798 | data = re.sub('([A-Z])', r' \1', data)
799 | data = re.sub(' +', ' ', data)
800 | data = re.sub('\\n|\\t', '', data)
801 | soup = bs(data, 'html.parser')
802 | table = get_html_table(soup)
803 |
804 | if len(table) > 1:
805 | for row in table:
806 | date = ''
807 | info = {}
808 | if row[0] != '':
809 | info['date'] = DT.datetime.strptime(
810 | row[0], '%m/%d/%Y').strftime('%Y-%m-%d')
811 | try:
812 | info['quantity'] = float(re.sub(',', '', row[3]))
813 | info['value'] = float(re.sub(',', '', row[6]))
814 | except ValueError:
815 | info['quantity'] = 0
816 | info['value'] = 0
817 | except:
818 | raise
819 |
820 | name = row[1].strip()
821 | info['name_id'] = fetch.sql_insert_one_get_id(
822 | cur, 'Insiders', 'name', name)
823 |
824 | type = row[5].strip()
825 | if ' ' in type:
826 | type = type.split()[0]
827 | info['transaction_id'] = fetch.sql_insert_one_get_id(
828 | cur, 'TransactionType', 'type', type)
829 |
830 | # Insert data into tables
831 | info['ticker_id'] = ticker_id
832 | info['exchange_id'] = exch_id
833 | sql = fetch.sql_insert('InsiderTransactions',
834 | tuple(info.keys()), tuple(info.values()))
835 | fetch.db_execute(cur, sql)
836 |
837 | return 200
838 |
839 |
840 | # https://finance.yahoo.com/quote/
841 | # API No. 0
842 | def parse_10(cur, ticker_id, data):
843 |
844 | sql = 'SELECT ticker FROM Tickers WHERE id = ?'
845 | ticker = fetch.db_execute_tpl(cur, sql, (ticker_id,)).fetchall()[0][0]
846 |
847 | #soup = bs(data, 'html.parser')
848 | tables = []
849 | try:
850 | tables = pd.read_html(data)
851 | except:
852 | return 10
853 |
854 | if len(tables) == 2:
855 |
856 | info = dict()
857 | try:
858 | info['prev_close'] = float(tables[0].loc[0, 1])
859 | info['open'] = float(tables[0].loc[1, 1])
860 | info['beta'] = float(tables[1].loc[1, 1])
861 | info['eps_ttm'] = float(tables[1].loc[3, 1])
862 | info['pe_ttm'] = float(tables[1].loc[2, 1])
863 | info['yr_target'] = float(tables[1].loc[7, 1])
864 | except:
865 | pass
866 |
867 | try:
868 | date0 = tables[1].loc[6, 1]
869 | if isinstance(date0, float) == False:
870 | exdiv_date = DT.datetime.strptime(date0, '%Y-%m-%d')
871 | info['exdiv_date'] = exdiv_date.strftime('%Y-%m-%d')
872 |
873 | date0 = tables[1].loc[4, 1]
874 | if isinstance(date0, float) == False:
875 | if '-' in date0:
876 | date0 = date0.split('-')[0].strip()
877 | earn_date = DT.datetime.strptime(date0, '%b %d, %Y')
878 | info['earnings_date'] = earn_date.strftime('%Y-%m-%d')
879 |
880 | div_yield = tables[1].loc[5, 1]
881 | if '%' in div_yield:
882 | div_yield = div_yield.split('(')[1].split('%')[0]
883 | info['div_yield'] = float(div_yield)
884 | except:
885 | print('\n\nTicker: ' + ticker)
886 | print()
887 | for table in tables:
888 | print(table)
889 | raise
890 |
891 | nonan = lambda x: (str(x[1]) != 'nan')
892 | info = dict(filter(nonan, info.items()))
893 |
894 | # Insert data into tables
895 | if len(info) > 0:
896 | #print(json.dumps(info, indent=2))
897 | # Update
898 | table = 'YahooQuote'
899 | dict0 = {'ticker_id':ticker_id}
900 | sql = fetch.sql_update_record(table, info, dict0)
901 | fetch.db_execute(cur, sql)
902 | # Insert
903 | if cur.rowcount == 0:
904 | info['ticker_id'] = ticker_id
905 | sql = fetch.sql_insert(
906 | table, tuple(info.keys()), tuple(info.values()))
907 | fetch.db_execute(cur, sql)
908 |
909 | return 200
910 | return 10
911 |
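912 | # Note on return codes: each parse_* function above returns 200 once its data is
913 | # written to the target table(s), or a small marker code (e.g. 1, 5, 8, 99) when
914 | # the fetched payload holds nothing usable. parsing() writes that code back into
915 | # Fetched_urls.status_code and sets source_text to NULL, so a record is only
916 | # parsed again after a fresh fetch.
917 |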
--------------------------------------------------------------------------------