├── finsymbols
│   ├── tests
│   │   ├── __init__.py
│   │   └── symbols_test.py
│   ├── __init__.py
│   ├── symbols.py
│   └── symbol_helper.py
├── MANIFEST.in
├── .gitignore
├── .travis.yml
├── README.md
└── setup.py

/finsymbols/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include *.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.py[co]
*.sw[op]
*.egg-info/

# Cached files
SP500.html

# Python3
*__pycache__/
--------------------------------------------------------------------------------
/finsymbols/__init__.py:
--------------------------------------------------------------------------------
from finsymbols.symbols import get_sp500_symbols
from finsymbols.symbols import get_nyse_symbols
from finsymbols.symbols import get_amex_symbols
from finsymbols.symbols import get_nasdaq_symbols
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python:
  - 2.7
  - 3.3
  - 3.4
  - 3.5

install:
  - pip install beautifulsoup4
  - pip install six

script: nosetests

sudo: false

matrix:
  fast_finish: true
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
finsymbols
==========

[![Build Status](https://travis-ci.org/skillachie/finsymbols.svg?branch=master)](https://travis-ci.org/skillachie/finsymbols)

Obtains stock symbols and related information for the S&P 500, AMEX, NYSE, and NASDAQ.

* S&P 500 listings are obtained dynamically by parsing Wikipedia
* AMEX, NYSE, and NASDAQ listings are obtained from NASDAQ data

How to use: http://skillachie.github.io/finsymbols/
--------------------------------------------------------------------------------
/finsymbols/tests/symbols_test.py:
--------------------------------------------------------------------------------
from unittest import TestCase, main

from six import string_types

from finsymbols import symbols


class TestSizeOfList(TestCase):

    def test_sp500_size(self):
        sp500 = symbols.get_sp500_symbols()
        assert len(sp500) == 505, \
            'len gathered data: {}. Expected len: 505'.format(len(sp500))

    def test_amex_not_null(self):
        amex = symbols.get_amex_symbols()
        assert len(amex) != 0, 'AMEX list is of size 0'

    def test_nyse_not_null(self):
        nyse = symbols.get_nyse_symbols()
        assert len(nyse) != 0, 'NYSE list is of size 0'

    def test_nasdaq_not_null(self):
        nasdaq = symbols.get_nasdaq_symbols()
        assert len(nasdaq) != 0, 'NASDAQ list is of size 0'

    def test_string_output(self):
        sp500 = symbols.get_sp500_symbols()
        company = sp500[0]

        assert isinstance(company['company'], string_types), \
            'Company dict: {}. Expected output to be a string'.format(company)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
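Before the packaging and implementation files that follow, here is a minimal usage sketch complementing the README's "How to use" link. It is not part of the repository itself; it assumes the package is installed and that the Wikipedia and NASDAQ sources are reachable.

# Minimal usage sketch: fetch the symbol lists exposed by
# finsymbols/__init__.py and inspect a couple of entries.
from finsymbols import symbols

sp500 = symbols.get_sp500_symbols()    # parsed from Wikipedia
nasdaq = symbols.get_nasdaq_symbols()  # parsed from the NASDAQ company list

print(len(sp500), len(nasdaq))
print(sp500[0])  # dict with symbol/company/sector/industry/headquarters keys
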
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

setup(
    name='finsymbols',
    version='1.3.0',
    packages=['finsymbols'],
    package_dir={'finsymbols': 'finsymbols'},
    package_data={'finsymbols': ['exchanges/*.csv']},
    include_package_data=True,
    author='Dwayne V Campbell',
    author_email='dwaynecampbell13@gmail.com',
    description='Retrieves list of all symbols present in SP500, NASDAQ, AMEX and NYSE',
    long_description=open('README.md').read(),
    url='http://skillachie.github.io/finsymbols/',
    download_url='http://pypi.python.org/pypi/finsymbols',
    keywords='stocks stockmarket yahoo finance SP500 NASDAQ AMEX NYSE'.split(),
    license='GNU LGPLv2+',
    install_requires=[
        "beautifulsoup4 >= 4.2.1"
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Office/Business :: Financial :: Investment',
    ]
)
--------------------------------------------------------------------------------
/finsymbols/symbols.py:
--------------------------------------------------------------------------------
import os

from bs4 import BeautifulSoup

import finsymbols
from finsymbols.symbol_helper import (fetch_file, get_exchange_url,
                                      get_symbol_list, is_cached, save_file,
                                      wiki_html)


def get_sp500_symbols():
    page_html = wiki_html('List_of_S%26P_500_companies', 'SP500.html')
    wiki_soup = BeautifulSoup(page_html, "html.parser")
    symbol_table = wiki_soup.find(attrs={'class': 'wikitable sortable'})

    symbol_data_list = list()

    for symbol in symbol_table.find_all("tr"):
        symbol_data_content = dict()
        symbol_raw_data = symbol.find_all("td")
        td_count = 0
        for symbol_data in symbol_raw_data:
            # Column positions in the Wikipedia table:
            # 0 = ticker, 1 = company, 3 = sector,
            # 4 = sub-industry, 5 = headquarters
            if td_count == 0:
                symbol_data_content['symbol'] = symbol_data.text
            elif td_count == 1:
                symbol_data_content['company'] = symbol_data.text
            elif td_count == 3:
                symbol_data_content['sector'] = symbol_data.text
            elif td_count == 4:
                symbol_data_content['industry'] = symbol_data.text
            elif td_count == 5:
                symbol_data_content['headquarters'] = symbol_data.text

            td_count += 1

        symbol_data_list.append(symbol_data_content)

    # The first row of the table is the header row, so drop it
    return symbol_data_list[1:]


def get_nyse_symbols():
    return _get_exchange_data("NYSE")


def get_amex_symbols():
    return _get_exchange_data("AMEX")


def get_nasdaq_symbols():
    return _get_exchange_data("NASDAQ")


def _get_exchange_data(exchange):
    url = get_exchange_url(exchange)
    file_path = os.path.join(os.path.dirname(finsymbols.__file__), exchange)
    if is_cached(file_path):
        with open(file_path, "r") as cached_file:
            symbol_data = cached_file.read()
    else:
        symbol_data = fetch_file(url)
        save_file(file_path, symbol_data)

    return get_symbol_list(symbol_data, exchange)
--------------------------------------------------------------------------------
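For clarity, each element of the list returned by get_sp500_symbols() above is a plain dict keyed by symbol, company, sector, industry, and headquarters. A hypothetical record is sketched below; the values are illustrative only, since the actual data depends on the live Wikipedia table.

# Hypothetical example of a single get_sp500_symbols() record
# (values illustrative; keys match the parser above).
example_record = {
    'symbol': 'MMM',
    'company': '3M Company',
    'sector': 'Industrials',
    'industry': 'Industrial Conglomerates',
    'headquarters': 'St. Paul, Minnesota',
}
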
/finsymbols/symbol_helper.py:
--------------------------------------------------------------------------------
try:
    import urllib2 as urllib
except ImportError:  # python3
    import urllib.request as urllib
import csv
import datetime
import os
import re

import finsymbols


def get_symbol_list(symbol_data, exchange_name):

    symbol_list = list()
    symbol_data = re.split("\r?\n", symbol_data)

    # Fields extracted per row: symbol, company, sector, industry
    symbol_data = list(csv.reader(symbol_data, delimiter=','))
    # Skip the header row and cut off the last row because it is an
    # empty string
    for row in symbol_data[1:-1]:
        symbol_data_dict = dict()
        symbol_data_dict['symbol'] = row[0]
        symbol_data_dict['company'] = row[1]
        symbol_data_dict['sector'] = row[6]
        symbol_data_dict['industry'] = row[7]

        symbol_list.append(symbol_data_dict)
    return symbol_list


def save_file(file_path, file_data):
    if isinstance(file_data, str):
        with open(file_path, "w") as saved_file:
            saved_file.write(file_data)
    elif isinstance(file_data, bytes):
        with open(file_path, "wb") as saved_file:
            saved_file.write(file_data)


def get_exchange_url(exchange):
    return ("http://www.nasdaq.com/screening/companies-by-industry.aspx?"
            "exchange={}&render=download".format(exchange))


def is_cached(file_path):
    '''
    Checks whether a cached copy of the file exists and is still valid
    '''
    if not os.path.exists(file_path):
        return False

    file_time = datetime.datetime.fromtimestamp(os.path.getctime(file_path))
    current_time = datetime.datetime.now()
    file_age = (current_time - file_time).total_seconds()

    # Treat anything older than 24 hours (86400 seconds) as stale
    if file_age > 86400:
        return False
    else:
        return True


def fetch_file(url):
    '''
    Downloads the file at the given URL and returns its contents as text
    '''
    file_fetcher = urllib.build_opener()
    file_fetcher.addheaders = [('User-agent', 'Mozilla/5.0')]
    file_data = file_fetcher.open(url).read()
    if isinstance(file_data, str):  # Python2
        return file_data
    elif isinstance(file_data, bytes):  # Python3
        return file_data.decode("utf-8")


def wiki_html(url, file_name):
    '''
    Obtains HTML from Wikipedia.
    Note: an API exists (python-wikitools - http://code.google.com/p/python-wikitools/),
    but for this use case the data it returned was not parsable, so the raw HTML
    is used instead.
    Ex. http://en.wikipedia.org/w/api.php?format=xml&action=query&titles=List_of_S%26P_500_companies&prop=revisions&rvprop=content
    '''
    file_path = os.path.join(os.path.dirname(finsymbols.__file__), file_name)

    if is_cached(file_path):
        with open(file_path, "rb") as sp500_file:
            return sp500_file.read()
    else:
        wiki_html = fetch_file('http://en.wikipedia.org/wiki/{}'.format(url))
        # Save the file so it can be reused by the cache
        save_file(file_path, wiki_html)
        return wiki_html
--------------------------------------------------------------------------------
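
As a closing illustration, the download-and-cache flow shared by _get_exchange_data() and wiki_html() can be exercised directly with the helpers above. The sketch below mirrors that flow; the exchange name is just an example, and network access to the NASDAQ endpoint is assumed.

# Sketch of the cache-then-fetch pattern used in this repository:
# reuse a local copy if it is less than a day old, otherwise download
# the data and save it for the next call.
import os

import finsymbols
from finsymbols.symbol_helper import (fetch_file, get_exchange_url,
                                      is_cached, save_file)

exchange = "NASDAQ"  # example exchange
cache_path = os.path.join(os.path.dirname(finsymbols.__file__), exchange)

if is_cached(cache_path):
    with open(cache_path, "r") as cached_file:
        symbol_data = cached_file.read()
else:
    symbol_data = fetch_file(get_exchange_url(exchange))
    save_file(cache_path, symbol_data)

print(symbol_data.splitlines()[0])  # CSV header line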