├── finsymbols
│   ├── tests
│   │   ├── __init__.py
│   │   └── symbols_test.py
│   ├── __init__.py
│   ├── symbols.py
│   └── symbol_helper.py
├── MANIFEST.in
├── .gitignore
├── .travis.yml
├── README.md
└── setup.py

/finsymbols/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include *.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.py[co]
*.sw[op]
*.egg-info/

# Cached files
SP500.html

# Python3
*__pycache__/
--------------------------------------------------------------------------------
/finsymbols/__init__.py:
--------------------------------------------------------------------------------
from finsymbols.symbols import get_sp500_symbols
from finsymbols.symbols import get_nyse_symbols
from finsymbols.symbols import get_amex_symbols
from finsymbols.symbols import get_nasdaq_symbols
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python:
  - 2.7
  - 3.3
  - 3.4
  - 3.5

install:
  - pip install beautifulsoup4
  - pip install six

script: nosetests

sudo: false

matrix:
  fast_finish: true
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
finsymbols
==========

[![Build Status](https://travis-ci.org/skillachie/finsymbols.svg?branch=master)](https://travis-ci.org/skillachie/finsymbols)

Obtains stock symbols and related information for the S&P 500, AMEX, NYSE, and NASDAQ.

* S&P 500 listings are obtained dynamically by parsing Wikipedia
* AMEX, NYSE, and NASDAQ listings are obtained from NASDAQ data

How to use: http://skillachie.github.io/finsymbols/
--------------------------------------------------------------------------------
/finsymbols/tests/symbols_test.py:
--------------------------------------------------------------------------------
from unittest import TestCase, main

from six import string_types

from finsymbols import symbols


class TestSizeOfList(TestCase):

    def test_sp500_size(self):
        sp500 = symbols.get_sp500_symbols()
        assert len(sp500) == 505, \
            'len gathered data: {}. Expected len: 505'.format(len(sp500))

    def test_amex_not_null(self):
        amex = symbols.get_amex_symbols()
        assert len(amex) != 0, 'AMEX list is of size 0'

    def test_nyse_not_null(self):
        nyse = symbols.get_nyse_symbols()
        assert len(nyse) != 0, 'NYSE list is of size 0'

    def test_nasdaq_not_null(self):
        nasdaq = symbols.get_nasdaq_symbols()
        assert len(nasdaq) != 0, 'NASDAQ list is of size 0'

    def test_string_output(self):
        sp500 = symbols.get_sp500_symbols()
        company = sp500[0]

        assert isinstance(company['company'], string_types), \
            'Company dict: {}. Expected output to be a string'.format(company)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
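Before the packaging and implementation files that follow, here is a minimal usage sketch complementing the README's "How to use" link. It is not part of the repository itself; it assumes the package is installed and that the Wikipedia and NASDAQ sources are reachable.

# Minimal usage sketch: fetch the symbol lists exposed by
# finsymbols/__init__.py and inspect a couple of entries.
from finsymbols import symbols

sp500 = symbols.get_sp500_symbols()    # parsed from Wikipedia
nasdaq = symbols.get_nasdaq_symbols()  # parsed from the NASDAQ company list

print(len(sp500), len(nasdaq))
print(sp500[0])  # dict with symbol/company/sector/industry/headquarters keys
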
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

setup(
    name='finsymbols',
    version='1.3.0',
    packages=['finsymbols'],
    package_dir={'finsymbols': 'finsymbols'},
    package_data={'finsymbols': ['exchanges/*.csv']},
    include_package_data=True,
    author='Dwayne V Campbell',
    author_email='dwaynecampbell13@gmail.com',
    description='Retrieves list of all symbols present in SP500, NASDAQ, AMEX and NYSE',
    long_description=open('README.md').read(),
    url='http://skillachie.github.io/finsymbols/',
    download_url='http://pypi.python.org/pypi/finsymbols',
    keywords='stocks stockmarket yahoo finance SP500 NASDAQ AMEX NYSE'.split(),
    license='GNU LGPLv2+',
    install_requires=[
        "beautifulsoup4 >= 4.2.1"
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Office/Business :: Financial :: Investment',
    ]
)
--------------------------------------------------------------------------------
/finsymbols/symbols.py:
--------------------------------------------------------------------------------
import os

from bs4 import BeautifulSoup

import finsymbols
from finsymbols.symbol_helper import (fetch_file, get_exchange_url,
                                      get_symbol_list, is_cached, save_file,
                                      wiki_html)


def get_sp500_symbols():
    page_html = wiki_html('List_of_S%26P_500_companies', 'SP500.html')
    wiki_soup = BeautifulSoup(page_html, "html.parser")
    symbol_table = wiki_soup.find(attrs={'class': 'wikitable sortable'})

    symbol_data_list = list()

    for symbol in symbol_table.find_all("tr"):
        symbol_data_content = dict()
        symbol_raw_data = symbol.find_all("td")
        td_count = 0
        for symbol_data in symbol_raw_data:
            # Column positions in the Wikipedia table:
            # 0 = ticker, 1 = company, 3 = sector,
            # 4 = sub-industry, 5 = headquarters
            if td_count == 0:
                symbol_data_content['symbol'] = symbol_data.text
            elif td_count == 1:
                symbol_data_content['company'] = symbol_data.text
            elif td_count == 3:
                symbol_data_content['sector'] = symbol_data.text
            elif td_count == 4:
                symbol_data_content['industry'] = symbol_data.text
            elif td_count == 5:
                symbol_data_content['headquarters'] = symbol_data.text

            td_count += 1

        symbol_data_list.append(symbol_data_content)

    # The first row of the table is the header row, so drop it
    return symbol_data_list[1:]


def get_nyse_symbols():
    return _get_exchange_data("NYSE")


def get_amex_symbols():
    return _get_exchange_data("AMEX")


def get_nasdaq_symbols():
    return _get_exchange_data("NASDAQ")


def _get_exchange_data(exchange):
    url = get_exchange_url(exchange)
    file_path = os.path.join(os.path.dirname(finsymbols.__file__), exchange)
    if is_cached(file_path):
        with open(file_path, "r") as cached_file:
            symbol_data = cached_file.read()
    else:
        symbol_data = fetch_file(url)
        save_file(file_path, symbol_data)

    return get_symbol_list(symbol_data, exchange)
--------------------------------------------------------------------------------
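For clarity, each element of the list returned by get_sp500_symbols() above is a plain dict keyed by symbol, company, sector, industry, and headquarters. A hypothetical record is sketched below; the values are illustrative only, since the actual data depends on the live Wikipedia table.

# Hypothetical example of a single get_sp500_symbols() record
# (values illustrative; keys match the parser above).
example_record = {
    'symbol': 'MMM',
    'company': '3M Company',
    'sector': 'Industrials',
    'industry': 'Industrial Conglomerates',
    'headquarters': 'St. Paul, Minnesota',
}
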
/finsymbols/symbol_helper.py:
--------------------------------------------------------------------------------
try:
    import urllib2 as urllib
except ImportError:  # python3
    import urllib.request as urllib
import csv
import datetime
import os
import re

import finsymbols


def get_symbol_list(symbol_data, exchange_name):

    symbol_list = list()
    symbol_data = re.split("\r?\n", symbol_data)

    # Fields extracted per row: symbol, company, sector, industry
    symbol_data = list(csv.reader(symbol_data, delimiter=','))
    # Skip the header row and cut off the last row because it is an
    # empty string
    for row in symbol_data[1:-1]:
        symbol_data_dict = dict()
        symbol_data_dict['symbol'] = row[0]
        symbol_data_dict['company'] = row[1]
        symbol_data_dict['sector'] = row[6]
        symbol_data_dict['industry'] = row[7]

        symbol_list.append(symbol_data_dict)
    return symbol_list


def save_file(file_path, file_data):
    if isinstance(file_data, str):
        with open(file_path, "w") as saved_file:
            saved_file.write(file_data)
    elif isinstance(file_data, bytes):
        with open(file_path, "wb") as saved_file:
            saved_file.write(file_data)


def get_exchange_url(exchange):
    return ("http://www.nasdaq.com/screening/companies-by-industry.aspx?"
            "exchange={}&render=download".format(exchange))


def is_cached(file_path):
    '''
    Checks whether a cached copy of the file exists and is still valid
    '''
    if not os.path.exists(file_path):
        return False

    file_time = datetime.datetime.fromtimestamp(os.path.getctime(file_path))
    current_time = datetime.datetime.now()
    file_age = (current_time - file_time).total_seconds()

    # Treat anything older than 24 hours (86400 seconds) as stale
    if file_age > 86400:
        return False
    else:
        return True


def fetch_file(url):
    '''
    Downloads the file at the given URL and returns its contents as text
    '''
    file_fetcher = urllib.build_opener()
    file_fetcher.addheaders = [('User-agent', 'Mozilla/5.0')]
    file_data = file_fetcher.open(url).read()
    if isinstance(file_data, str):  # Python2
        return file_data
    elif isinstance(file_data, bytes):  # Python3
        return file_data.decode("utf-8")


def wiki_html(url, file_name):
    '''
    Obtains HTML from Wikipedia.
    Note: an API exists (python-wikitools - http://code.google.com/p/python-wikitools/),
    but for this use case the data it returned was not parsable, so the raw HTML
    is used instead.
    Ex. http://en.wikipedia.org/w/api.php?format=xml&action=query&titles=List_of_S%26P_500_companies&prop=revisions&rvprop=content
    '''
    file_path = os.path.join(os.path.dirname(finsymbols.__file__), file_name)

    if is_cached(file_path):
        with open(file_path, "rb") as sp500_file:
            return sp500_file.read()
    else:
        wiki_html = fetch_file('http://en.wikipedia.org/wiki/{}'.format(url))
        # Save the file so it can be reused by the cache
        save_file(file_path, wiki_html)
        return wiki_html
--------------------------------------------------------------------------------
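
As a closing illustration, the download-and-cache flow shared by _get_exchange_data() and wiki_html() can be exercised directly with the helpers above. The sketch below mirrors that flow; the exchange name is just an example, and network access to the NASDAQ endpoint is assumed.

# Sketch of the cache-then-fetch pattern used in this repository:
# reuse a local copy if it is less than a day old, otherwise download
# the data and save it for the next call.
import os

import finsymbols
from finsymbols.symbol_helper import (fetch_file, get_exchange_url,
                                      is_cached, save_file)

exchange = "NASDAQ"  # example exchange
cache_path = os.path.join(os.path.dirname(finsymbols.__file__), exchange)

if is_cached(cache_path):
    with open(cache_path, "r") as cached_file:
        symbol_data = cached_file.read()
else:
    symbol_data = fetch_file(get_exchange_url(exchange))
    save_file(cache_path, symbol_data)

print(symbol_data.splitlines()[0])  # CSV header line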