├── .editorconfig ├── .gitignore ├── .travis.yml ├── 01-basics.ipynb ├── 01-presentation-example ├── 01_simple_open.py ├── 02_open_file.py ├── 03_data_manipulation.py ├── 04_perform_calculation.py ├── 04_perform_calculation_no_comments.py ├── 05_storing_data.py ├── 06_pandas.py └── 06_pandas_no_comments.py ├── 02-selenium-examples └── download_calendar.py ├── 02-selenium-safari ├── create_names.py ├── gather_links_for_processing.py ├── html_to_pdf.py ├── main.py ├── merge_pdf_files.py ├── process_html_remove_junk.py ├── requirements.txt └── zip_folder.py ├── 02-webscrape-celery ├── __init__.py ├── basic_consumer.py ├── basic_producer.py ├── consumer.py ├── data.html ├── producer.py └── urls.txt ├── 02-webscraping.ipynb ├── 03-tidy-data.ipynb ├── 04-other-analysis ├── Autoregression_retail_sales.ipynb ├── Dynamic Linear Regression Models in Python.ipynb ├── example_pandas.py └── read_sec.py ├── 04-pandas-other └── pandas-selecting-rows.ipynb ├── 04-pandas.ipynb ├── 05-data-analysis.ipynb ├── 05-other-visualizations ├── Visualization.ipynb └── visualize-football-stadiums.ipynb ├── 06-data-visualizations.ipynb ├── 06-flask └── flask-rss │ ├── README.md │ ├── main.py │ ├── static │ ├── css │ │ ├── bootstrap-theme.css │ │ ├── bootstrap-theme.css.map │ │ ├── bootstrap-theme.min.css │ │ ├── bootstrap-theme.min.css.map │ │ ├── bootstrap.css │ │ ├── bootstrap.css.map │ │ ├── bootstrap.min.css │ │ ├── bootstrap.min.css.map │ │ ├── reader.css │ │ └── style.css │ ├── fonts │ │ ├── glyphicons-halflings-regular.eot │ │ ├── glyphicons-halflings-regular.svg │ │ ├── glyphicons-halflings-regular.ttf │ │ ├── glyphicons-halflings-regular.woff │ │ └── glyphicons-halflings-regular.woff2 │ └── js │ │ ├── bootstrap.js │ │ ├── bootstrap.min.js │ │ ├── jquery-2.2.0.js │ │ └── npm.js │ └── templates │ ├── index.html │ ├── layout.html │ ├── notfound.html │ ├── reader.html │ └── table.html ├── 07-airflow ├── README.md ├── dags │ ├── example_postgres.py │ ├── sql │ │ ├── stock_insert.sql │ │ └── stock_schema.sql │ ├── stock_analysis_dag.py │ └── stocks.py └── docker-compose.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── data ├── 20180806_ALL_EQUITY_meetup.csv ├── WA_Fn-UseC_-HR-Employee-Attrition.xlsx ├── WMT_US.csv ├── WMT_US_pandas.csv ├── WMT_US_updated.csv ├── billboard.csv ├── country_timeseries.csv ├── fortune_1000.csv ├── gapminder.tsv ├── global_equity_historic_sales_1999_2018_usd_all_meetup.csv ├── linkedin_industries.html ├── msft_stock_key_data.csv ├── pew.csv ├── portfolio.csv ├── pycon_sponsor_levels.csv ├── pycon_sponsors.csv ├── retail_sales.csv ├── sponsors_vlookup.csv ├── stl.csv ├── stlcom_larget_employers.xlsx ├── stlregionalchamber_largest_employers_.xlsx ├── stock_data_simple.csv ├── stock_data_simple.xlsx ├── stock_description.csv ├── table1.csv ├── table2.csv ├── table3.csv ├── table4a.csv ├── table4b.csv └── weather.csv ├── docs ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── history.rst ├── index.rst ├── installation.rst ├── make.bat ├── readme.rst └── usage.rst ├── img ├── basics │ ├── basic_python_style.png │ ├── built-in_data_structures.png │ ├── built-in_functions.png │ ├── built-in_len.png │ ├── calculations.png │ ├── cell.png │ ├── cell_ex.png │ ├── cell_types.png │ ├── cells.png │ ├── comments.png │ ├── data-types.png │ ├── data_collections.png │ ├── excel-built-in-string.png │ ├── excel-built-in.png │ ├── excel-pre-installed-add-ins.png │ ├── jupyter-method.png │ ├── pycharm-function-pop.png │ ├── 
pycharm-function-popup.png │ ├── pycharm-methods.png │ ├── pycon-files.png │ ├── pycon_sponsor_levels.png │ ├── pycon_sponsors.png │ ├── python-pre-installed-add-ins.png │ ├── reserved_words.png │ ├── standard-library-import.png │ ├── standard-library.png │ └── vscode-method.png ├── dataframe.png ├── dataframe_components.png ├── excel_table.png ├── pandas_dataframe.png └── split_apply_combine.png ├── requirements_dev.txt ├── section1-01-basics_but_important_stuff.ipynb ├── section1-02-files_lists_dictionaries.ipynb ├── section1_challenge_1.py ├── section1_challenge_1_answer.py ├── section1_challenge_2.py ├── section1_challenge_2_answer.py ├── section1_challenge_3.py ├── section1_challenge_3_answer.py ├── section2-01-real-world-example.py ├── section2-02-real-world-example-refactored.py ├── section2_challenge.rst ├── setup.cfg ├── setup.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | venv/ 104 | stock_algo/ 105 | sec.gov.zip 106 | .ipynb_checkpoints/ 107 | .idea/ 108 | zip-data/ 109 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.org 2 | 3 | language: python 4 | python: 5 | - 3.7 6 | - 3.6 7 | - 3.5 8 | - 2.7 9 | 10 | # Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 11 | install: pip install -U tox-travis 12 | 13 | # Command to run tests, e.g. python setup.py test 14 | script: tox 15 | 16 | 17 | -------------------------------------------------------------------------------- /01-basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro to Python\n", 8 | "\n", 9 | "## string" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "pycharm": { 17 | "is_executing": false, 18 | "name": "#%%\n" 19 | }, 20 | "scrolled": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "tickers = \"GOOG MSFT IBM TSLA\"" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "### Save String to File" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "pycharm": { 39 | "name": "#%%\n" 40 | } 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# write to file\n", 45 | "f = open('tickers.txt', 'wt')\n", 46 | "f.write(tickers)\n", 47 | "f.close()" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Tuple" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": { 61 | "pycharm": { 62 | "name": "#%%\n" 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# tuple\n", 68 | "tickers = (\"GOOG\",\n", 69 | " \"MSFT\",\n", 70 | " \"IBM\",\n", 71 | " \"TSLA\")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### For-Loop over tuple and sum values" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 4, 84 | "metadata": { 85 | "pycharm": { 86 | "name": "#%%\n" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "(1, 1)\n", 95 | 
"The sum of the tuple:\t2\n", 96 | "(2, 2)\n", 97 | "The sum of the tuple:\t4\n", 98 | "(3, 3)\n", 99 | "The sum of the tuple:\t6\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "list_of_tuples = [(1,1),\n", 105 | " (2,2),\n", 106 | " (3,3)]\n", 107 | "\n", 108 | "for values in list_of_tuples:\n", 109 | " print(f\"{values}\")\n", 110 | " total = sum(values)\n", 111 | " print(f\"The sum of the tuple:\\t{total}\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## For-Loop Over tickers" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": { 125 | "pycharm": { 126 | "name": "#%%\n" 127 | } 128 | }, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "GOOG\n", 135 | "MSFT\n", 136 | "IBM\n", 137 | "TSLA\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "# loolist\n", 143 | "tickers = [\"GOOG\",\n", 144 | " \"MSFT\",\n", 145 | " \"IBM\",\n", 146 | " \"TSLA\"]\n", 147 | "\n", 148 | "for ticker in tickers:\n", 149 | " print(ticker)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## For Loop - String Formatting" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 6, 162 | "metadata": { 163 | "pycharm": { 164 | "name": "#%%\n" 165 | } 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "Ticker: GOOG\n", 173 | "Ticker: MSFT\n", 174 | "Ticker: IBM\n", 175 | "Ticker: TSLA\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "# loop through list\n", 181 | "tickers = [\"GOOG\",\"MSFT\",\"IBM\",\"TSLA\"]\n", 182 | "\n", 183 | "for ticker in tickers:\n", 184 | " print(f\"Ticker: {ticker}\")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 7, 195 | "metadata": { 196 | "pycharm": { 197 | "name": "#%%\n" 198 | } 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "tickers = set([\"GOOG\",\n", 203 | " \"MSFT\",\n", 204 | " \"IBM\",\n", 205 | " \"TSLA\"])\n", 206 | "\n", 207 | "tickers = (\"GOOG\",\n", 208 | " \"MSFT\",\n", 209 | " \"IBM\",\n", 210 | " \"TSLA\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "# Tuple" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 9, 223 | "metadata": { 224 | "pycharm": { 225 | "name": "#%%\n" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "tickers = tuple([\"GOOG\",\n", 231 | " \"MSFT\",\n", 232 | " \"IBM\",\n", 233 | " \"TSLA\"])" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "# Dictionary" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 11, 246 | "metadata": { 247 | "pycharm": { 248 | "name": "#%%\n" 249 | } 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "tickers = {1: \"GOOG\",\n", 254 | " 2: \"MSFT\",\n", 255 | " 3: \"IBM\",\n", 256 | " 4: \"TSLA\"}" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "# Opening Files" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 12, 269 | "metadata": { 270 | "pycharm": { 271 | "name": "#%%\n" 272 | } 273 | }, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "Ticker,Date,Shares,Price\n", 280 | "\n", 281 
| "GOOG,2019-10-01,100,1\n", 282 | "\n", 283 | "MSFT,2019-10-01,200,1\n", 284 | "\n", 285 | "IBM,2019-10-01,500,1\n", 286 | "\n", 287 | "TSLA,2019-10-01,300,1\n", 288 | "\n", 289 | "\n", 290 | "\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "import os\n", 296 | "# Contents of portfolio.csv:\n", 297 | "\"\"\"\n", 298 | "Ticker,Date,Shares,Price\n", 299 | "GOOG,2019-10-01,100,1\n", 300 | "MSFT,2019-10-01,200,1\n", 301 | "IBM,2019-10-01,500,1\n", 302 | "TSLA,2019-10-01,300,1\n", 303 | "\"\"\"\n", 304 | "\n", 305 | "# basic - open a file\n", 306 | "file = open('data/portfolio.csv', 'r')\n", 307 | "\n", 308 | "# print each line\n", 309 | "for line in file:\n", 310 | " print(line)\n", 311 | "\n", 312 | "# don't forget to close the file\n", 313 | "file.close()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "# Opening Files - Preferred Way" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 13, 326 | "metadata": { 327 | "pycharm": { 328 | "is_executing": false, 329 | "name": "#%%\n" 330 | } 331 | }, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "['GOOG', '2019-10-01', '100', '1']\n", 338 | "['MSFT', '2019-10-01', '200', '1']\n", 339 | "['IBM', '2019-10-01', '500', '1']\n", 340 | "['TSLA', '2019-10-01', '300', '1']\n", 341 | "['']\n" 342 | ] 343 | } 344 | ], 345 | "source": [ 346 | "### Better way to a file\n", 347 | "### with automatically closes the file for you\n", 348 | "\n", 349 | "with open('data/portfolio.csv', 'r') as f:\n", 350 | " headers = next(f) # skip a single of input\n", 351 | " for line in f:\n", 352 | " line = line.strip() #strip the whitespace\n", 353 | " parts = line.split(\",\")\n", 354 | " print(parts)\n" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "# Example of indexing into lists and if statement" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 14, 367 | "metadata": { 368 | "pycharm": { 369 | "is_executing": false, 370 | "name": "#%%\n" 371 | } 372 | }, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Ticker:GOOG\tDate: 2019-10-01\tShares: 100\tPrice: 1\n", 379 | "\n", 380 | "Ticker:MSFT\tDate: 2019-10-01\tShares: 200\tPrice: 1\n", 381 | "\n", 382 | "Ticker:IBM\tDate: 2019-10-01\tShares: 500\tPrice: 1\n", 383 | "\n", 384 | "Ticker:TSLA\tDate: 2019-10-01\tShares: 300\tPrice: 1\n", 385 | "\n" 386 | ] 387 | } 388 | ], 389 | "source": [ 390 | "\n", 391 | "with open(r'data/portfolio.csv', 'r') as f:\n", 392 | " headers = next(f) # skip a single line of input, or skip header\n", 393 | " for line in f:\n", 394 | " parts = line.split(\",\")\n", 395 | " # check if number of items in list greater than 1\n", 396 | " # this will skip lines with only 1 element\n", 397 | " if len(parts) > 1:\n", 398 | " ticker = parts[0] # take the first item in the list\n", 399 | " date = parts[1] # take the second item\n", 400 | " shares = parts[2]\n", 401 | " price = parts[3]\n", 402 | " # f-strings formatting\n", 403 | " print(f\"Ticker:{ticker}\\tDate: {date}\\tShares: {shares}\\tPrice: {price}\")" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "# \"Batteries included\" with csv module" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 15, 416 | "metadata": { 417 | "pycharm": { 418 | "is_executing": false, 419 | "name": "#%%\n" 420 | } 
421 | }, 422 | "outputs": [ 423 | { 424 | "name": "stdout", 425 | "output_type": "stream", 426 | "text": [ 427 | "[{'date': '2019-10-01', 'price': '1', 'shares': '100', 'ticker': 'GOOG'},\n", 428 | " {'date': '2019-10-01', 'price': '1', 'shares': '200', 'ticker': 'MSFT'},\n", 429 | " {'date': '2019-10-01', 'price': '1', 'shares': '500', 'ticker': 'IBM'},\n", 430 | " {'date': '2019-10-01', 'price': '1', 'shares': '300', 'ticker': 'TSLA'}]\n" 431 | ] 432 | } 433 | ], 434 | "source": [ 435 | "import csv\n", 436 | "from pprint import pprint\n", 437 | "\n", 438 | "portfolio = list() # need to create a list before you try using it\n", 439 | "# or, more common way to create list\n", 440 | "portfolio = [] # create a list to store tickers\n", 441 | "\n", 442 | "with open(r'data/portfolio.csv', 'r') as f:\n", 443 | " rows = csv.reader(f)\n", 444 | " headers = next(f) # skip a single of input\n", 445 | " for row in rows:\n", 446 | " if len(row) > 1:\n", 447 | " record = {\n", 448 | " 'ticker' : row[0],\n", 449 | " 'date' : row[1],\n", 450 | " 'shares' : row[2],\n", 451 | " 'price': row[3]\n", 452 | " }\n", 453 | " portfolio.append(record)\n", 454 | "\n", 455 | "pprint(portfolio)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "# Create a function that takes a filename and returns the contents" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 16, 468 | "metadata": { 469 | "pycharm": { 470 | "is_executing": false, 471 | "name": "#%%\n" 472 | } 473 | }, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "[{'date': '2019-10-01', 'price': '1', 'shares': '100', 'ticker': 'GOOG'},\n", 480 | " {'date': '2019-10-01', 'price': '1', 'shares': '200', 'ticker': 'MSFT'},\n", 481 | " {'date': '2019-10-01', 'price': '1', 'shares': '500', 'ticker': 'IBM'},\n", 482 | " {'date': '2019-10-01', 'price': '1', 'shares': '300', 'ticker': 'TSLA'}]\n" 483 | ] 484 | } 485 | ], 486 | "source": [ 487 | "import csv\n", 488 | "from pprint import pprint\n", 489 | "\n", 490 | "def read_portfolio(filename):\n", 491 | "\n", 492 | " portfolio = list() # create a list to store tickers\n", 493 | "\n", 494 | " with open(filename, 'r') as f:\n", 495 | " rows = csv.reader(f)\n", 496 | " headers = next(f) # skip a single of input\n", 497 | " for row in rows:\n", 498 | " if len(row) > 1:\n", 499 | " record = {\n", 500 | " 'ticker' : row[0],\n", 501 | " 'date' : row[1],\n", 502 | " 'shares' : row[2],\n", 503 | " 'price': row[3]\n", 504 | " }\n", 505 | " portfolio.append(record)\n", 506 | " return portfolio\n", 507 | "\n", 508 | "portfolio = read_portfolio(r'data/portfolio.csv')\n", 509 | "\n", 510 | "pprint(portfolio)\n" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": { 516 | "pycharm": { 517 | "name": "#%% md\n" 518 | } 519 | }, 520 | "source": [ 521 | "# SQL Connectivity" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 17, 527 | "metadata": { 528 | "pycharm": { 529 | "name": "#%%\n" 530 | } 531 | }, 532 | "outputs": [ 533 | { 534 | "name": "stdout", 535 | "output_type": "stream", 536 | "text": [ 537 | "2020-05-14 14:47:03.454172\n", 538 | "2019-11-01 00:00:00\n", 539 | "2020-05-14 14:47:03.455172\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "# $ pip install sqlalchemy\n", 545 | "# sqlalchemy_uri = \"dialect+driver://user:password@host:port/dbname\"\n", 546 | "import sqlalchemy as sa\n", 547 | "\n", 548 | "conn = sa.create_engine('sqlite://')\n", 
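"# aside: with this 1.x-style engine, the zoo table created below can\n",
"# be populated and queried the same way, for example:\n",
"#   conn.execute(\"INSERT INTO zoo VALUES ('duck', 10, 0.0)\")\n",
"#   rows = conn.execute('SELECT * FROM zoo').fetchall()\n",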
549 | "\n", 550 | "conn.execute('''CREATE TABLE zoo\n", 551 | " (critter VARCHAR(20) PRIMARY KEY,\n", 552 | " count INT,\n", 553 | " damages FLOAT)''')\n", 554 | "\n", 555 | "import datetime\n", 556 | "\n", 557 | "today = datetime.datetime.today()\n", 558 | "print(today)\n", 559 | "\n", 560 | "today = datetime.datetime.strptime(\"11/01/2019\",\"%m/%d/%Y\" )\n", 561 | "print(today)\n", 562 | "\n", 563 | "today = datetime.datetime.now()\n", 564 | "print(today)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "outputs": [], 571 | "source": [], 572 | "metadata": { 573 | "collapsed": false, 574 | "pycharm": { 575 | "name": "#%%\n" 576 | } 577 | } 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [] 585 | } 586 | ], 587 | "metadata": { 588 | "kernelspec": { 589 | "display_name": "Python 3", 590 | "language": "python", 591 | "name": "python3" 592 | }, 593 | "language_info": { 594 | "codemirror_mode": { 595 | "name": "ipython", 596 | "version": 3 597 | }, 598 | "file_extension": ".py", 599 | "mimetype": "text/x-python", 600 | "name": "python", 601 | "nbconvert_exporter": "python", 602 | "pygments_lexer": "ipython3", 603 | "version": "3.7.7" 604 | } 605 | }, 606 | "nbformat": 4, 607 | "nbformat_minor": 1 608 | } -------------------------------------------------------------------------------- /01-presentation-example/01_simple_open.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | filename = r'data\WMT_US.csv' 4 | 5 | f = open(filename, 'r') 6 | 7 | print(f) 8 | 9 | data = f.read() 10 | 11 | print(data) 12 | 13 | f.close() 14 | 15 | f = open(filename, 'r') # open file 16 | 17 | for line in f: 18 | print(line) 19 | 20 | f.close() # close file 21 | -------------------------------------------------------------------------------- /01-presentation-example/02_open_file.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | 4 | filename = r'data\WMT_US.csv' 5 | 6 | total = 0.0 7 | 8 | with open(filename, 'r') as f: 9 | rows = csv.reader(f) 10 | 11 | # save header row 12 | header = next(f) 13 | # and skip to next row 14 | 15 | for row in rows: 16 | print(row) 17 | 18 | -------------------------------------------------------------------------------- /01-presentation-example/03_data_manipulation.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | from datetime import datetime 4 | 5 | filename = r'data\WMT_US.csv' 6 | 7 | with open(filename, 'r') as f: 8 | rows = csv.reader(f) 9 | 10 | # skip header row 11 | header = next(f) 12 | 13 | for row in rows: 14 | row[2] = datetime.strptime(row[2], "%m/%d/%Y") 15 | 16 | # convert string to integer 17 | row[3] = int(row[3]) 18 | row[4] = int(row[4]) 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /01-presentation-example/04_perform_calculation.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | from datetime import datetime 4 | 5 | filename = r'data\WMT_US.csv' 6 | 7 | records = [] 8 | 9 | with open(filename, 'r') as f: 10 | rows = csv.reader(f) 11 | 12 | # skip header row 13 | header = next(f) 14 | 15 | for row in rows: 16 | # print(row) 17 | # ['WMT US', 'WAL-MART STORES INC', '12/31/2014', '476293988352', '460271988736'] 18 | 19 | # convert string to date object 20 | row_date = 
datetime.strptime(row[2], "%m/%d/%Y") 21 | # print(row_date) 22 | # 2003-12-31 00:00:00 23 | # gives us ability to ask for year 24 | row_date_year = row_date.year 25 | 26 | # need to convert sales and expenses values from string to integer 27 | # so can perform mathmatical operations 28 | row_sales = int(row[3]) 29 | row_expenses = int(row[4]) 30 | 31 | # perform profit calculation 32 | profit = row_sales - row_expenses 33 | 34 | print(f"{row_date_year} Profit = {profit:,}") 35 | 36 | """ 37 | Output: 38 | 39 | 2014 Profit = 16,021,999,616 40 | 2013 Profit = 16,999,000,064 41 | 2012 Profit = 15,699,000,320 42 | 2011 Profit = 16,389,000,192 43 | 2010 Profit = 14,334,999,552 44 | 2009 Profit = 13,400,000,512 45 | 2008 Profit = 12,730,999,808 46 | 2007 Profit = 11,283,999,744 47 | 2006 Profit = 11,230,999,552 48 | 2005 Profit = 10,266,999,808 49 | 2004 Profit = 9,054,000,128 50 | 2003 Profit = 7,954,999,808 51 | """ 52 | 53 | -------------------------------------------------------------------------------- /01-presentation-example/04_perform_calculation_no_comments.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | from datetime import datetime 4 | 5 | filename = r'data\WMT_US.csv' 6 | 7 | with open(filename, 'r') as f: 8 | rows = csv.reader(f) 9 | header = next(f) 10 | 11 | for row in rows: 12 | row_date_year = datetime.strptime(row[2], "%m/%d/%Y").year 13 | 14 | row_sales = int(row[3]) 15 | row_expenses = int(row[4]) 16 | 17 | profit = row_sales - row_expenses 18 | 19 | print(f"{row_date_year} Profit = {profit:,}") 20 | 21 | """ 22 | Output: 23 | 24 | 2014 Profit = 16,021,999,616 25 | 2013 Profit = 16,999,000,064 26 | 2012 Profit = 15,699,000,320 27 | 2011 Profit = 16,389,000,192 28 | 2010 Profit = 14,334,999,552 29 | 2009 Profit = 13,400,000,512 30 | 2008 Profit = 12,730,999,808 31 | 2007 Profit = 11,283,999,744 32 | 2006 Profit = 11,230,999,552 33 | 2005 Profit = 10,266,999,808 34 | 2004 Profit = 9,054,000,128 35 | 2003 Profit = 7,954,999,808 36 | """ 37 | 38 | -------------------------------------------------------------------------------- /01-presentation-example/05_storing_data.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | from datetime import datetime 4 | from pprint import pprint 5 | 6 | filename = r'data\WMT_US.csv' 7 | 8 | records = [] 9 | 10 | with open(filename, 'r') as f: 11 | rows = csv.reader(f) 12 | 13 | # skip header row 14 | header = next(f) 15 | 16 | for row in rows: 17 | row[2] = datetime.strptime(row[2], "%m/%d/%Y") 18 | row[3] = int(row[3]) 19 | row[4] = int(row[4]) 20 | # perform calculation 21 | profit = row[3] - row[4] 22 | 23 | record = { 24 | "ticker": row[0], 25 | "name": row[1], 26 | "date": row[2], 27 | "sales": row[3], 28 | "expenses": row[4], 29 | "profit": profit 30 | } 31 | 32 | records.append(record) 33 | 34 | pprint(records) 35 | -------------------------------------------------------------------------------- /01-presentation-example/06_pandas.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | 4 | pd.set_option('display.float_format', lambda x: f'{x:.5f}') 5 | pd.set_option('display.max_columns', 100) 6 | pd.set_option('display.max_rows', 100) 7 | pd.set_option('display.width', 600) 8 | 9 | filename = r'data\WMT_US.csv' 10 | 11 | df = pd.read_csv(filename) 12 | 13 | # check the data types of each columns 14 | print(df.dtypes) 15 | 16 | """ 17 | Ticker object 18 | Company Name 
object 19 | Year End object 20 | Total Sales int64 21 | Total Expenses int64 22 | dtype: object 23 | """ 24 | 25 | # convert the date column to python date object 26 | # which makes it easier to work with 27 | df['Year End'] = pd.to_datetime(df['Year End']) 28 | 29 | # print(df.dtypes) 30 | """ 31 | Ticker object 32 | Company Name object 33 | Year End datetime64[ns] 34 | Total Sales int64 35 | Total Expenses int64 36 | dtype: object 37 | 38 | """ 39 | 40 | # calculate total profit 41 | df['Total Profit'] = df['Total Sales'] - df['Total Expenses'] 42 | 43 | # print(df) 44 | """ 45 | Ticker Company Name Year End Total Sales Total Expenses Total Profit 46 | 0 WMT US WAL-MART STORES INC 2014-12-31 476293988352 460271988736 16021999616 47 | 1 WMT US WAL-MART STORES INC 2013-12-31 469162000384 452163000320 16999000064 48 | 2 WMT US WAL-MART STORES INC 2012-12-31 446950014976 431251014656 15699000320 49 | 3 WMT US WAL-MART STORES INC 2011-12-31 421849006080 405460005888 16389000192 50 | 4 WMT US WAL-MART STORES INC 2010-12-31 408214011904 393879012352 14334999552 51 | 5 WMT US WAL-MART STORES INC 2009-12-31 405606989824 392206989312 13400000512 52 | 6 WMT US WAL-MART STORES INC 2008-12-31 378798997504 366067997696 12730999808 53 | 7 WMT US WAL-MART STORES INC 2007-12-31 348650012672 337366012928 11283999744 54 | 8 WMT US WAL-MART STORES INC 2006-12-31 312426987520 301195987968 11230999552 55 | 9 WMT US WAL-MART STORES INC 2005-12-31 287989006336 277722006528 10266999808 56 | 10 WMT US WAL-MART STORES INC 2004-12-31 256329007104 247275006976 9054000128 57 | 11 WMT US WAL-MART STORES INC 2003-12-31 229615992832 221660993024 7954999808 58 | """ 59 | 60 | df['Profit Margin'] = (df['Total Profit'] / df['Total Sales']) * 100 61 | 62 | # print(df) 63 | """ 64 | Ticker Company Name Year End Total Sales Total Expenses Total Profit Profit Margin 65 | 0 WMT US WAL-MART STORES INC 2014-12-31 476293988352 460271988736 16021999616 3.36389 66 | 1 WMT US WAL-MART STORES INC 2013-12-31 469162000384 452163000320 16999000064 3.62327 67 | 2 WMT US WAL-MART STORES INC 2012-12-31 446950014976 431251014656 15699000320 3.51247 68 | 3 WMT US WAL-MART STORES INC 2011-12-31 421849006080 405460005888 16389000192 3.88504 69 | 4 WMT US WAL-MART STORES INC 2010-12-31 408214011904 393879012352 14334999552 3.51164 70 | 5 WMT US WAL-MART STORES INC 2009-12-31 405606989824 392206989312 13400000512 3.30369 71 | 6 WMT US WAL-MART STORES INC 2008-12-31 378798997504 366067997696 12730999808 3.36089 72 | 7 WMT US WAL-MART STORES INC 2007-12-31 348650012672 337366012928 11283999744 3.23648 73 | 8 WMT US WAL-MART STORES INC 2006-12-31 312426987520 301195987968 11230999552 3.59476 74 | 9 WMT US WAL-MART STORES INC 2005-12-31 287989006336 277722006528 10266999808 3.56507 75 | 10 WMT US WAL-MART STORES INC 2004-12-31 256329007104 247275006976 9054000128 3.53218 76 | 11 WMT US WAL-MART STORES INC 2003-12-31 229615992832 221660993024 7954999808 3.46448 77 | """ 78 | 79 | # percent change needs to be ascending dates 80 | df.sort_values("Year End", inplace=True) 81 | df['Sales Growth YoY %'] = df['Total Sales'].pct_change() * 100 82 | 83 | # print(df) 84 | """ 85 | Ticker Company Name Year End Total Sales Total Expenses Total Profit Profit Margin Sales Growth YoY % 86 | 11 WMT US WAL-MART STORES INC 2003-12-31 229615992832 221660993024 7954999808 3.46448 nan 87 | 10 WMT US WAL-MART STORES INC 2004-12-31 256329007104 247275006976 9054000128 3.53218 11.63378 88 | 9 WMT US WAL-MART STORES INC 2005-12-31 287989006336 277722006528 10266999808 
3.56507 12.35131 89 | 8 WMT US WAL-MART STORES INC 2006-12-31 312426987520 301195987968 11230999552 3.59476 8.48573 90 | 7 WMT US WAL-MART STORES INC 2007-12-31 348650012672 337366012928 11283999744 3.23648 11.59408 91 | 6 WMT US WAL-MART STORES INC 2008-12-31 378798997504 366067997696 12730999808 3.36089 8.64735 92 | 5 WMT US WAL-MART STORES INC 2009-12-31 405606989824 392206989312 13400000512 3.30369 7.07710 93 | 4 WMT US WAL-MART STORES INC 2010-12-31 408214011904 393879012352 14334999552 3.51164 0.64275 94 | 3 WMT US WAL-MART STORES INC 2011-12-31 421849006080 405460005888 16389000192 3.88504 3.34016 95 | 2 WMT US WAL-MART STORES INC 2012-12-31 446950014976 431251014656 15699000320 3.51247 5.95024 96 | 1 WMT US WAL-MART STORES INC 2013-12-31 469162000384 452163000320 16999000064 3.62327 4.96968 97 | 0 WMT US WAL-MART STORES INC 2014-12-31 476293988352 460271988736 16021999616 3.36389 1.52015 98 | """ 99 | 100 | new_filename = filename.replace(".csv", "_pandas.csv") 101 | 102 | df.to_csv(new_filename) 103 | -------------------------------------------------------------------------------- /01-presentation-example/06_pandas_no_comments.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | 4 | input_filename = r'data\WMT_US.csv' 5 | output_filename = r'data\WMT_US_output.csv' 6 | 7 | df = pd.read_csv(input_filename) 8 | 9 | df['Total Profit'] = df['Total Sales'] - df['Total Expenses'] 10 | 11 | df.to_csv(output_filename) 12 | 13 | print(df) 14 | """ 15 | Ticker Company Name Year End Total Sales Total Expenses Total Profit 16 | 0 WMT US WAL-MART STORES INC 12/31/2014 476293988352 460271988736 16021999616 17 | 1 WMT US WAL-MART STORES INC 12/31/2013 469162000384 452163000320 16999000064 18 | 2 WMT US WAL-MART STORES INC 12/31/2012 446950014976 431251014656 15699000320 19 | 3 WMT US WAL-MART STORES INC 12/31/2011 421849006080 405460005888 16389000192 20 | 4 WMT US WAL-MART STORES INC 12/31/2010 408214011904 393879012352 14334999552 21 | 5 WMT US WAL-MART STORES INC 12/31/2009 405606989824 392206989312 13400000512 22 | 6 WMT US WAL-MART STORES INC 12/31/2008 378798997504 366067997696 12730999808 23 | 7 WMT US WAL-MART STORES INC 12/31/2007 348650012672 337366012928 11283999744 24 | 8 WMT US WAL-MART STORES INC 12/31/2006 312426987520 301195987968 11230999552 25 | 9 WMT US WAL-MART STORES INC 12/31/2005 287989006336 277722006528 10266999808 26 | 10 WMT US WAL-MART STORES INC 12/31/2004 256329007104 247275006976 9054000128 27 | 11 WMT US WAL-MART STORES INC 12/31/2003 229615992832 221660993024 7954999808 28 | """ 29 | -------------------------------------------------------------------------------- /02-selenium-examples/download_calendar.py: -------------------------------------------------------------------------------- 1 | #! 
py27w 2 | import os, time 3 | from datetime import datetime 4 | from datetime import date 5 | from datetime import timedelta 6 | from selenium import webdriver 7 | from selenium.webdriver.firefox.firefox_profile import FirefoxProfile 8 | from selenium.common.exceptions import NoSuchElementException 9 | from selenium.webdriver.common.by import By 10 | from selenium.webdriver.support.ui import WebDriverWait 11 | from selenium.webdriver.support import expected_conditions as EC 12 | 13 | fp = webdriver.FirefoxProfile() 14 | fp.set_preference('browser.download.folderList', 2) 15 | fp.set_preference("browser.download.manager.showWhenStarting", False) 16 | fp.set_preference('browser.download.dir', os.getcwd()) 17 | fp.set_preference("browser.helperApps.neverAsk.saveToDisk", 'application/vnd.ms-excel') 18 | fp.set_preference("browser.download.dir", "c:\\tmp"); 19 | driver = webdriver.Firefox(firefox_profile=fp) 20 | driver.get('https://www.zacks.com/earnings/earnings-reports') 21 | 22 | 23 | def click_calendar(): 24 | try: 25 | element_xpath = '//*[@id="earnings_release"]/div[1]/p/a' 26 | element = WebDriverWait(driver, 10).until( 27 | lambda driver: driver.find_element_by_xpath(element_xpath).click() 28 | ) 29 | finally: 30 | print("clicked calendar") 31 | 32 | 33 | def click_prev_day(x): 34 | s = 'datespan_%d' % (x) 35 | try: 36 | WebDriverWait(driver, 10).until( 37 | lambda driver: driver.find_element_by_id(s).click() 38 | ) 39 | except: 40 | result = False 41 | else: 42 | result = True 43 | return result 44 | 45 | 46 | def click_export(): 47 | try: 48 | element = WebDriverWait(driver, 10).until( 49 | lambda driver: driver.find_element_by_id('export_excel').click() 50 | ) 51 | except: 52 | result = False 53 | else: 54 | result = True 55 | return result 56 | 57 | 58 | def click_prev_month(): 59 | try: 60 | driver.find_element_by_id('prevCal').click() 61 | except: 62 | result = False 63 | else: 64 | result = True 65 | i = 31 66 | while i > 27: 67 | try: 68 | click_prev_day(i) 69 | return False 70 | except: 71 | print('could not find %s in prev month' % (i)) 72 | i -= 1 73 | 74 | 75 | def subtract_day(n): 76 | y = n - 1 77 | return y 78 | 79 | 80 | def start_date(): 81 | return datetime(2016, 2, 29) 82 | 83 | 84 | def click_to_start_date(): 85 | start_date = datetime(2016, 2, 28) 86 | a = date.today() 87 | b = start_date 88 | c = a.month - b.month 89 | if c > 0: 90 | click_calendar() 91 | while c > 0: 92 | click_prev_month() 93 | c -= 1 94 | try: 95 | click_prev_day(31) 96 | except: 97 | click_prev_day(30) 98 | 99 | 100 | def main(): 101 | # click_to_start_date() 102 | # sdate = start_date() 103 | m = 12 104 | while m > 0: 105 | m -= 1 106 | for x in range(31, 0, -1): 107 | click_calendar() 108 | click_prev_day(x) 109 | click_export() 110 | 111 | click_calendar() 112 | click_prev_month() 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /02-selenium-safari/create_names.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import urllib.parse 5 | from pyquery import PyQuery as pq 6 | from bs4 import BeautifulSoup 7 | import configparser 8 | 9 | 10 | def create_filenames_for_conversion(filepath, filename, file_extention): 11 | print(filepath, filename) 12 | timestr = time.strftime("%Y%m%d-%H%M%S",time.localtime(os.path.getmtime(os.path.join(filepath, filename)))) 13 | filename = filename.replace(file_extention,"") 14 | filename 
= filename.translate(dict((ord(char), None) for char in '\/*?:"<>|,.'))
15 |     filename_html, filename_pdf = os.path.join(timestr + '_' + filename + '(clean)' + file_extention), os.path.join(timestr+'_'+filename + '(clean).pdf')
16 |     print('starting creation of: ' + filename_html)
17 |     return filename_html, filename_pdf
18 | 
19 | 
20 | def create_filename_from_url(url):
21 |     url, fragment = urllib.parse.urldefrag(url)
22 |     parsed = urllib.parse.urlsplit(url)
23 |     stripped = parsed.path.replace(URL_REPLACE, '')  # URL_REPLACE: module-level constant expected from configuration
24 |     filename = stripped.translate(dict((ord(char), None) for char in '\/*?:"<>|'))
25 |     print(filename)
26 |     return filename
27 | 
28 | def create_folder_path_from_url(base_dir, url):
29 |     path = os.path.join(base_dir, str(url.split("/")[5]+"_"+url.split("/")[6]).translate(dict((ord(char), None) for char in '\/*?:"<>|')))
30 |     if not os.path.exists(path):
31 |         os.makedirs(path)
32 |     print(path)
33 |     return path
34 | 
35 | 
36 | def create_file(filename, w_page_source, URL_WEBSITE):
37 |     d = pq(w_page_source, parser='html')
38 |     ab = d.make_links_absolute(URL_WEBSITE)
39 |     soup = BeautifulSoup(ab.html(), "html.parser")
40 |     try:
41 |         with open(filename, "w", encoding='utf-8') as f:
42 |             f.write(soup.decode_contents())
43 |     except Exception:
44 |         print('something broke: ', filename)
45 |     return filename
--------------------------------------------------------------------------------
/02-selenium-safari/gather_links_for_processing.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import configparser
3 | import time
4 | from create_names import create_file
5 | #sys.stdout = codecs.getwriter('utf8')(sys.stdout)
6 | import urllib.parse
7 | import lxml
8 | import lxml.html
9 | from lxml.html import parse, tostring, open_in_browser, fromstring
10 | 
11 | 
12 | def get_toc_links(filename, w_page_source, URL_WEBSITE, toc_xpath=None):
13 |     create_file(filename, w_page_source, URL_WEBSITE)
14 |     html = lxml.html.fromstring(w_page_source)
15 |     html.make_links_absolute(URL_WEBSITE)
16 |     ab = lxml.html.tostring(html, pretty_print=True, method="html")
17 |     soup = BeautifulSoup(ab, 'lxml')
18 |     links = []
19 |     for link in soup.find_all('a'):
20 |         if 'href' in link.attrs:
21 |             links.append(str(link.attrs['href']))
22 |     urls = []
23 |     for i in links:
24 |         url, fragment = urllib.parse.urldefrag(i)
25 |         urls.append(url)
26 |     urls = f7(urls)
27 |     newurls = []
28 |     for i in urls:
29 |         if 'htm' in i:
30 |             newurls.append(i)
31 |     return newurls
32 | 
33 | def f7(seq):
34 |     seen = set()
35 |     seen_add = seen.add
36 |     return [x for x in seq if not (x in seen or seen_add(x))]
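
# Aside: f7 above is the classic order-preserving de-duplication recipe.
# On Python 3.7+, where dicts preserve insertion order, an equivalent
# one-liner is possible (f7_ordered is an illustrative name, not part of
# the original module):
def f7_ordered(seq):
    return list(dict.fromkeys(seq))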
--------------------------------------------------------------------------------
/02-selenium-safari/html_to_pdf.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | # try importing scandir and if found, use it as it's a few orders of magnitude faster than stock os.walk
4 | 
5 | import sys
6 | import os
7 | # Generate type library so that we can access constants
8 | 
9 | import process_html_remove_junk
10 | 
11 | def convertHTML2PDF(htmlPath, pdfPath):
12 |     'Convert an HTML document to PDF format'
13 |     import win32com.client.makepy
14 |     import win32com.client
15 |     from win32com.client import Dispatch
16 |     from win32com.client.dynamic import ERRORS_BAD_CONTEXT
17 |     import winerror
18 |     win32com.client.makepy.GenerateFromTypeLibSpec('Acrobat')
19 |     # Connect to Adobe Acrobat
20 |     avDoc = win32com.client.DispatchEx('AcroExch.AVDoc')
21 |     avDoc.Open(os.path.abspath(htmlPath), 'html2pdf')
22 |     # Save in PDF format
23 |     pdDoc = avDoc.GetPDDoc()
24 |     pdDoc.Save(win32com.client.constants.PDSaveFull, os.path.abspath(pdfPath))
25 |     pdDoc.Close()
26 |     # Close HTML document without prompting to save
27 |     avDoc.Close(True)
28 | 
29 | def file_conversion(folder):
30 |     #folder = os.path.normpath(sys.argv[1])
31 |     if folder is None:
32 |         directory = 'C:\\HTML'
33 |         files = process_html_remove_junk.walk_dir_fullfilename(directory)
34 |     else:
35 |         nfolder = os.path.join(folder, 'clean')
36 |         files = [os.path.join(nfolder, x) for x in os.listdir(nfolder)]
37 |     for filename in files:
38 |         basename = os.path.basename(filename)
39 |         extname = os.path.splitext(basename)
40 |         dirname = os.path.dirname(filename)
41 |         pdf = os.path.join(folder, 'pdf', extname[0]+'.pdf')
42 |         try:
43 |             print(pdf)
44 |             convertHTML2PDF(filename, pdf)
45 |         except Exception:
46 |             print('problem with: ' + filename)
--------------------------------------------------------------------------------
/02-selenium-safari/main.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | import os
4 | import sys
5 | import selenium
6 | from selenium import webdriver
7 | import create_names
8 | from create_names import create_folder_path_from_url, create_filename_from_url, create_file
9 | from zip_folder import zip_directory
10 | from merge_pdf_files import pyMerger
11 | import time
12 | import gather_links_for_processing
13 | from gather_links_for_processing import get_toc_links
14 | import random
15 | import process_html_remove_junk
16 | import html_to_pdf
17 | 
18 | # URL_WEBSITE, URL_LOGIN, USERNAME, PASSWORD, INPUT_FILE and BASE_DIR are
19 | # module-level settings; see the configuration sketch at the end of this module.
20 | 
21 | def file_merge(directory):
22 |     for path, dirnames, files in os.walk(directory):
23 |         pyMerger(path)
24 |         print(path)
25 | 
26 | def post_process():
27 |     file_merge(BASE_DIR)
28 |     zip_directory(BASE_DIR)
29 | 
30 | def pause_for_random_time():
31 |     time.sleep(random.randint(3,7))
32 | 
33 | def process_html_files(directory=None):
34 |     files_processed = process_html_remove_junk.process_html_files_removing_junk(directory)
35 |     return files_processed
36 | 
37 | def process_cleaned_files_into_pdf(directory):
38 |     files_processed_pdf = html_to_pdf.file_conversion(directory)
39 |     return files_processed_pdf
40 | 
41 | def grab_urls_from_file(INPUT_FILE):
42 |     with open(INPUT_FILE, 'r') as f:
43 |         urls = f.read().splitlines()
44 |     if len(urls) < 1:
45 |         urls = [sys.argv[1]]
46 |     print(urls)
47 |     return urls
48 | 
49 | def main():
50 |     '''
51 |     Read the file of URLs and process each one: save the pages, clean them, and convert them to PDF.
52 |     '''
53 |     if len(sys.argv) < 2:
54 |         sys.exit(0)
55 | 
56 |     w = webdriver.Chrome()
57 | 
58 |     domain_url, base_login = URL_WEBSITE, URL_LOGIN
59 |     w.get(domain_url + base_login)
60 |     loginElem = w.find_element_by_name('email')
61 |     loginElem.send_keys(USERNAME)
62 |     loginPass = w.find_element_by_name('password1')
63 |     loginPass.send_keys(PASSWORD)
64 |     time.sleep(3)
65 |     loginPass.submit()
66 |     time.sleep(3)
67 | 
68 |     urls = grab_urls_from_file(INPUT_FILE)
69 | 
70 |     for url in urls:
71 |         w.get(url)
72 |         base_dir = os.path.abspath(os.sep)
73 |         path = create_names.create_folder_path_from_url(BASE_DIR, url)
74 |         filename = os.path.join(path, create_names.create_filename_from_url(url) + '(t).html')
75 |         page_source = w.page_source
76 |         toc_table_only = page_source
77 |         toc = gather_links_for_processing.get_toc_links(filename, w.page_source, URL_WEBSITE)
78 |         for webpage_url in toc:
79 |             try:
80 |                 w.get(webpage_url)
81 |                 filename = create_names.create_filename_from_url(w.current_url)
82 |                 fout = create_names.create_file(os.path.join(path, filename + '.html'), w.page_source, URL_WEBSITE)
83 |             except Exception:
84 |                 print('something broke: ', filename)
85 |             pause_for_random_time()
86 |         list_of_list_of_filenames = process_html_files(path)
87 |         process_cleaned_files_into_pdf(path)
88 |         #pyMerger(directory)
89 | 
90 | 
91 | if __name__ == '__main__':
92 |     main()
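
# The functions above expect URL_WEBSITE, URL_LOGIN, USERNAME, PASSWORD,
# INPUT_FILE and BASE_DIR to exist at module level, but nothing defines them.
# A minimal sketch of one way to supply them with configparser (already
# imported elsewhere in this package); the settings.ini filename, section
# and keys are illustrative assumptions, and in practice this block would
# sit near the top of the module:
import configparser

_config = configparser.ConfigParser()
_config.read('settings.ini')  # assumes a settings.ini with a [site] section
URL_WEBSITE = _config.get('site', 'url_website', fallback='')
URL_LOGIN = _config.get('site', 'url_login', fallback='/login/')
USERNAME = _config.get('site', 'username', fallback='')
PASSWORD = _config.get('site', 'password', fallback='')
INPUT_FILE = _config.get('site', 'input_file', fallback='urls.txt')
BASE_DIR = _config.get('site', 'base_dir', fallback='.')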
--------------------------------------------------------------------------------
/02-selenium-safari/merge_pdf_files.py:
--------------------------------------------------------------------------------
1 | import os
2 | from PyPDF2 import PdfFileMerger, PdfFileReader
3 | 
4 | def pyMerger(directory):
5 |     pdfFiles = [f for f in os.listdir(directory) if f.lower().endswith("pdf")]
6 |     merger = PdfFileMerger()
7 | 
8 |     if pdfFiles != []:  # check if directory has pdf files in it
9 |         for filename in pdfFiles:
10 |             if filename != "_mergedFull.pdf":  # skip the merged file if it already exists
11 |                 merger.append(PdfFileReader(os.path.join(directory, filename), "rb"))
12 | 
13 |         # write the merged file next to its sources; overwrites any existing file
14 |         outputFile = os.path.join(directory, "_mergedFull.pdf")
15 |         merger.write(outputFile)
16 |     else:
17 |         print(directory + " has no pdf files in it.")
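
# A minimal usage sketch, mirroring how main.py drives this module
# (the directory path is illustrative only):
#
# for path, dirnames, files in os.walk(r'c:\pdf\some_book'):
#     pyMerger(path)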
--------------------------------------------------------------------------------
/02-selenium-safari/process_html_remove_junk.py:
--------------------------------------------------------------------------------
1 | 
2 | import os
3 | from bs4 import BeautifulSoup
4 | import create_names
5 | from create_names import create_filenames_for_conversion
6 | 
7 | 
8 | def grab_junk_tag(file):
9 |     with open(file, 'r', encoding='utf-8') as f:
10 |         data = f.read()
11 |     bsObj = BeautifulSoup(data, "html.parser")
12 |     head_elements_blacklist = ['topbar t-topbar']
13 |     body_elements_blacklist = ['expanded', 'annotator-modal-wrapper annotator-editor-modal annotator-editor annotator-hide', 'annotator-modal-wrapper annotator-delete-confirm-modal', 'annotator-adder','sbo-reading-menu sbo-menu-top', 'interface-controls interface-controls-top', 'sample-message', 'font-flyout','t-sbo-next sbo-next sbo-nav-bottom', 't-sbo-next sbo-next sbo-nav-top', 't-sbo-prev sbo-prev sbo-nav-bottom', 't-sbo-prev sbo-prev sbo-nav-top', 'reading-controls-bottom']
14 |     footer_elements_blacklist = ['pagefoot t-pagefoot']
15 |     html_elements_blacklists = [{'header': head_elements_blacklist}, {'div': body_elements_blacklist}, {'footer': footer_elements_blacklist}]
16 | 
17 |     for elements in html_elements_blacklists:
18 |         for element, tags in elements.items():
19 |             for tag in tags:
20 |                 try:
21 |                     temp = bsObj.find(element, {'class': tag})
22 |                     temp.decompose()
23 |                     #print('processed: ' + element + ' ' + tag)
24 |                 except Exception:
25 |                     print('error: ' + tag)
26 |                     continue
27 |     return bsObj
28 | 
29 | def check_for_folder_and_create(destfolder, additional=None):
30 |     if additional is not None:
31 |         new_folders = []
32 |         for folder in additional:
33 |             newfolder = os.path.join(destfolder, folder)
34 |             if not os.path.isdir(newfolder):
35 |                 os.makedirs(newfolder)
36 |             new_folders.append(newfolder)
37 |         return new_folders
38 |     if not os.path.isdir(destfolder):
39 |         os.makedirs(destfolder)
40 |     return destfolder
41 | 
42 | 
43 | def get_fullfilepaths_files_in_folder(folder_to_process, extfilter=None):
44 |     # extfilter=None means "match everything"
45 |     files_in_folder = [os.path.join(folder_to_process, x) for x in os.listdir(folder_to_process) if extfilter is None or extfilter in x]
46 |     return files_in_folder
47 | 
48 | def walk_dir_fullfilename(directory, extfilter=None):
49 |     all_files = []
50 |     for path, dirnames, files in os.walk(directory):
51 |         for file in files:
52 |             fullfilepath = os.path.join(path, file)
53 |             if extfilter is not None:
54 |                 if extfilter in fullfilepath and '(clean)' not in fullfilepath:
55 |                     all_files.append(fullfilepath)
56 |             else:
57 |                 all_files.append(fullfilepath)
58 |     return all_files
59 | 
60 | #walk_test=walk_dir_fullfilename(directory, extfilter='htm')
61 | 
62 | def process_html_files_removing_junk(directory):
63 |     #folder = os.path.normpath(sys.argv[1])
64 |     if directory is None:
65 |         directory = 'C:\\HTML'
66 |         files = walk_dir_fullfilename(directory)
67 |     else:
68 |         files = get_fullfilepaths_files_in_folder(directory, extfilter='htm')
69 |     list_of_list_of_filenames = []  # accumulated across every processed file
70 |     for filename in files:
71 |         try:
72 |             basename = os.path.basename(filename)
73 |             extname = os.path.splitext(basename)
74 |             dirname = os.path.dirname(filename)
75 |             destfolder = directory
76 |             filename_html, filename_pdf = create_names.create_filenames_for_conversion(destfolder, filename, extname[1])
77 |             filepath = check_for_folder_and_create(destfolder, additional=["clean","pdf","html"])
78 |             pdf = os.path.join(filepath[1], filename_pdf)
79 |             html_clean = os.path.join(filepath[0], filename_html)
80 |             try:
81 |                 bsObj = grab_junk_tag(filename)
82 |             except Exception:
83 |                 print('error: ' + filename)
84 |                 continue
85 |             with open(html_clean, "w", encoding='utf-8') as file:
86 |                 file.write(bsObj.decode_contents())
87 |             list_of_list_of_filenames.append([filename, html_clean, pdf])
88 |         except Exception:
89 |             print('problem with: ' + filename)
90 |     return list_of_list_of_filenames
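
# Aside: walk_dir_fullfilename above amounts to a recursive, filtered file
# search; on Python 3.4+ pathlib expresses the same idea more directly.
# A minimal equivalent sketch (walk_dir_pathlib is an illustrative name):
def walk_dir_pathlib(directory, extfilter=None):
    from pathlib import Path
    pattern = f"*{extfilter}*" if extfilter else "*"
    return [str(p) for p in Path(directory).rglob(pattern)
            if p.is_file() and '(clean)' not in str(p)]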
--------------------------------------------------------------------------------
/02-selenium-safari/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/02-selenium-safari/requirements.txt
--------------------------------------------------------------------------------
/02-selenium-safari/zip_folder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import zipfile
4 | import shutil
5 | 
6 | def zip_folder(folder_path, output_path):
7 |     parent_folder = os.path.dirname(folder_path)
8 |     # Retrieve the paths of the folder contents.
9 |     contents = os.walk(folder_path)
10 |     zip_file = zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED)
11 |     try:
12 |         for root, folders, files in contents:
13 |             # Include all subfolders, including empty ones.
14 |             for folder_name in folders:
15 |                 absolute_path = os.path.join(root, folder_name)
16 |                 relative_path = absolute_path.replace(parent_folder + '\\', '')
17 |                 print("Adding '{:s}' to archive.".format(absolute_path))
18 |                 zip_file.write(absolute_path, relative_path)
19 |             for file_name in files:
20 |                 absolute_path = os.path.join(root, file_name)
21 |                 relative_path = absolute_path.replace(parent_folder + '\\', '')
22 |                 print("Adding '{:s}' to archive.".format(absolute_path))
23 |                 zip_file.write(absolute_path, relative_path)
24 |         print("'{:s}' created successfully.".format(output_path))
25 |     except (OSError, zipfile.BadZipfile) as message:
26 |         print(message)
27 |         sys.exit(1)
28 |     finally:
29 |         zip_file.close()
30 |     shutil.rmtree(folder_path)
31 | 
32 | def zip_directory(directory):
33 |     folders = [ name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name)) ]
34 |     for i in folders:
35 |         zip_folder(os.path.join(directory, i), os.path.join(directory, i + ".zip"))
--------------------------------------------------------------------------------
/02-webscrape-celery/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/02-webscrape-celery/__init__.py
--------------------------------------------------------------------------------
/02-webscrape-celery/basic_consumer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pika
3 | import requests
4 | from bs4 import BeautifulSoup
5 | # python3 -m venv venv
6 | 
7 | # activates the virtualenv
8 | # source venv/bin/activate
9 | # pip3 install bs4 requests celery pika
10 | # python basic_consumer.py
11 | 
12 | def on_message(channel, method_frame, header_frame, body):
13 |     print(f"-> Starting: [{body}]")
14 |     r = requests.get(body)
15 |     soup = BeautifulSoup(r.text, "html.parser")
16 |     print(f"-> Extracted: {soup.html.head.title}")
17 |     print(f"-> Done: [{body}]")
18 |     channel.basic_ack(delivery_tag=method_frame.delivery_tag)
19 | 
20 | connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
21 | channel = connection.channel()
22 | print('* Handling messages.')
23 | 
24 | channel.basic_consume('pages', on_message)
25 | 
26 | try:
27 |     channel.start_consuming()
28 | except KeyboardInterrupt:
29 |     channel.stop_consuming()
30 | 
31 | connection.close()
--------------------------------------------------------------------------------
/02-webscrape-celery/basic_producer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import pika
3 | # activate spreadsheets_to_dataframes
4 | # python basic_producer.py
5 | 
6 | print("* Connecting to RabbitMQ broker")
7 | 
8 | connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
9 | channel = connection.channel()
10 | channel.queue_declare(queue='pages')
11 | 
12 | def produce():
13 |     # urls.txt sits alongside this script
14 |     with open('urls.txt', 'r') as f:
15 |         urls = f.read().splitlines()
16 | 
17 |     for url in urls:
18 |         print(f"* Pushed: [{url}]")
19 |         channel.basic_publish(exchange='', routing_key='pages', body=url)
20 | 
21 | produce()
22 | 
23 | connection.close()
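
# A hedged extension of the producer above: by default RabbitMQ keeps the
# 'pages' queue and its messages in memory only. To survive a broker
# restart, both the queue and each message must be marked durable /
# persistent (delivery_mode=2), along the lines of:
#
# channel.queue_declare(queue='pages', durable=True)
# channel.basic_publish(exchange='', routing_key='pages', body=url,
#                       properties=pika.BasicProperties(delivery_mode=2))
--------------------------------------------------------------------------------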
/02-webscrape-celery/consumer.py: -------------------------------------------------------------------------------- 1 | 2 | ##################### 3 | # using docker for both broker and backend 4 | # $ docker run -d -p 5672:5672 -p 15672:15672 --name url-rabbit rabbitmq:management 5 | # $ celery -A consumer worker --loglevel=info 6 | 7 | 8 | import requests 9 | from celery import Celery 10 | # pip install celery==3.1.21 11 | # ^ windows 12 | 13 | app = Celery('tasks', broker='amqp://localhost/') 14 | 15 | @app.task 16 | def download_url(url): 17 | print(f"-> Starting: [{url}]") 18 | try: 19 | req = requests.get(url) 20 | if req.status_code == 200: 21 | 22 | print(f"-> Success Download: [{url}]") 23 | except: 24 | print(f'error: {url}') 25 | 26 | 27 | # celery -A consumer worker --loglevel=info 28 | # ^ run above celery command in terminal while situated in same folder as current file 29 | 30 | # from celery.task.control import discard_all 31 | # discard_all() 32 | # ^ use above to clear celery queue 33 | 34 | 35 | -------------------------------------------------------------------------------- /02-webscrape-celery/data.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/02-webscrape-celery/data.html -------------------------------------------------------------------------------- /02-webscrape-celery/producer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from celery import Celery 3 | import consumer 4 | 5 | app = Celery('tasks', broker='amqp://localhost//') 6 | 7 | def produce(): 8 | with open(f'urls.txt', 'r') as f: 9 | urls = f.read().splitlines() 10 | 11 | for url in urls: 12 | consumer.download_url.delay(url) 13 | print(f"* Submitted: [{url}]") 14 | 15 | produce() 16 | 17 | ##################### 18 | 19 | # urls.txt # example 20 | 21 | """ 22 | http://www.apple.com 23 | http://www.amazon.com 24 | http://www.abc.xyz 25 | http://www.microsoft.com 26 | http://www.facebook.com 27 | http://www.alibabagroup.com 28 | http://www.tencent.com 29 | http://www.berkshirehathaway.com 30 | http://www.jpmorganchase.com 31 | http://www.exxonmobil.com 32 | http://www.jnj.com 33 | http://usa.visa.com 34 | http://www.shell.com 35 | http://www.samsung.com 36 | http://www.bankofamerica.com 37 | http://www.icbc.com.cn 38 | http://www.wellsfargo.com 39 | http://corporate.walmart.com 40 | http://www.nestle.com 41 | http://www.unitedhealthgroup.com 42 | http://www.intel.com 43 | http://www.att.com 44 | http://www.chevron.com 45 | http://www.ccb.com 46 | http://www.homedepot.com 47 | http://www.pfizer.com 48 | http://www.verizon.com 49 | http://www.toyota.co.jp 50 | http://www.ab-inbev.com 51 | http://www.mastercard.com 52 | """ 53 | -------------------------------------------------------------------------------- /02-webscrape-celery/urls.txt: -------------------------------------------------------------------------------- 1 | http://www.apple.com 2 | http://www.amazon.com 3 | http://www.abc.xyz 4 | http://www.microsoft.com 5 | http://www.facebook.com 6 | http://www.alibabagroup.com 7 | http://www.tencent.com 8 | http://www.berkshirehathaway.com 9 | http://www.jpmorganchase.com 10 | http://www.exxonmobil.com 11 | http://www.jnj.com 12 | http://usa.visa.com 13 | http://www.shell.com 14 | http://www.samsung.com 15 | http://www.bankofamerica.com 16 | http://www.icbc.com.cn 17 | http://www.wellsfargo.com 18 | 
http://corporate.walmart.com 19 | http://www.nestle.com 20 | http://www.unitedhealthgroup.com 21 | http://www.intel.com 22 | http://www.att.com 23 | http://www.chevron.com 24 | http://www.ccb.com 25 | http://www.homedepot.com 26 | http://www.pfizer.com 27 | http://www.verizon.com 28 | http://www.toyota.co.jp 29 | http://www.ab-inbev.com 30 | http://www.mastercard.com 31 | http://www.cisco.com 32 | http://www.pg.com 33 | http://www.novartis.com 34 | http://www.petrochina.com.cn 35 | http://www.roche.com 36 | http://www.boeing.com 37 | http://www.coca-colacompany.com 38 | http://www.hsbc.com 39 | http://www.tsmc.com 40 | http://www.chinamobileltd.com 41 | http://www.oracle.com 42 | http://www.abchina.com 43 | http://www.netflix.com 44 | http://www.citigroup.com 45 | http://www.lvmh.com 46 | http://www.merck.com 47 | http://www.total.com 48 | http://www.pingan.com 49 | http://www.thewaltdisneycompany.com 50 | http://www.pepsico.com 51 | -------------------------------------------------------------------------------- /04-other-analysis/example_pandas.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import pandas as pd 5 | import tldextract 6 | 7 | filepath = r'2019-11-04T00-02-57_stlregionalchamber_largest_employers.csv' 8 | 9 | df = pd.read_csv(filepath) 10 | df.columns = df.columns.str.strip().to_list() 11 | df.columns = df.columns.str.replace(" ", "_") 12 | df.columns = df.columns.str.replace("(", "_", regex=False).str.replace(")", "_", regex=False) 13 | df.columns = df.columns.str.lower() 14 | 15 | df_nan = df[df.website.isna()] 16 | df = df.dropna(subset=['website']) 17 | 18 | df['website'] = df.website.str.replace('www.', '', regex=False) 19 | df['website'] = df['website'].apply(lambda x: "https://" + x) 20 | 21 | folderpath = r'D:\PROJECTS\presentations\stl_data' 22 | 23 | files = glob.glob(folderpath + "\\*.csv") 24 | 25 | df_list = [] 26 | for file in files: 27 | df = pd.read_csv(file) 28 | df['year'] = os.path.basename(file).split("_")[0] 29 | df_list.append(df) 30 | 31 | df = pd.concat(df_list) 32 | 33 | df.columns = df.columns.str.strip().to_list() 34 | df.columns = df.columns.str.replace(" ", "_") 35 | df.columns = df.columns.str.replace("(", "_", regex=False).str.replace(")", "_", regex=False) 36 | df.columns = df.columns.str.lower() 37 | df.columns = df.columns.str.replace(".", "", regex=False) 38 | 39 | df = df.dropna(subset=['website']) 40 | df['website'] = df.website.str.replace('www.', '', regex=False) 41 | df['website'] = df['website'].apply(lambda x: "https://" + x) 42 | df = df.sort_values('st_louis_employees', ascending=False) 43 | df['st_louis_employees'] = df.st_louis_employees.astype(int) 44 | df['website_domain'] = df['website'].apply(lambda x: tldextract.extract(x).domain) 45 | 46 | df_groups = [] 47 | 48 | for i, df_group in df.groupby('website_domain'): 49 | if df_group.index.size > 1: 50 | df_group = df_group.sort_values('year') 51 | df_group['pct_chg'] = df_group['st_louis_employees'].pct_change() 52 | else: 53 | df_group['pct_chg'] = None 54 | 55 | df_groups.append(df_group) 56 | 57 | df_all = pd.concat(df_groups) 58 | df_all = df_all.sort_values('st_louis_employees', ascending=False) 59 | -------------------------------------------------------------------------------- /04-other-analysis/read_sec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | import pandas as pd 5 | 6 | # assumption: folder_path holds the downloaded quarterly SEC financial-statement ZIP archives 7 | folder_path = r'.' 8 | files = sorted(f for f in os.listdir(folder_path) if f.endswith('.zip')) 9 | 10 | subs = [] 11 | for filename in files: 12 | print(f"Companies in {filename}") 13 | zip_filepath =
os.path.join(folder_path, filename) 14 | data_file = zipfile.ZipFile(zip_filepath) 15 | 16 | df_sub = pd.read_csv(data_file.open('sub.txt'), sep='\t', error_bad_lines=False) 17 | 18 | subs.append(df_sub) 19 | 20 | revenues = [] 21 | for filename in files: 22 | print(f"Companies in {filename}") 23 | zip_filepath = os.path.join(folder_path, filename) 24 | data_file = zipfile.ZipFile(zip_filepath) 25 | 26 | df_num = pd.read_csv(data_file.open('num.txt'), encoding="latin1", sep='\t', error_bad_lines=False) 27 | df_revenues = df_num[df_num['tag'].str.contains('Revenues', regex=True)] 28 | revenues.append(df_revenues) 29 | 30 | df_revs = pd.concat(revenues) 31 | 32 | df_revs.sort_values('ddate', inplace=True) 33 | df_revs['cik'] = df_revs['adsh'].apply(lambda x: x.split("-")[0]) 34 | 35 | for i, df_group in df_revs.groupby('cik'): 36 | print(df_group) 37 | 38 | # df_sub_nodupes = df_sub.drop_duplicates(subset='name') 39 | # df_sub_nodupes.head(100) 40 | 41 | # df['stprinc'].drop_duplicates() 42 | 43 | df_missouri = df_sub[(df_sub['stprinc'].isin(['MO']) | df_sub['stprma'].isin(['MO'])) ] 44 | print(df_missouri[df_missouri['form'].isin(['10-K'])].sort_values('name')) 45 | 46 | df_sec_symbols = pd.read_json(r'D:\PROJECTS\presentations\meetup-2019-spreadsheets-to-dataframes\company_tickers.json').T 47 | 48 | """ 49 | curl "https://api-global.morningstar.com/sal-service/v1/stock/newfinancials/0P0000014I/incomeStatement/detail?dataType=A^&reportType=A^&locale=en^&^&operation=export" -H "Sec-Fetch-Mode: cors" -H "Origin: https://www.morningstar.com" -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36" -H "Accept: application/json, text/plain, */*" -H "Referer: https://www.morningstar.com/stocks/xnys/cat/financials" -H "X-API-RequestId: 52a823bc-0d1f-6c2a-51a9-fb553553a192" -H "ApiKey: lstzFDEOhfFNMLikKa0am9mgEKLBl49T" -H "X-API-REALTIME-E: eyJlbmMiOiJBMTI4R0NNIiwiYWxnIjoiUlNBLU9BRVAifQ.XmuAS3x5r-0MJuwLDdD4jNC6zjsY7HAFNo2VdvGg6jGcj4hZ4NaJgH20ez313H8An9UJrsUj8ERH0R8UyjQu2UGMUnJ5B1ooXFPla0LQEbN_Em3-IG84YPFcWVmEgcs1Fl2jjlKHVqZp04D21UvtgQ4xyPwQ-QDdTxHqyvSCpcE.ACRnQsNuTh1K_C9R.xpLNZ8Cc9faKoOYhss1CD0A4hG4m0M7-LZQ0fISw7NUHwzQs2AEo9ZXfwOvAj1fCbcE96mbKQo8gr7Oq1a2-piYXM1X5yNMcCxEaYyGinpnf6PGqbdr6zbYZdqyJk0KrxWVhKSQchLJaLGJOts4GlpqujSqJObJQcWWbkJQYKG9K7oKsdtMAKsHIVo5-0BCUbjKVnHJNsYwTsI7xn2Om8zGm4A.nBOuiEDssVFHC_N68tDjVA" -H "X-SAL-ContentType: e7FDDltrTy+tA2HnLovvGL0LFMwT+KkEptGju5wXVTU=" -H "DNT: 1" --compressed 50 | 51 | """ 52 | 53 | df_missouri_qtr_ann = df_missouri[df_missouri['form'].isin(['10-Q', '10-K'])] 54 | 55 | df_missouri['instance'].apply(lambda x: x.split("-")) 56 | 57 | df_cocacola = df_sub[df_sub['name'].str.contains('COCA COLA CO', regex=True)] 58 | 59 | cocacola_adsh = df_cocacola.adsh.to_list()[0] 60 | 61 | df_pre = pd.read_csv(data_file.open('pre.txt'), sep='\t', error_bad_lines=False) 62 | df_num = pd.read_csv(data_file.open('num.txt'), sep='\t', error_bad_lines=False) 63 | df_pre.head(100) 64 | 65 | df_ko_num = df_num[df_num['adsh'].isin(['0000021344-19-000034'])] 66 | df_revenues = df_num[df_num['tag'].str.contains('Revenues', regex=True)] 67 | df_revenues.sort_values('adsh').head(100) 68 | 69 | df_ko_num = df_ko_num.sort_values("tag").drop_duplicates(subset=['tag']) 70 | # note: the filing form (10-Q/10-K) lives in sub.txt, so filtering num rows by form requires a merge with df_sub 71 | 72 | 73 | # df_ = df_.iloc[:, 0:len(df_head.columns.tolist())] 74 | # df_.columns = df_head.columns.to_list() 75 | 76 |
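77 | # A minimal sketch (assuming the df_revs frame built above): pivot the collected
78 | # revenue facts into one time series per company. 'ddate' and 'value' are standard
79 | # columns in the SEC financial-statement num.txt data set.
80 | df_revs_ts = df_revs.pivot_table(index='ddate', columns='cik', values='value', aggfunc='first')
81 | print(df_revs_ts.tail())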
-------------------------------------------------------------------------------- /06-flask/flask-rss/README.md: -------------------------------------------------------------------------------- 1 | # Flask-RSS 2 | $ python main.py 3 | -------------------------------------------------------------------------------- /06-flask/flask-rss/main.py: -------------------------------------------------------------------------------- 1 | 2 | import feedparser 3 | import pandas as pd 4 | from flask import Flask, render_template 5 | 6 | app = Flask(__name__) 7 | 8 | @app.route("/") 9 | def index(): 10 | 11 | feed = feedparser.parse(r'http://www.prweb.com/rss2/daily.xml') 12 | 13 | df = pd.json_normalize(feed.entries, sep='_') 14 | 15 | df['source'] = "prweb" 16 | 17 | df = df.sort_values('published', ascending=False) 18 | 19 | df = df[['published', 'link', 'title','source']] 20 | 21 | return render_template("reader.html", df=df.itertuples(), columns_to_display=['published', 'Source', 'Headline']) 22 | 23 | if __name__ == "__main__": 24 | app.run(debug=True) 25 | -------------------------------------------------------------------------------- /06-flask/flask-rss/static/css/bootstrap-theme.min.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"sources":["less/theme.less","less/mixins/vendor-prefixes.less","less/mixins/gradients.less","less/mixins/reset-filter.less"],"names":[],"mappings":";;;;AAmBA,YAAA,aAAA,UAAA,aAAA,aAAA,aAME,YAAA,EAAA,KAAA,EAAA,eC2CA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBDvCR,mBAAA,mBAAA,oBAAA,oBAAA,iBAAA,iBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBCsCA,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBDlCR,qBAAA,sBAAA,sBAAA,uBAAA,mBAAA,oBAAA,sBAAA,uBAAA,sBAAA,uBAAA,sBAAA,uBAAA,+BAAA,gCAAA,6BAAA,gCAAA,gCAAA,gCCiCA,mBAAA,KACQ,WAAA,KDlDV,mBAAA,oBAAA,iBAAA,oBAAA,oBAAA,oBAuBI,YAAA,KAyCF,YAAA,YAEE,iBAAA,KAKJ,aErEI,YAAA,EAAA,IAAA,EAAA,KACA,iBAAA,iDACA,iBAAA,4CAAA,iBAAA,qEAEA,iBAAA,+CCnBF,OAAA,+GH4CA,OAAA,0DACA,kBAAA,SAuC2C,aAAA,QAA2B,aAAA,KArCtE,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAgBN,aEtEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAiBN,aEvEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAkBN,UExEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,gBAAA,gBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,iBAAA,iBAEE,iBAAA,QACA,aAAA,QAMA,mBAAA,0BAAA,yBAAA,0BAAA,yBAAA,yBAAA,oBAAA,2BAAA,0BAAA,2BAAA,0BAAA,0BAAA,6BAAA,oCAAA,mCAAA,oCAAA,mCAAA,mCAME,iBAAA,QACA,iBAAA,KAmBN,aEzEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAoBN,YE1EI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iB
AAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,kBAAA,kBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,mBAAA,mBAEE,iBAAA,QACA,aAAA,QAMA,qBAAA,4BAAA,2BAAA,4BAAA,2BAAA,2BAAA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,+BAAA,sCAAA,qCAAA,sCAAA,qCAAA,qCAME,iBAAA,QACA,iBAAA,KA2BN,eAAA,WClCE,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBD2CV,0BAAA,0BE3FI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GF0FF,kBAAA,SAEF,yBAAA,+BAAA,+BEhGI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GFgGF,kBAAA,SASF,gBE7GI,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SH+HA,cAAA,ICjEA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBD6DV,sCAAA,oCE7GI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBD0EV,cAAA,iBAEE,YAAA,EAAA,IAAA,EAAA,sBAIF,gBEhII,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SHkJA,cAAA,IAHF,sCAAA,oCEhII,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBDgFV,8BAAA,iCAYI,YAAA,EAAA,KAAA,EAAA,gBAKJ,qBAAA,kBAAA,mBAGE,cAAA,EAqBF,yBAfI,mDAAA,yDAAA,yDAGE,MAAA,KE7JF,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,UFqKJ,OACE,YAAA,EAAA,IAAA,EAAA,qBC3HA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBDsIV,eEtLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAKF,YEvLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAMF,eExLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAOF,cEzLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAeF,UEjMI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFuMJ,cE3MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFwMJ,sBE5MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyMJ,mBE7MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0MJ,sBE9MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2MJ,qBE/MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF+MJ,sBElLI,iBAAA,yKACA,iBAAA,oKACA,iBAAA,iKFyLJ,YACE,cAAA,IC9KA,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBDgLV,wBAAA,8BAAA,8BAGE,YAAA,EAAA,KAAA,EAAA,QEnOE,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFiOF,aAAA,QALF,+BAAA,qCAAA,qCAQI,YAAA,KAUJ,OCnME,mBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,EAAA,IAAA,IAAA,gBD4MV,8BE5PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyPJ,8BE7PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0PJ,8BE9PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2PJ,2BE/PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF4PJ,8BEhQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF6PJ,6BEjQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFoQJ,MExQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFsQF,aAAA,QC3NA,mBAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA,qBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA"} -------------------------------------------------------------------------------- /06-flask/flask-rss/static/css/reader.css: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/css/reader.css -------------------------------------------------------------------------------- /06-flask/flask-rss/static/css/style.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/css/style.css -------------------------------------------------------------------------------- /06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/06-flask/flask-rss/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /06-flask/flask-rss/static/js/npm.js: -------------------------------------------------------------------------------- 1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment. 2 | require('../../js/transition.js') 3 | require('../../js/alert.js') 4 | require('../../js/button.js') 5 | require('../../js/carousel.js') 6 | require('../../js/collapse.js') 7 | require('../../js/dropdown.js') 8 | require('../../js/modal.js') 9 | require('../../js/tooltip.js') 10 | require('../../js/popover.js') 11 | require('../../js/scrollspy.js') 12 | require('../../js/tab.js') 13 | require('../../js/affix.js') -------------------------------------------------------------------------------- /06-flask/flask-rss/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 4 | 5 | {% endblock %} 6 | 7 | {% block content %} 8 | 9 |
<h1>Flask-RSS</h1> 9 | 10 | <p>RSS Reader</p> 11 | 12 | <h3>Feeds</h3> 13 | 18 | 19 | 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /06-flask/flask-rss/templates/layout.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html> 3 | <head> 4 | <title>{% block title %}{% endblock %} ~ RssReader</title> 5 | 6 | 7 | 8 | 9 | 10 | {% block header %}{% endblock %} 11 | </head> 12 | 13 | <body> 14 | 15 | <div class="container"> 16 | 17 | {% block content %}{% endblock %} 18 | </div> 19 | 20 | 25 | 26 | 27 | </body> 28 | </html> -------------------------------------------------------------------------------- /06-flask/flask-rss/templates/notfound.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 4 | 404 5 | {% endblock %} 6 | 7 | {% block content %} 8 |
<h1>404 - Page not found</h1> 9 | 10 | <p>This page does not exist. Could it be that you made a typo?</p> 11 | <p>Or did someone give you a wrong link?! :o If so, feel free to give them a smack!</p> 12 | 13 | <h3>Featured Websites</h3> 14 | 21 | 22 | {% endblock %} -------------------------------------------------------------------------------- /06-flask/flask-rss/templates/reader.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 4 | {% if name %} 5 | {{ name }} 6 | {% else %} 7 | Reader 8 | {% endif %} 9 | {% endblock %} 10 | 11 | {% block content %} 12 | <table> 13 | <thead><tr> 14 | {% for column in columns_to_display %} 15 | <th>{{ column }}</th> 16 | {%- endfor -%} 17 | </tr></thead> 18 | 19 | <tbody> 20 | {% for row in df %} 21 | <tr> 22 | <td>{{ row.published }}</td> 23 | <td>{{ row.source }}</td> 24 | <td>{{ row.title }}</td> 25 | </tr> 26 | 27 | {%- endfor -%} 28 | </tbody> 29 | </table> 30 | 31 | {% endblock %} 32 | -------------------------------------------------------------------------------- /06-flask/flask-rss/templates/table.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 4 | {% if name %} 5 | {{ name }} 6 | {% else %} 7 | Reader 8 | {% endif %} 9 | {% endblock %} 10 | 11 | {% block content %} 12 | <table> 13 | <thead><tr> 14 | {% for column in columns_to_display %} 15 | <th>{{ column }}</th> 16 | {%- endfor -%} 17 | </tr></thead> 18 | 19 | <tbody> 20 | {% for row in df %} 21 | <tr> 22 | <td>{{ row.published }}</td> 23 | <td>{{ row.source }}</td> 24 | <td>{{ row.title }}</td> 25 | </tr> 26 | 27 | {%- endfor -%} 28 | </tbody> 29 | </table>
30 | 31 | {% endblock %} 32 | -------------------------------------------------------------------------------- /07-airflow/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Deployment 3 | 4 | https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html 5 | -------------------------------------------------------------------------------- /07-airflow/dags/example_postgres.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from airflow import DAG 4 | from airflow.providers.postgres.operators.postgres import PostgresOperator 5 | 6 | default_args = {"owner": "airflow"} 7 | 8 | # create_stock_table, populate_stock_table, get_all_stocks are examples of tasks created by 9 | # instantiating the Postgres Operator 10 | 11 | with DAG( 12 | dag_id="postgres_operator_dag", 13 | start_date=datetime.datetime(2020, 2, 2), 14 | schedule_interval="@once", 15 | default_args=default_args, 16 | catchup=False, 17 | ) as dag: 18 | 19 | create_stock_table = PostgresOperator( 20 | task_id="create_stock_table", 21 | postgres_conn_id="postgres_default", 22 | sql="sql/stock_schema.sql" 23 | ) 24 | 25 | populate_stock_table = PostgresOperator( 26 | task_id="populate_stock_table", 27 | postgres_conn_id="postgres_default", 28 | sql="sql/stock_insert.sql" 29 | ) 30 | 31 | get_all_stocks = PostgresOperator( 32 | task_id="get_all_stocks", postgres_conn_id="postgres_default", sql="SELECT * FROM stocks;" 33 | ) 34 | 35 | create_stock_table >> populate_stock_table >> get_all_stocks 36 | -------------------------------------------------------------------------------- /07-airflow/dags/sql/stock_insert.sql: -------------------------------------------------------------------------------- 1 | insert into stocks values (1, 'MSFT', 'Microsoft', '2018-07-05', 124.35); 2 | insert into stocks values (2, 'GOOG', 'Google', '2019-05-01', 234.42); 3 | insert into stocks values (3, 'TSLA', 'Tesla', '2020-06-23', 2434.22); 4 | insert into stocks values (4, 'AMZN', 'Amazon', '2013-08-11', 2344.34); 5 | -------------------------------------------------------------------------------- /07-airflow/dags/sql/stock_schema.sql: -------------------------------------------------------------------------------- 1 | -- create pet table 2 | CREATE TABLE IF NOT EXISTS stocks ( 3 | id SERIAL PRIMARY KEY, 4 | symbol VARCHAR NOT NULL, 5 | name VARCHAR NOT NULL, 6 | date DATE NOT NULL, 7 | price numeric NOT NULL); 8 | -------------------------------------------------------------------------------- /07-airflow/dags/stock_analysis_dag.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.operators.python_operator import PythonOperator 3 | 4 | from datetime import datetime, timedelta 5 | import datetime as dt 6 | import pandas as pd 7 | import yfinance as yf 8 | import requests 9 | 10 | from functools import reduce 11 | 12 | 13 | ############################################ 14 | # DEFINE AIRFLOW DAG (SETTINGS + SCHEDULE) 15 | ############################################ 16 | default_args = { 17 | 'owner': 'airflow', 18 | 'depends_on_past': False, 19 | 'email': ['user@gmail.com'], 20 | 'email_on_failure': False, 21 | 'email_on_retry': False, 22 | 'retries': 1 23 | } 24 | 25 | dag = DAG( 'stocks_analysis_ETL_7AM', 26 | default_args=default_args, 27 | description='Collect Stock Prices For Analysis', 28 | catchup=False, 29 | start_date= datetime(2020, 6, 23), 30 | 
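# (note) schedule_interval below uses standard five-field cron syntax: minute hour day-of-month month day-of-week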
schedule_interval='0 7 * * *' # <-- run once a day at 07:00 ('* 7 * * *' would fire every minute of that hour) 31 | ) 32 | 33 | tickers = ['AAPL', 'AMZN', 'BLK', 'T', 'TSLA'] # <-- Initial Tickers List. It will be available globally for all functions. 34 | 35 | #################################################### 36 | # DEFINE PYTHON FUNCTIONS 37 | #################################################### 38 | 39 | def fetch_prices_function(**kwargs): # <-- Remember to include "**kwargs" in all the defined functions 40 | print('1 Fetching stock prices and removing duplicates...') 41 | stocks_prices = [] 42 | for i in range(0, len(tickers)): 43 | prices = yf.download(tickers[i], period = 'max').iloc[: , :5].dropna(axis=0, how='any') 44 | prices = prices.loc[~prices.index.duplicated(keep='last')] 45 | prices = prices.reset_index() 46 | prices.insert(loc = 1, column = 'Stock', value = tickers[i]) 47 | stocks_prices.append(prices) 48 | return stocks_prices # <-- This list is the output of the fetch_prices_function and the input for the functions below 49 | 50 | 51 | def stocks_plot_function(**kwargs): 52 | print('2 Pulling stocks_prices to concatenate sub-lists to create a combined dataset + write to CSV file...') 53 | ti = kwargs['ti'] 54 | stocks_prices = ti.xcom_pull(task_ids='fetch_prices_task') # <-- xcom_pull is used to pull the stocks_prices list generated above 55 | stock_plots_data = pd.concat(stocks_prices, ignore_index=True) 56 | stock_plots_data.to_csv('/Users/anbento/Documents/Data_Sets/Medium/stocks_plots_data.csv', index=False) 57 | 58 | print('DF Shape: ', stock_plots_data.shape) 59 | print(stock_plots_data.head(5)) 60 | print('Completed \n\n') 61 | 62 | def stocks_table_function(**kwargs): 63 | print('3 Creating aggregated dataframe with stock stats for last available date + write to CSV file...') 64 | ti = kwargs['ti'] 65 | stocks_prices = ti.xcom_pull(task_ids='fetch_prices_task') # <-- xcom_pull is used to pull the stocks_prices list generated above 66 | stocks_adj_close = [] 67 | for i in range(0, len(stocks_prices)): 68 | adj_price= stocks_prices[i][['Date','Adj Close']] 69 | adj_price.set_index('Date', inplace = True) 70 | adj_price.columns = [tickers[i]] 71 | stocks_adj_close.append(adj_price) 72 | 73 | stocks_adj_close = reduce(lambda left,right: pd.merge(left, right, left_index = True, right_index = True ,how='outer'), stocks_adj_close) 74 | stocks_adj_close.sort_index(ascending = False, inplace = True) 75 | stocks_adj_close.index = pd.to_datetime(stocks_adj_close.index).date 76 | stocks_adj_close.to_csv('/Users/anbento/Documents/Data_Sets/Medium/stocks_table_data.csv') # <-- write the aggregated table; path assumed, mirroring the plot output above 77 | 78 | ########################################## 79 | # DEFINE AIRFLOW OPERATORS 80 | ########################################## 81 | 82 | fetch_prices_task = PythonOperator(task_id = 'fetch_prices_task', 83 | python_callable = fetch_prices_function, 84 | provide_context = True, 85 | dag= dag ) 86 | 87 | stocks_plot_task= PythonOperator(task_id = 'stocks_plot_task', 88 | python_callable = stocks_plot_function, 89 | provide_context = True, 90 | dag= dag) 91 | 92 | stocks_table_task = PythonOperator(task_id = 'stocks_table_task', 93 | python_callable = stocks_table_function, 94 | provide_context = True, 95 | dag= dag) 96 | 97 | ########################################## 98 | # DEFINE TASKS HIERARCHY 99 | ########################################## 100 | 101 | fetch_prices_task >> stocks_plot_task >> stocks_table_task 102 | -------------------------------------------------------------------------------- /07-airflow/dags/stocks.py: -------------------------------------------------------------------------------- 1 | # import json 2 | # from datetime import datetime, timedelta 3 | # 4
| # import redis 5 | # from airflow.models import DAG 6 | # from airflow.operators import PythonOperator 7 | # 8 | # stocks = ('AAPL', 'AMZN', 'GOOGL', 'MSFT', 9 | # 'FB', 'BABA', 'BRK.B', 'JPM', 10 | # 'XOM', 'JNJ', 'V', 'BAC', 'WFC', 11 | # 'WMT', 'UNH', 'INTC', 'T', 'CVX', 12 | # 'HD', 'PFE', 'VZ', 'MA', 'CSCO', 'PG', 13 | # 'BA', 'KO', 'ORCL', 'NFLX', 'C', 'MRK', 14 | # 'DIS') 15 | # 16 | # 17 | # def get_stocks(ds, **context): 18 | # symbol = context['params']['symbol'] 19 | # 20 | # pg_hook = postgres_hook(postgres_conn_id='stocks') 21 | # api_hook = http_hook(http_conn_id='alphavantage', method='GET') 22 | # 23 | # # If either of these raises an exception then we'll be notified via 24 | # # Airflow 25 | # resp = api_hook.run(f'query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey=537201H9R203WT4C&datatype=csv') 26 | # resp = json.loads(resp.content) 27 | # 28 | # # These are the only valid stocks the DB supports at the moment. Anything 29 | # # else that turns up will be ignored. 30 | # 31 | # stocks_insert = f"""INSERT INTO stocks (symbol, valid_until, price) 32 | # VALUES ({symbol}, {valid_until}, {price});""" 33 | # 34 | # # If this raises an exception then we'll be notified via Airflow 35 | # valid_until = datetime.fromtimestamp(resp['timestamp']) 36 | # 37 | # for iso2, price in resp['stocks'].items(): 38 | # # If converting the price to a float fails for whatever reason then 39 | # # just move on. 40 | # try: 41 | # price = float(price) 42 | # except: 43 | # continue 44 | # 45 | # iso2 = iso2.upper().strip() 46 | # 47 | # if iso2 not in stocks or price < 0: 48 | # continue 49 | # 50 | # pg_hook.run(stocks_insert, parameters=(iso2, 51 | # valid_until, 52 | # price)) 53 | # 54 | # 55 | # def cache_latest_stocks(ds, **kwargs): 56 | # redis_conn = redis.StrictRedis(host='redis') 57 | # pg_hook = postgres_hook(postgres_conn_id='stocks') 58 | # latest_stocks = """SELECT DISTINCT ON (symbol) 59 | # symbol, price 60 | # FROM stocks 61 | # ORDER BY symbol, valid_until DESC;""" 62 | # 63 | # for iso2, stock in pg_hook.get_records(latest_stocks): 64 | # redis_conn.set(iso2, stock) 65 | # 66 | # 67 | # args = { 68 | # 'owner': 'ryan', 69 | # 'depends_on_past': False, 70 | # 'start_date': datetime.utcnow(), 71 | # 'retries': 1, 72 | # 'retry_delay': timedelta(minutes=5), 73 | # } 74 | # 75 | # # Run at the top of the hour Monday to Friday. 76 | # # Note: This doesn't line up with the market hours of 77 | # # 10PM Sunday till 10PM Friday GMT. 78 | # dag = DAG(dag_id='stocks', 79 | # default_args=args, 80 | # schedule_interval='0 * * * 1,2,3,4,5', 81 | # dagrun_timeout=timedelta(seconds=30)) 82 | # 83 | # # loop through the lob's we want to use to build up our dag 84 | # for stock in stocks: 85 | # get_stocks_task = \ 86 | # PythonOperator(task_id='get_stocks', 87 | # provide_context=True, 88 | # op_kwargs={"stock": stock}, 89 | # python_callable=get_stocks, 90 | # dag=dag) 91 | # 92 | # cache_latest_stocks_task = \ 93 | # PythonOperator(task_id='cache_latest_stocks', 94 | # provide_context=True, 95 | # python_callable=cache_latest_stocks, 96 | # dag=dag) 97 | # 98 | # get_stocks_task.set_downstream(cache_latest_stocks_task) 99 | -------------------------------------------------------------------------------- /07-airflow/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | 19 | # Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. 20 | # 21 | # WARNING: This configuration is for local development. Do not use it in a production deployment. 22 | # 23 | # This configuration supports basic configuration using environment variables or an .env file 24 | # The following variables are supported: 25 | # 26 | # AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. 27 | # Default: apache/airflow:master-python3.8 28 | # AIRFLOW_UID - User ID in Airflow containers 29 | # Default: 50000 30 | # AIRFLOW_GID - Group ID in Airflow containers 31 | # Default: 50000 32 | # _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account. 33 | # Default: airflow 34 | # _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account. 35 | # Default: airflow 36 | # 37 | # Feel free to modify this file to suit your needs. 38 | --- 39 | version: '3' 40 | x-airflow-common: 41 | &airflow-common 42 | image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.0.2} 43 | environment: 44 | &airflow-common-env 45 | AIRFLOW__CORE__EXECUTOR: CeleryExecutor 46 | AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow 47 | AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow 48 | AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 49 | AIRFLOW__CORE__FERNET_KEY: '' 50 | AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' 51 | AIRFLOW__CORE__LOAD_EXAMPLES: 'true' 52 | AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth' 53 | volumes: 54 | - ./dags:/opt/airflow/dags 55 | - ./logs:/opt/airflow/logs 56 | - ./plugins:/opt/airflow/plugins 57 | user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}" 58 | depends_on: 59 | redis: 60 | condition: service_healthy 61 | postgres: 62 | condition: service_healthy 63 | # NOTE: the stock DAGs import yfinance and pandas; install them into the Airflow 64 | # containers by extending the image or, on Airflow >= 2.1.1, via _PIP_ADDITIONAL_REQUIREMENTS 65 | 66 | services: 67 | postgres: 68 | image: postgres:13 69 | environment: 70 | POSTGRES_USER: airflow 71 | POSTGRES_PASSWORD: airflow 72 | POSTGRES_DB: airflow 73 | volumes: 74 | - postgres-db-volume:/var/lib/postgresql/data 75 | healthcheck: 76 | test: [ "CMD", "pg_isready", "-U", "airflow" ] 77 | interval: 5s 78 | retries: 5 79 | restart: always 80 | 81 | redis: 82 | image: redis:latest 83 | ports: 84 | - 6379:6379 85 | healthcheck: 86 | test: [ "CMD", "redis-cli", "ping" ] 87 | interval: 5s 88 | timeout: 30s 89 | retries: 50 90 | restart: always 91 | 92 | airflow-webserver: 93 | <<: *airflow-common 94 | command: webserver 95 | ports: 96 | - 8080:8080 97 | healthcheck: 98 | test: [ "CMD", "curl", "--fail", "http://localhost:8080/health" ] 99 | interval: 10s 100 | timeout: 10s 101 | retries: 5 102 | restart: always 103 | 104 | airflow-scheduler: 105 | <<: *airflow-common 106 | command: scheduler 107 |
restart: always 108 | 109 | airflow-worker: 110 | <<: *airflow-common 111 | command: celery worker 112 | restart: always 113 | 114 | airflow-init: 115 | <<: *airflow-common 116 | command: version 117 | environment: 118 | <<: *airflow-common-env 119 | _AIRFLOW_DB_UPGRADE: 'true' 120 | _AIRFLOW_WWW_USER_CREATE: 'true' 121 | _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} 122 | _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} 123 | 124 | flower: 125 | <<: *airflow-common 126 | command: celery flower 127 | ports: 128 | - 5555:5555 129 | healthcheck: 130 | test: [ "CMD", "curl", "--fail", "http://localhost:5555/" ] 131 | interval: 10s 132 | timeout: 10s 133 | retries: 5 134 | restart: always 135 | 136 | volumes: 137 | postgres-db-volume: 138 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Ryan S. McCoy 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every little bit 8 | helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/ryansmccoy/spreadsheets_to_dataframes/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 30 | wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | Spreadsheets to DataFrames could always use more documentation, whether as part of the 42 | official Spreadsheets to DataFrames docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/ryansmccoy/spreadsheets_to_dataframes/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `spreadsheets_to_dataframes` for local development. 61 | 62 | 1. Fork the `spreadsheets_to_dataframes` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/spreadsheets_to_dataframes.git 66 | 67 | 3. Install your local copy into a virtualenv. 
Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv spreadsheets_to_dataframes 70 | $ cd spreadsheets_to_dataframes/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the 80 | tests, including testing other Python versions with tox:: 81 | 82 | $ flake8 spreadsheets_to_dataframes tests 83 | $ python setup.py test or pytest 84 | $ tox 85 | 86 | To get flake8 and tox, just pip install them into your virtualenv. 87 | 88 | 6. Commit your changes and push your branch to GitHub:: 89 | 90 | $ git add . 91 | $ git commit -m "Your detailed description of your changes." 92 | $ git push origin name-of-your-bugfix-or-feature 93 | 94 | 7. Submit a pull request through the GitHub website. 95 | 96 | Pull Request Guidelines 97 | ----------------------- 98 | 99 | Before you submit a pull request, check that it meets these guidelines: 100 | 101 | 1. The pull request should include tests. 102 | 2. If the pull request adds functionality, the docs should be updated. Put 103 | your new functionality into a function with a docstring, and add the 104 | feature to the list in README.rst. 105 | 3. The pull request should work for Python 2.7, 3.5, 3.6 and 3.7, and for PyPy. Check 106 | https://travis-ci.org/ryansmccoy/spreadsheets_to_dataframes/pull_requests 107 | and make sure that the tests pass for all supported Python versions. 108 | 109 | Tips 110 | ---- 111 | 112 | To run a subset of tests:: 113 | 114 | $ pytest tests.test_spreadsheets_to_dataframes 115 | 116 | 117 | Deploying 118 | --------- 119 | 120 | A reminder for the maintainers on how to deploy. 121 | Make sure all your changes are committed (including an entry in HISTORY.rst). 122 | Then run:: 123 | 124 | $ bump2version patch # possible: major / minor / patch 125 | $ git push 126 | $ git push --tags 127 | 128 | Travis will then deploy to PyPI if tests pass. 129 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 5 | 0.1.0 (2019-10-09) 6 | ------------------ 7 | 8 | * First release on PyPI. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019, Ryan S. McCoy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.rst 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 33 | 34 | clean-build: ## remove build artifacts 35 | rm -fr build/ 36 | rm -fr dist/ 37 | rm -fr .eggs/ 38 | find . -name '*.egg-info' -exec rm -fr {} + 39 | find . -name '*.egg' -exec rm -f {} + 40 | 41 | clean-pyc: ## remove Python file artifacts 42 | find . -name '*.pyc' -exec rm -f {} + 43 | find . -name '*.pyo' -exec rm -f {} + 44 | find . -name '*~' -exec rm -f {} + 45 | find . -name '__pycache__' -exec rm -fr {} + 46 | 47 | clean-test: ## remove test and coverage artifacts 48 | rm -fr .tox/ 49 | rm -f .coverage 50 | rm -fr htmlcov/ 51 | rm -fr .pytest_cache 52 | 53 | lint: ## check style with flake8 54 | flake8 spreadsheets_to_dataframes tests 55 | 56 | test: ## run tests quickly with the default Python 57 | pytest 58 | 59 | test-all: ## run tests on every Python version with tox 60 | tox 61 | 62 | coverage: ## check code coverage quickly with the default Python 63 | coverage run --source spreadsheets_to_dataframes -m pytest 64 | coverage report -m 65 | coverage html 66 | $(BROWSER) htmlcov/index.html 67 | 68 | docs: ## generate Sphinx HTML documentation, including API docs 69 | rm -f docs/spreadsheets_to_dataframes.rst 70 | rm -f docs/modules.rst 71 | sphinx-apidoc -o docs/ spreadsheets_to_dataframes 72 | $(MAKE) -C docs clean 73 | $(MAKE) -C docs html 74 | $(BROWSER) docs/_build/html/index.html 75 | 76 | servedocs: docs ## compile the docs watching for changes 77 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
78 | 79 | release: dist ## package and upload a release 80 | twine upload dist/* 81 | 82 | dist: clean ## builds source and wheel package 83 | python setup.py sdist 84 | python setup.py bdist_wheel 85 | ls -l dist 86 | 87 | install: clean ## install the package to the active Python's site-packages 88 | python setup.py install 89 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======================================================================================= 2 | From Spreadsheets to DataFrames: Escaping Excel Hell with Python 3 | ======================================================================================= 4 | 5 | ============================================================================================================================================================================== 6 | 7 | `Pycon 2021 Tutorial Video [YouTube] - May 12, 2021 `_ 8 | 9 | 10 | Other Presentations: 11 | 12 | `STL Python Presentation [YouTube] `_ 13 | 14 | `Chicago Python Users Group [YouTube] `_ 15 | 16 | Details 17 | 18 | A spreadsheet is a wonderful invention and an excellent tool for certain jobs. All too often, however, spreadsheets are called upon to perform tasks that are beyond their capabilities. It’s like the old saying, 'If the only tool you have is a hammer, every problem looks like a nail.' However, some problems are better addressed with a screwdriver, with glue, or with a Swiss Army Knife. 19 | 20 | Python is described by some in the programming world as the Swiss Army Knife of programming languages because of its unrivaled versatility and flexibility in use. This allows its users to solve complex problems relatively easily compared with other programming languages and is one of the reasons why Python has become increasingly popular over time. 21 | 22 | In this tutorial, we’ll briefly discuss spreadsheets, signs that you might be living in “Excel Hell”, and then we’ll spend the rest of the time learning how to escape it using Python. 23 | 24 | In the first section, we’ll build on what spreadsheet users already know about cells, rows, columns, and formulas, and map them to their Python equivalents, such as variables, lists, dictionaries, and functions. At the end of this section, we’ll do an interactive exercise and learn how we can perform a simple calculation, similar to one you might do in Excel, but instead using Python. 25 | 26 | In the second section, we’ll discuss (and attempt) how we can perform more complex tasks, including web scraping, data processing, analysis, and visualization, by utilizing a few popular third-party libraries, including Requests, Pandas, Flask, Matplotlib, and others. 27 | 28 | In the last section, we’ll round out our discussion with a few important concepts in data management, including the concept of tidy data, building a data pipeline, and a few strategies (and packages) to use when approaching various data problems, including a demo using Apache Airflow.
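As a quick taste of that first-section mapping, here is a minimal sketch (the numbers and names below are made up for illustration) of an Excel-style calculation done in Python:

.. code-block:: python

    # a range of cells (say A1:A4) becomes a Python list
    monthly_sales = [1200, 950, 1100, 1300]

    # a formula like =SUM(A1:A4) becomes a built-in function call
    total_sales = sum(monthly_sales)

    # a lookup table (think VLOOKUP) maps naturally onto a dictionary
    region = {"STL": "Midwest", "NYC": "Northeast"}

    print(total_sales, region["STL"])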
29 | 30 | Slides 31 | ====================== 32 | 33 | `Intro [Slides] `_ 34 | 35 | `Excel to Python [Slides] `_ 36 | 37 | `Python Libraries & Resources [Slides] `_ 38 | 39 | `Data Management [Slides] `_ 40 | 41 | Tutorial Code 42 | ====================== 43 | 44 | Section 1 - Python Fundamentals for an Excel User 45 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | `01 basics_but_important_stuff.ipynb `_ 48 | 49 | `02 files_lists_dictionaries.ipynb `_ 50 | 51 | Section 1 - Challenges 52 | ~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | `challenge_1.py `_ 55 | 56 | `challenge_1_answer.py `_ 57 | 58 | `challenge_2.py `_ 59 | 60 | `challenge_2_answer.py `_ 61 | 62 | `challenge_3.py `_ 63 | 64 | `challenge_3_answer.py `_ 65 | 66 | Section 2 - Real-World Python Example for an Excel User 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | 69 | `01-real-world-example.py `_ 70 | 71 | `02-real-world-example-refactored.py `_ 72 | 73 | Section 2 - Challenge 74 | ~~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | `section2_challenge.rst `_ 77 | 78 | 79 | Section 3 - Best Practices in Python & Data for an Excel User 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | `Data Management [Slides] `_ 83 | 84 | `07-airflow `_ 85 | 86 | STL Python - Talk Code 87 | ====================== 88 | 89 | `01-basics.ipynb `_ 90 | 91 | `02-webscraping.ipynb `_ 92 | 93 | `03-tidy-data.ipynb `_ 94 | 95 | `04-pandas.ipynb `_ 96 | 97 | `05-data-analysis.ipynb `_ 98 | 99 | `06-data-visualizations.ipynb `_ 100 | 101 | STL Python - Folders 102 | =================================================== 103 | 104 | * 01-basics - examples used in presentation 105 | * 02-webscraping - program that clicks through a calendar (written in javascript) and exports csv files 106 | * 02-selenium-safari - program that logs in to a website, scrapes html from a javascript-generated page, cleans the html, and exports to pdf files 107 | * 02-webscrape-celery - example that uses a message queue and celery to download a list of urls 108 | * 04-other-analysis - examples of different quantitative notebooks 109 | * 05-other-visualizations - examples of different data visualization tools 110 | * 06-flask - different flask examples 111 | * 07-airflow - example that uses airflow to download and store stock prices 112 | 113 | Quick Start Guides 114 | ====================== 115 | 116 | 117 | `Install Anaconda & Pycharm `_ 118 | 119 | * Anaconda = manages your Python environments 120 | 121 | * Pycharm = code editor 122 | 123 | `Install Git `_ - Allows you to git clone/download GitHub projects 124 | 125 | Setup Environment & Run Example (Windows): 126 | ================================================== 127 | 128 | .. code-block:: bash 129 | 130 | $ git clone https://github.com/ryansmccoy/spreadsheets-to-dataframes.git 131 | $ cd spreadsheets-to-dataframes 132 | $ conda create -n spreadsheets-to-dataframes python=3.8 pandas scipy numpy lxml jupyter matplotlib -y 133 | $ activate spreadsheets-to-dataframes 134 | $ pip install -r requirements_dev.txt 135 | 136 | Setup Environment & Run Example (Linux): 137 | ================================================== 138 | 139 | .. 
code-block:: bash 140 | 141 | $ git clone https://github.com/ryansmccoy/spreadsheets-to-dataframes.git 142 | $ cd spreadsheets-to-dataframes 143 | $ conda create -n spreadsheets-to-dataframes python=3.8 pandas scipy numpy lxml jupyter matplotlib -y 144 | $ source activate spreadsheets-to-dataframes 145 | $ pip install -r requirements_dev.txt 146 | 147 | Running Jupyter Notebooks: 148 | ================================================== 149 | 150 | Navigate to spreadsheet-to-dataframe directory/folder: 151 | 152 | .. code-block:: bash 153 | 154 | $ activate spreadsheets-to-dataframes 155 | $ jupyter notebook 156 | 157 | (Optional) Install Docker to Run Airflow Example 158 | =================================================== 159 | 160 | https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html 161 | 162 | Python Books & Videos: 163 | =================================================== 164 | 165 | `(Book) Python Crash Course, 2nd Edition `_ 166 | 167 | `(Book) Introducing Python: Modern Computing in Simple Packages `_ 168 | 169 | `(Book) Learning Python, 5th Edition `_ 170 | 171 | `(Book) Automate the Boring Stuff with Python, 2nd Edition: Practical Programming for Total Beginners `_ 172 | 173 | `(Book) Think Python: How to Think Like a Computer Scientist `_ 174 | 175 | `(Book) The Quick Python Book (Book) `_ 176 | 177 | `(Book) Serious Python: Black-Belt Advice on Deployment, Scalability, Testing, and More `_ 178 | 179 | `(Github) A Whirlwind Tour of Python `_ 180 | 181 | `(Github) Python Data Science Handbook `_ 182 | 183 | `(Github) Introduction to Python `_ 184 | 185 | Cookiecutter: 186 | =================================================== 187 | 188 | $ pip install cookiecutter 189 | 190 | Resources: 191 | 192 | https://github.com/cookiecutter/cookiecutter 193 | 194 | https://github.com/audreyfeldroy/cookiecutter-pypackage 195 | 196 | https://towardsdatascience.com/cookiecutter-creating-custom-reusable-project-templates-fc85c8627b07 197 | 198 | Requests 199 | =================================================== 200 | 201 | $ pip install requests 202 | 203 | Resources: 204 | 205 | https://python.readthedocs.io/en/stable/library/stdtypes.html 206 | 207 | https://realpython.com/python-requests/ 208 | 209 | Have you mastered Requests? Then you should check out multithreading, concurrency, asyncio, message queues, parallelism. 210 | 211 | https://yasoob.me/2019/05/29/speeding-up-python-code-using-multithreading/ 212 | 213 | https://www.toptal.com/python/beginners-guide-to-concurrency-and-parallelism-in-python 214 | 215 | https://creativedata.stream/multi-threading-api-requests-in-python/ 216 | 217 | https://levelup.gitconnected.com/asynchronous-tasks-in-python-with-celery-rabbitmq-redis-480f6e506d76 218 | 219 | https://tests4geeks.com/blog/python-celery-rabbitmq-tutorial/ 220 | 221 | https://codeburst.io/automated-web-scraping-with-python-and-celery-ac02a4a9ce51 222 | 223 | https://github.com/ryansmccoy/zmq-high-speed-subs 224 | 225 | 226 | Pandas 227 | =================================================== 228 | 229 | $ pip install pandas 230 | 231 | Resources: 232 | 233 | `Dealing With Data `_ 234 | 235 | `Pandas Cookbook `_ 236 | 237 | `brandon-rhodes\pycon-pandas-tutorial `_ 238 | 239 | `Python pandas Q&A video series `_ 240 | 241 | `Master Data Analysis with Python `_ 242 | 243 | Have you mastered Pandas? Then you should check out Dask and Spark. 
244 | 245 | https://dask.org/ 246 | 247 | https://spark.apache.org/docs/latest/api/python/ 248 | 249 | Visualization: 250 | =================================================== 251 | 252 | $ pip install matplotlib 253 | 254 | Resources: 255 | 256 | https://github.com/fasouto/awesome-dataviz 257 | 258 | https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html 259 | 260 | https://www.toptal.com/designers/data-visualization/data-visualization-tools 261 | 262 | https://realpython.com/pandas-plot-python/ 263 | 264 | Have you mastered Matplotlib? Then you should check out JavaScript, D3, React, Tableau 265 | 266 | Flask: 267 | =================================================== 268 | 269 | $ pip install flask 270 | 271 | Resources: 272 | 273 | https://www.fullstackpython.com/flask.html 274 | 275 | https://blog.miguelgrinberg.com/ 276 | 277 | Have you mastered Flask? Then you should check out FastAPI, JavaScript, Node, React 278 | -------------------------------------------------------------------------------- /data/WA_Fn-UseC_-HR-Employee-Attrition.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/data/WA_Fn-UseC_-HR-Employee-Attrition.xlsx -------------------------------------------------------------------------------- /data/WMT_US.csv: -------------------------------------------------------------------------------- 1 | Ticker,Company Name,Year End,Total Sales,Total Expenses 2 | WMT US,WAL-MART STORES INC,12/31/2014,476293988352,460271988736 3 | WMT US,WAL-MART STORES INC,12/31/2013,469162000384,452163000320 4 | WMT US,WAL-MART STORES INC,12/31/2012,446950014976,431251014656 5 | WMT US,WAL-MART STORES INC,12/31/2011,421849006080,405460005888 6 | WMT US,WAL-MART STORES INC,12/31/2010,408214011904,393879012352 7 | WMT US,WAL-MART STORES INC,12/31/2009,405606989824,392206989312 8 | WMT US,WAL-MART STORES INC,12/31/2008,378798997504,366067997696 9 | WMT US,WAL-MART STORES INC,12/31/2007,348650012672,337366012928 10 | WMT US,WAL-MART STORES INC,12/31/2006,312426987520,301195987968 11 | WMT US,WAL-MART STORES INC,12/31/2005,287989006336,277722006528 12 | WMT US,WAL-MART STORES INC,12/31/2004,256329007104,247275006976 13 | WMT US,WAL-MART STORES INC,12/31/2003,229615992832,221660993024 14 | -------------------------------------------------------------------------------- /data/WMT_US_pandas.csv: -------------------------------------------------------------------------------- 1 | ,Ticker,Company Name,Year End,Total Sales,Total Expenses,Total Profit 2 | 0,WMT US,WAL-MART STORES INC,12/31/2014,476293988352,460271988736,16021999616 3 | 1,WMT US,WAL-MART STORES INC,12/31/2013,469162000384,452163000320,16999000064 4 | 2,WMT US,WAL-MART STORES INC,12/31/2012,446950014976,431251014656,15699000320 5 | 3,WMT US,WAL-MART STORES INC,12/31/2011,421849006080,405460005888,16389000192 6 | 4,WMT US,WAL-MART STORES INC,12/31/2010,408214011904,393879012352,14334999552 7 | 5,WMT US,WAL-MART STORES INC,12/31/2009,405606989824,392206989312,13400000512 8 | 6,WMT US,WAL-MART STORES INC,12/31/2008,378798997504,366067997696,12730999808 9 | 7,WMT US,WAL-MART STORES INC,12/31/2007,348650012672,337366012928,11283999744 10 | 8,WMT US,WAL-MART STORES INC,12/31/2006,312426987520,301195987968,11230999552 11 | 9,WMT US,WAL-MART STORES INC,12/31/2005,287989006336,277722006528,10266999808 12 | 10,WMT US,WAL-MART STORES INC,12/31/2004,256329007104,247275006976,9054000128 13 | 
11,WMT US,WAL-MART STORES INC,12/31/2003,229615992832,221660993024,7954999808 14 | -------------------------------------------------------------------------------- /data/WMT_US_updated.csv: -------------------------------------------------------------------------------- 1 | ,ticker,name,date,sales,expenses,profit 2 | 0,WMT US,WAL-MART STORES INC,2014-12-31,476293988352,460271988736,16021999616 3 | 1,WMT US,WAL-MART STORES INC,2013-12-31,469162000384,452163000320,16999000064 4 | 2,WMT US,WAL-MART STORES INC,2012-12-31,446950014976,431251014656,15699000320 5 | 3,WMT US,WAL-MART STORES INC,2011-12-31,421849006080,405460005888,16389000192 6 | 4,WMT US,WAL-MART STORES INC,2010-12-31,408214011904,393879012352,14334999552 7 | 5,WMT US,WAL-MART STORES INC,2009-12-31,405606989824,392206989312,13400000512 8 | 6,WMT US,WAL-MART STORES INC,2008-12-31,378798997504,366067997696,12730999808 9 | 7,WMT US,WAL-MART STORES INC,2007-12-31,348650012672,337366012928,11283999744 10 | 8,WMT US,WAL-MART STORES INC,2006-12-31,312426987520,301195987968,11230999552 11 | 9,WMT US,WAL-MART STORES INC,2005-12-31,287989006336,277722006528,10266999808 12 | 10,WMT US,WAL-MART STORES INC,2004-12-31,256329007104,247275006976,9054000128 13 | 11,WMT US,WAL-MART STORES INC,2003-12-31,229615992832,221660993024,7954999808 14 | -------------------------------------------------------------------------------- /data/country_timeseries.csv: -------------------------------------------------------------------------------- 1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali 2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,, 3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,, 4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,, 5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,, 6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,, 7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,, 8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,, 9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,, 10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,, 11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,, 12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,, 13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,, 14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,, 15 | 12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6 16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,, 17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6 18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,, 19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6 20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,, 21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6 22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5 23 | 11/15/2014,238,,7069,,,,,,,,2964,,,,,, 24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3 25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,, 26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1 27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,, 28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1 29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,, 30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1 31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,, 32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1 33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1 34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,, 35 | 
10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1 36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,, 37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0, 38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,, 39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1, 40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,, 41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1, 42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,, 43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1, 44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,, 45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,, 46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,, 47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,, 48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,, 49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,, 50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,, 51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,, 52 | 9/19/2014,181,1008,,,,,,,,632,,,,,,, 53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,, 54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,, 55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,, 56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,, 57 | 9/9/2014,171,,2407,,,,,,,,,,,,,, 58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,, 59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,, 60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,, 61 | 8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,, 62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,, 63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,, 64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,, 65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,, 66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,, 67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,, 68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,, 69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,, 70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,, 71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,, 72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,, 73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,, 74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,, 75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,, 76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,, 77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,, 78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,, 79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,, 80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,, 81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,, 82 | 6/22/2014,92,,51,,,,,,,,34,,,,,, 83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,, 84 | 6/19/2014,89,,41,,,,,,,,25,,,,,, 85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,, 86 | 6/17/2014,87,,,97,,,,,,,,49,,,,, 87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,, 88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,, 89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,, 90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,, 91 | 6/1/2014,71,328,13,79,,,,,,208,12,6,,,,, 92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,, 93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,, 94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,, 95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,, 96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,, 97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,, 98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,, 99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,, 100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,, 101 | 4/26/2014,35,224,,0,,,,,,143,,0,,,,, 102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,, 103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,, 104 | 4/22/2014,31,,,0,,,,,,,,0,,,,, 105 | 4/21/2014,30,,34,,,,,,,,11,,,,,, 106 | 
4/20/2014,29,208,,,,,,,,136,6,,,,,, 107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,, 108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,, 109 | 4/15/2014,24,,,12,,,,,,,,,,,,, 110 | 4/14/2014,23,168,,,,,,,,108,,,,,,, 111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,, 112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,, 113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,, 114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,, 115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,, 116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,, 117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,, 118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,, 119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,, 120 | 3/26/2014,4,86,,,,,,,,62,,,,,,, 121 | 3/25/2014,3,86,,,,,,,,60,,,,,,, 122 | 3/24/2014,2,86,,,,,,,,59,,,,,,, 123 | 3/22/2014,0,49,,,,,,,,29,,,,,,, -------------------------------------------------------------------------------- /data/fortune_1000.csv: -------------------------------------------------------------------------------- 1 | rank,name,industry,location,employees,revenues_millions 2 | 1,Walmart,General Merchandisers,"Bentonville, AR","2,200,000","$523,964 " 3 | 2,Amazon,Internet Services and Retailing,"Seattle, WA","798,000","$280,522 " 4 | 3,Exxon Mobil,Petroleum Refining,"Irving, TX","74,900","$264,938 " 5 | 4,Apple,"Computers, Office Equipment","Cupertino, CA","137,000","$260,174 " 6 | 5,CVS Health,Food and Drug Stores,"Woonsocket, RI","290,000","$256,776 " 7 | 6,Berkshire Hathaway,Insurance: Property and Casualty (Stock),"Omaha, NE","391,500","$254,616 " 8 | 7,UnitedHealth Group,Health Care: Insurance and Managed Care,"Minnetonka, MN","325,000","$242,155 " 9 | 8,McKesson,Wholesalers: Health Care,"San Francisco, CA","70,000","$214,319 " 10 | 9,AT&T,Telecommunications,"Dallas, TX","247,800","$181,193 " 11 | 10,AmerisourceBergen,Wholesalers: Health Care,"Chesterbrook, PA","21,500","$179,589 " 12 | 12,Ford Motor,Motor Vehicles and Parts,"Dearborn, MI","190,000","$155,900 " 13 | 13,Cigna,Health Care: Insurance and Managed Care,"Bloomfield, CT","73,700","$153,566 " 14 | 14,Costco Wholesale,General Merchandisers,"Issaquah, WA","201,500","$152,703 " 15 | 15,Chevron,Petroleum Refining,"San Ramon, CA","48,200","$146,516 " 16 | 16,Cardinal Health,Wholesalers: Health Care,"Dublin, OH","49,500","$145,534 " 17 | 17,JPMorgan Chase,Commercial Banks,"New York, NY","256,981","$142,422 " 18 | 18,General Motors,Motor Vehicles and Parts,"Detroit, MI","164,000","$137,237 " 19 | 19,Walgreens Boots Alliance,Food and Drug Stores,"Deerfield, IL","287,000","$136,866 " 20 | 20,Verizon Communications,Telecommunications,"New York, NY","135,000","$131,868 " 21 | 21,Microsoft,Computer Software,"Redmond, WA","144,000","$125,843 " 22 | 22,Marathon Petroleum,Petroleum Refining,"Findlay, OH","60,910","$124,813 " 23 | 23,Kroger,Food and Drug Stores,"Cincinnati, OH","435,000","$122,286 " 24 | 24,Fannie Mae,Diversified Financials,"Washington, DC","7,500","$120,304 " 25 | 25,Bank of America,Commercial Banks,"Charlotte, NC","208,131","$113,589 " 26 | 26,Home Depot,Specialty Retailers: Other,"Atlanta, GA","415,700","$110,225 " 27 | 27,Phillips 66,Petroleum Refining,"Houston, TX","14,500","$109,559 " 28 | 28,Comcast NBCUniversal,Telecommunications,"Philadelphia, PA","190,000","$108,942 " 29 | 29,Anthem,Health Care: Insurance and Managed Care,"Indianapolis, IN","70,600","$104,213 " 30 | 30,Wells Fargo,Commercial Banks,"San Francisco, CA","259,800","$103,915 " 31 | -------------------------------------------------------------------------------- /data/linkedin_industries.html: 
-------------------------------------------------------------------------------- [linkedin_industries.html: the page's HTML markup (roughly 750 mostly empty lines, page title "Title") was stripped during extraction; only the industry-code table below was recoverable]
CodeGroupsDescription
47corp, finAccounting
94man, tech, tranAirlines/Aviation
120leg, orgAlternative Dispute Resolution
125hlthAlternative Medicine
127art, medAnimation
19goodApparel & Fashion
50consArchitecture & Planning
111art, med, recArts and Crafts
53manAutomotive
52gov, manAviation & Aerospace
41finBanking
12gov, hlth, techBiotechnology
36med, recBroadcast Media
49consBuilding Materials
138corp, manBusiness Supplies and Equipment
129finCapital Markets
54manChemicals
90org, servCivic & Social Organization
51cons, govCivil Engineering
128cons, corp, finCommercial Real Estate
118techComputer & Network Security
109med, recComputer Games
3techComputer Hardware
5techComputer Networking
4techComputer Software
48consConstruction
24good, manConsumer Electronics
25good, manConsumer Goods
91org, servConsumer Services
18goodCosmetics
65agrDairy
1gov, techDefense & Space
99art, medDesign
69eduEducation Management
132edu, orgE-Learning
112good, manElectrical/Electronic Manufacturing
28med, recEntertainment
86org, servEnvironmental Services
110corp, rec, servEvents Services
76govExecutive Office
122corp, servFacilities Services
63agrFarming
43finFinancial Services
38art, med, recFine Art
66agrFishery
34rec, servFood & Beverages
23good, man, servFood Production
101orgFund-Raising
26good, manFurniture
29recGambling & Casinos
145cons, manGlass, Ceramics & Concrete
75govGovernment Administration
148govGovernment Relations
140art, medGraphic Design
124hlth, recHealth, Wellness and Fitness
68eduHigher Education
14hlthHospital & Health Care
31rec, serv, tranHospitality
137corpHuman Resources
134corp, good, tranImport and Export
88org, servIndividual & Family Services
147cons, manIndustrial Automation
84med, servInformation Services
96techInformation Technology and Services
42finInsurance
74govInternational Affairs
141gov, org, tranInternational Trade and Development
6techInternet
45finInvestment Banking
46finInvestment Management
73gov, legJudiciary
77gov, legLaw Enforcement
9legLaw Practice
10legLegal Services
72gov, legLegislative Office
30rec, serv, tranLeisure, Travel & Tourism
85med, rec, servLibraries
116corp, tranLogistics and Supply Chain
143goodLuxury Goods & Jewelry
55manMachinery
11corpManagement Consulting
95tranMaritime
97corpMarket Research
80corp, medMarketing and Advertising
135cons, gov, manMechanical or Industrial Engineering
126med, recMedia Production
17hlthMedical Devices
13hlthMedical Practice
139hlthMental Health Care
71govMilitary
56manMining & Metals
35art, med, recMotion Pictures and Film
37art, med, recMuseums and Institutions
115art, recMusic
114gov, man, techNanotechnology
81med, recNewspapers
100orgNon-Profit Organization Management
57manOil & Energy
113medOnline Media
123corpOutsourcing/Offshoring
87serv, tranPackage/Freight Delivery
146good, manPackaging and Containers
61manPaper & Forest Products
39art, med, recPerforming Arts
15hlth, techPharmaceuticals
131orgPhilanthropy
136art, med, recPhotography
117manPlastics
107gov, orgPolitical Organization
67eduPrimary/Secondary Education
83med, recPrinting
105corpProfessional Training & Coaching
102corp, orgProgram Development
79govPublic Policy
98corpPublic Relations and Communications
78govPublic Safety
82med, recPublishing
62manRailroad Manufacture
64agrRanching
44cons, fin, goodReal Estate
40rec, servRecreational Facilities and Services
89org, servReligious Institutions
144gov, man, orgRenewables & Environment
70edu, govResearch
32rec, servRestaurants
27good, manRetail
121corp, org, servSecurity and Investigations
7techSemiconductors
58manShipbuilding
20good, recSporting Goods
33recSports
104corpStaffing and Recruiting
22goodSupermarkets
8gov, techTelecommunications
60manTextiles
130gov, orgThink Tanks
21goodTobacco
108corp, gov, servTranslation and Localization
92tranTransportation/Trucking/Railroad
59manUtilities
106fin, techVenture Capital & Private Equity
16hlthVeterinary
93tranWarehousing
133goodWholesale
142good, man, recWine and Spirits
119techWireless
103art, med, recWriting and Editing
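The industry-code table above is stored as raw HTML in data/linkedin_industries.html. As a minimal sketch (not part of the repo; the path, header handling, and column names are assumptions, since the markup was stripped above), it could be loaded with pandas.read_html, which needs an HTML parser such as lxml, already listed in requirements_dev.txt:

import pandas as pd

# read_html parses every <table> in the document and returns a list of DataFrames
tables = pd.read_html("data/linkedin_industries.html")
industries = tables[0]

# label the columns to match the header row above
industries.columns = ["Code", "Groups", "Description"]

# e.g. every industry tagged with the "fin" group
print(industries[industries["Groups"].str.contains("fin", na=False)])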
752 | 753 | 754 | -------------------------------------------------------------------------------- /data/msft_stock_key_data.csv: -------------------------------------------------------------------------------- 1 | Symbol,MSFT 2 | Name,Microsoft Corporation Common Stock 3 | Exchange,NASDAQ-GS 4 | Sector,Technology 5 | Industry,Computer Software: Prepackaged Software 6 | 1 Year Target,$277.50 7 | Today's High/Low,$261.00/$257.60 8 | Share Volume,"24,878,582" 9 | Average Volume,"28,320,974" 10 | Previous Close,$259.50 11 | 52 Week High/Low,$259.93/$166.11 12 | Market Cap,"1,966,557,339,088" 13 | P/E Ratio,38.86 14 | Forward P/E 1 Yr.,35.21 15 | Earnings Per Share(EPS),$6.71 16 | Annualized Dividend,$2.24 17 | Ex Dividend Date,19-May-21 18 | Dividend Pay Date,10-Jun-21 19 | Current Yield,0.88% 20 | Beta,0.8 21 | -------------------------------------------------------------------------------- /data/pew.csv: -------------------------------------------------------------------------------- 1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused" 2 | "Agnostic",27,34,60,81,76,137,122,109,84,96 3 | "Atheist",12,27,37,52,35,70,73,59,74,76 4 | "Buddhist",27,21,30,34,33,58,62,39,53,54 5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489 6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116 7 | "Evangelical Prot",575,869,1064,982,881,1486,949,723,414,1529 8 | "Hindu",1,9,7,9,11,34,47,48,54,37 9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339 10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37 11 | "Jewish",19,19,25,25,30,95,69,87,151,162 12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328 13 | "Mormon",29,40,48,51,56,112,85,49,42,69 14 | "Muslim",6,7,9,10,9,23,16,8,6,22 15 | "Orthodox",13,17,23,32,32,47,38,42,46,73 16 | "Other Christian",9,7,11,13,13,14,18,14,12,18 17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71 18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8 19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597 20 | -------------------------------------------------------------------------------- /data/portfolio.csv: -------------------------------------------------------------------------------- 1 | Ticker,Date,Shares,Price 2 | GOOG,2019-10-01,100,1 3 | MSFT,2019-10-01,200,1 4 | IBM,2019-10-01,500,1 5 | TSLA,2019-10-01,300,1 6 | 7 | -------------------------------------------------------------------------------- /data/pycon_sponsor_levels.csv: -------------------------------------------------------------------------------- 1 | sponsor_level,amount 2 | VISIONARY,150000 3 | SUSTAINABILITY,90000 4 | MAINTAINING,60000 5 | CONTRIBUTING,30000 6 | SUPPORTING,15000 7 | PARTNER,7500 8 | PARTICIPATING,3750 9 | ASSOCIATE,1500 10 | -------------------------------------------------------------------------------- /data/pycon_sponsors.csv: -------------------------------------------------------------------------------- 1 | symbol,name,sponsor_level 2 | GOOG,ALPHABET INC.,VISIONARY 3 | AMZN,AMAZON COM INC,SUSTAINABILITY 4 | #N/A,BLOOMBERG,VISIONARY 5 | COF,CAPITAL ONE FINANCIAL CORP,MAINTAINING 6 | GLW,CORNING INC,MAINTAINING 7 | ESTC,ELASTIC N.V.,PARTNER 8 | FB,FACEBOOK INC,SUSTAINABILITY 9 | #N/A,HUAWEI TECHNOLOGIES,SUSTAINABILITY 10 | IBM,INTERNATIONAL BUSINESS MACHINES CORP,CONTRIBUTING 11 | JPM,JPMORGAN CHASE & CO,SUPPORTING 12 | MSFT,MICROSOFT CORP,VISIONARY 13 | NFLX,NETFLIX INC,PARTNER 14 | CRM,SALESFORCE.COM INC.,SUSTAINABILITY 15 | WORK,SLACK TECHNOLOGIES INC.,MAINTAINING 16 | 
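pycon_sponsor_levels.csv and pycon_sponsors.csv above form the lookup pair used by the section 1 challenges, and sponsors_vlookup.csv further below holds the joined result. As a minimal sketch (assuming the repo root as the working directory), the pandas equivalent of that Excel VLOOKUP is a left merge:

import pandas as pd

sponsors = pd.read_csv("data/pycon_sponsors.csv")
levels = pd.read_csv("data/pycon_sponsor_levels.csv")

# a left join keeps every sponsor and pulls in its matching donation amount
vlookup = sponsors.merge(levels, on="sponsor_level", how="left")
print(vlookup.head())

Note that read_csv treats the '#N/A' symbols as missing values by default; passing keep_default_na=False would keep them as literal text, matching the original file.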
-------------------------------------------------------------------------------- /data/retail_sales.csv: -------------------------------------------------------------------------------- 1 | date,sales 2 | 2009-10-01,338630 3 | 2009-11-01,339386 4 | 2009-12-01,400264 5 | 2010-01-01,314640 6 | 2010-02-01,311022 7 | 2010-03-01,360819 8 | 2010-04-01,356460 9 | 2010-05-01,365713 10 | 2010-06-01,358675 11 | 2010-07-01,362027 12 | 2010-08-01,362682 13 | 2010-09-01,346069 14 | 2010-10-01,355212 15 | 2010-11-01,365809 16 | 2010-12-01,426654 17 | 2011-01-01,335608 18 | 2011-02-01,337352 19 | 2011-03-01,387092 20 | 2011-04-01,380754 21 | 2011-05-01,391970 22 | 2011-06-01,388636 23 | 2011-07-01,384600 24 | 2011-08-01,394548 25 | 2011-09-01,374895 26 | 2011-10-01,379364 27 | 2011-11-01,391081 28 | 2011-12-01,451669 29 | 2012-01-01,355058 30 | 2012-02-01,372523 31 | 2012-03-01,414275 32 | 2012-04-01,393035 33 | 2012-05-01,418648 34 | 2012-06-01,400996 35 | 2012-07-01,396020 36 | 2012-08-01,417911 37 | 2012-09-01,385597 38 | 2012-10-01,399341 39 | 2012-11-01,410992 40 | 2012-12-01,461994 41 | 2013-01-01,375537 42 | 2013-02-01,373938 43 | 2013-03-01,421638 44 | 2013-04-01,408381 45 | 2013-05-01,436985 46 | 2013-06-01,414701 47 | 2013-07-01,422357 48 | 2013-08-01,434950 49 | 2013-09-01,396199 50 | 2013-10-01,415740 51 | 2013-11-01,423611 52 | 2013-12-01,477205 53 | 2014-01-01,383399 54 | 2014-02-01,380315 55 | 2014-03-01,432806 56 | 2014-04-01,431415 57 | 2014-05-01,458822 58 | 2014-06-01,433152 59 | 2014-07-01,443005 60 | 2014-08-01,450913 61 | 2014-09-01,420871 62 | 2014-10-01,437702 63 | 2014-11-01,437910 64 | 2014-12-01,501232 65 | 2015-01-01,397252 66 | 2015-02-01,386935 67 | 2015-03-01,444110 68 | 2015-04-01,438217 69 | 2015-05-01,462615 70 | 2015-06-01,448229 71 | 2015-07-01,457710 72 | 2015-08-01,456340 73 | 2015-09-01,430917 74 | -------------------------------------------------------------------------------- /data/sponsors_vlookup.csv: -------------------------------------------------------------------------------- 1 | symbol,name,sponsor_level,amount 2 | GOOG,ALPHABET INC.,VISIONARY,150000 3 | AMZN,AMAZON COM INC,SUSTAINABILITY,90000 4 | #N/A,BLOOMBERG,VISIONARY,150000 5 | COF,CAPITAL ONE FINANCIAL CORP,MAINTAINING,60000 6 | GLW,CORNING INC,MAINTAINING,60000 7 | ESTC,ELASTIC N.V.,PARTNER,7500 8 | FB,FACEBOOK INC,SUSTAINABILITY,90000 9 | #N/A,HUAWEI TECHNOLOGIES,SUSTAINABILITY,90000 10 | IBM,INTERNATIONAL BUSINESS MACHINES CORP,CONTRIBUTING,30000 11 | JPM,JPMORGAN CHASE & CO,SUPPORTING,15000 12 | MSFT,MICROSOFT CORP,VISIONARY,150000 13 | NFLX,NETFLIX INC,PARTNER,7500 14 | CRM,SALESFORCE.COM INC.,SUSTAINABILITY,90000 15 | WORK,SLACK TECHNOLOGIES INC.,MAINTAINING,60000 16 | -------------------------------------------------------------------------------- /data/stlcom_larget_employers.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/data/stlcom_larget_employers.xlsx -------------------------------------------------------------------------------- /data/stlregionalchamber_largest_employers_.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/data/stlregionalchamber_largest_employers_.xlsx -------------------------------------------------------------------------------- /data/stock_data_simple.csv: 
-------------------------------------------------------------------------------- 1 | ticker,company_name,sector,trade_date,price,price_change_percent,market_capitalization,annual_sales,shares_outstanding 2 | WMT,Wal-Mart Stores,Retail,1/16/2014,76.76,-1.20%,"248,377","55,688",3235772 3 | AAPL,Apple Inc,Technology,1/16/2014,554.25,-0.60%,"494,697","37,472",892553 4 | IBM,Intl Business Machines,Technology,1/16/2014,188.76,0.50%,"204,965","23,720",1085854 5 | BAC,Bank Of America Corp,Financial,1/16/2014,17.08,-0.40%,"182,177","23,553",10666133 6 | SGL.KR,Samsung Electronics,Technology,1/16/2014,"1,301,000.00",0.20%,"180,329","23,444",147299 7 | NESN.CH,Nestle 'R',Consumer Staple,1/16/2014,67.45,1.20%,"239,974","22,584",3224798 8 | MSFT,Microsoft Corp,Technology,1/16/2014,36.89,0.40%,"307,956","18,529",8347968 9 | AMZN,Amazon.Com Inc,Retail,1/16/2014,395.8,0.00%,"181,170","17,092",457733 10 | GOOG,Google Inc,Technology,1/16/2014,"1,156.22",0.70%,"386,278","14,893",334087 11 | PFE,Pfizer Inc,Health Care,1/16/2014,31.17,0.00%,"202,014","12,643",6481070 12 | -------------------------------------------------------------------------------- /data/stock_data_simple.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/data/stock_data_simple.xlsx -------------------------------------------------------------------------------- /data/table1.csv: -------------------------------------------------------------------------------- 1 | "country","year","cases","population" 2 | "Afghanistan",1999,745,19987071 3 | "Afghanistan",2000,2666,20595360 4 | "Brazil",1999,37737,172006362 5 | "Brazil",2000,80488,174504898 6 | "China",1999,212258,1272915272 7 | "China",2000,213766,1280428583 8 | -------------------------------------------------------------------------------- /data/table2.csv: -------------------------------------------------------------------------------- 1 | "country","year","type","count" 2 | "Afghanistan",1999,"cases",745 3 | "Afghanistan",1999,"population",19987071 4 | "Afghanistan",2000,"cases",2666 5 | "Afghanistan",2000,"population",20595360 6 | "Brazil",1999,"cases",37737 7 | "Brazil",1999,"population",172006362 8 | "Brazil",2000,"cases",80488 9 | "Brazil",2000,"population",174504898 10 | "China",1999,"cases",212258 11 | "China",1999,"population",1272915272 12 | "China",2000,"cases",213766 13 | "China",2000,"population",1280428583 14 | -------------------------------------------------------------------------------- /data/table3.csv: -------------------------------------------------------------------------------- 1 | "country","year","rate" 2 | "Afghanistan",1999,"745/19987071" 3 | "Afghanistan",2000,"2666/20595360" 4 | "Brazil",1999,"37737/172006362" 5 | "Brazil",2000,"80488/174504898" 6 | "China",1999,"212258/1272915272" 7 | "China",2000,"213766/1280428583" 8 | -------------------------------------------------------------------------------- /data/table4a.csv: -------------------------------------------------------------------------------- 1 | "country","1999","2000" 2 | "Afghanistan",745,2666 3 | "Brazil",37737,80488 4 | "China",212258,213766 5 | -------------------------------------------------------------------------------- /data/table4b.csv: -------------------------------------------------------------------------------- 1 | "country","1999","2000" 2 | "Afghanistan",19987071,20595360 3 | "Brazil",172006362,174504898 4 | "China",1272915272,1280428583 
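table4a.csv and table4b.csv above spread the same measurements across one column per year, while table1.csv above holds the same data in tidy form. As a minimal sketch (paths assumed relative to the repo root), melting both wide tables and joining the results reproduces table1:

import pandas as pd

cases = pd.read_csv("data/table4a.csv")
population = pd.read_csv("data/table4b.csv")

# melt turns the year columns into rows: one (country, year, value) triple per row
cases_long = cases.melt(id_vars="country", var_name="year", value_name="cases")
pop_long = population.melt(id_vars="country", var_name="year", value_name="population")

# joining the two long tables yields table1's layout
table1 = cases_long.merge(pop_long, on=["country", "year"])
print(table1.sort_values(["country", "year"]))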
5 | -------------------------------------------------------------------------------- /data/weather.csv: -------------------------------------------------------------------------------- 1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31" 2 | "MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA 3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA 4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA 5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA 6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA 9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA 10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA 11 | "MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA 12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA 13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA 14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4 17 | "MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4 18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA 19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA 20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA 21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA 22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = spreadsheets_to_dataframes 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # spreadsheets_to_dataframes documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another 17 | # directory, add these directories to sys.path here. If the directory is 18 | # relative to the documentation root, use os.path.abspath to make it 19 | # absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | import spreadsheets_to_dataframes 26 | 27 | # -- General configuration --------------------------------------------- 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = '.rst' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project. 50 | project = u'Spreadsheets to DataFrames' 51 | copyright = u"2019, Ryan S. McCoy" 52 | author = u"Ryan S. McCoy" 53 | 54 | # The version info for the project you're documenting, acts as replacement 55 | # for |version| and |release|, also used in various other places throughout 56 | # the built documents. 57 | # 58 | # The short X.Y version. 59 | version = spreadsheets_to_dataframes.__version__ 60 | # The full version, including alpha/beta/rc tags. 61 | release = spreadsheets_to_dataframes.__version__ 62 | 63 | # The language for content autogenerated by Sphinx. Refer to documentation 64 | # for a list of supported languages. 65 | # 66 | # This is also used if you do content translation via gettext catalogs. 67 | # Usually you set "language" from the command line for these cases. 
68 | language = None 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This patterns also effect to html_static_path and html_extra_path 73 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 74 | 75 | # The name of the Pygments (syntax highlighting) style to use. 76 | pygments_style = 'sphinx' 77 | 78 | # If true, `todo` and `todoList` produce output, else they produce nothing. 79 | todo_include_todos = False 80 | 81 | 82 | # -- Options for HTML output ------------------------------------------- 83 | 84 | # The theme to use for HTML and HTML Help pages. See the documentation for 85 | # a list of builtin themes. 86 | # 87 | html_theme = 'alabaster' 88 | 89 | # Theme options are theme-specific and customize the look and feel of a 90 | # theme further. For a list of options available for each theme, see the 91 | # documentation. 92 | # 93 | # html_theme_options = {} 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ['_static'] 99 | 100 | 101 | # -- Options for HTMLHelp output --------------------------------------- 102 | 103 | # Output file base name for HTML help builder. 104 | htmlhelp_basename = 'spreadsheets_to_dataframesdoc' 105 | 106 | 107 | # -- Options for LaTeX output ------------------------------------------ 108 | 109 | latex_elements = { 110 | # The paper size ('letterpaper' or 'a4paper'). 111 | # 112 | # 'papersize': 'letterpaper', 113 | 114 | # The font size ('10pt', '11pt' or '12pt'). 115 | # 116 | # 'pointsize': '10pt', 117 | 118 | # Additional stuff for the LaTeX preamble. 119 | # 120 | # 'preamble': '', 121 | 122 | # Latex figure (float) alignment 123 | # 124 | # 'figure_align': 'htbp', 125 | } 126 | 127 | # Grouping the document tree into LaTeX files. List of tuples 128 | # (source start file, target name, title, author, documentclass 129 | # [howto, manual, or own class]). 130 | latex_documents = [ 131 | (master_doc, 'spreadsheets_to_dataframes.tex', 132 | u'Spreadsheets to DataFrames Documentation', 133 | u'Ryan S. McCoy', 'manual'), 134 | ] 135 | 136 | 137 | # -- Options for manual page output ------------------------------------ 138 | 139 | # One entry per manual page. List of tuples 140 | # (source start file, name, description, authors, manual section). 141 | man_pages = [ 142 | (master_doc, 'spreadsheets_to_dataframes', 143 | u'Spreadsheets to DataFrames Documentation', 144 | [author], 1) 145 | ] 146 | 147 | 148 | # -- Options for Texinfo output ---------------------------------------- 149 | 150 | # Grouping the document tree into Texinfo files. List of tuples 151 | # (source start file, target name, title, author, 152 | # dir menu entry, description, category) 153 | texinfo_documents = [ 154 | (master_doc, 'spreadsheets_to_dataframes', 155 | u'Spreadsheets to DataFrames Documentation', 156 | author, 157 | 'spreadsheets_to_dataframes', 158 | 'One line description of project.', 159 | 'Miscellaneous'), 160 | ] 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Spreadsheets to DataFrames's documentation! 2 | ====================================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | 8 | readme 9 | installation 10 | usage 11 | modules 12 | contributing 13 | authors 14 | history 15 | 16 | Indices and tables 17 | ================== 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install Spreadsheets to DataFrames, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install spreadsheets_to_dataframes 16 | 17 | This is the preferred method to install Spreadsheets to DataFrames, as it will always install the most recent stable release. 18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. _pip: https://pip.pypa.io 23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 24 | 25 | 26 | From sources 27 | ------------ 28 | 29 | The sources for Spreadsheets to DataFrames can be downloaded from the `Github repo`_. 30 | 31 | You can either clone the public repository: 32 | 33 | .. code-block:: console 34 | 35 | $ git clone git://github.com/ryansmccoy/spreadsheets_to_dataframes 36 | 37 | Or download the `tarball`_: 38 | 39 | .. code-block:: console 40 | 41 | $ curl -OJL https://github.com/ryansmccoy/spreadsheets_to_dataframes/tarball/master 42 | 43 | Once you have a copy of the source, you can install it with: 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | 49 | 50 | .. _Github repo: https://github.com/ryansmccoy/spreadsheets_to_dataframes 51 | .. _tarball: https://github.com/ryansmccoy/spreadsheets_to_dataframes/tarball/master 52 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=spreadsheets_to_dataframes 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use Spreadsheets to DataFrames in a project:: 6 | 7 | import spreadsheets_to_dataframes 8 | -------------------------------------------------------------------------------- /img/basics/basic_python_style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/basic_python_style.png -------------------------------------------------------------------------------- /img/basics/built-in_data_structures.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/built-in_data_structures.png -------------------------------------------------------------------------------- /img/basics/built-in_functions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/built-in_functions.png -------------------------------------------------------------------------------- /img/basics/built-in_len.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/built-in_len.png -------------------------------------------------------------------------------- /img/basics/calculations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/calculations.png -------------------------------------------------------------------------------- /img/basics/cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/cell.png -------------------------------------------------------------------------------- /img/basics/cell_ex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/cell_ex.png -------------------------------------------------------------------------------- /img/basics/cell_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/cell_types.png 
-------------------------------------------------------------------------------- /img/basics/cells.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/cells.png -------------------------------------------------------------------------------- /img/basics/comments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/comments.png -------------------------------------------------------------------------------- /img/basics/data-types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/data-types.png -------------------------------------------------------------------------------- /img/basics/data_collections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/data_collections.png -------------------------------------------------------------------------------- /img/basics/excel-built-in-string.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/excel-built-in-string.png -------------------------------------------------------------------------------- /img/basics/excel-built-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/excel-built-in.png -------------------------------------------------------------------------------- /img/basics/excel-pre-installed-add-ins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/excel-pre-installed-add-ins.png -------------------------------------------------------------------------------- /img/basics/jupyter-method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/jupyter-method.png -------------------------------------------------------------------------------- /img/basics/pycharm-function-pop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycharm-function-pop.png -------------------------------------------------------------------------------- /img/basics/pycharm-function-popup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycharm-function-popup.png -------------------------------------------------------------------------------- /img/basics/pycharm-methods.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycharm-methods.png -------------------------------------------------------------------------------- /img/basics/pycon-files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycon-files.png -------------------------------------------------------------------------------- /img/basics/pycon_sponsor_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycon_sponsor_levels.png -------------------------------------------------------------------------------- /img/basics/pycon_sponsors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/pycon_sponsors.png -------------------------------------------------------------------------------- /img/basics/python-pre-installed-add-ins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/python-pre-installed-add-ins.png -------------------------------------------------------------------------------- /img/basics/reserved_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/reserved_words.png -------------------------------------------------------------------------------- /img/basics/standard-library-import.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/standard-library-import.png -------------------------------------------------------------------------------- /img/basics/standard-library.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/standard-library.png -------------------------------------------------------------------------------- /img/basics/vscode-method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/basics/vscode-method.png -------------------------------------------------------------------------------- /img/dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/dataframe.png -------------------------------------------------------------------------------- /img/dataframe_components.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/dataframe_components.png -------------------------------------------------------------------------------- /img/excel_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/excel_table.png -------------------------------------------------------------------------------- /img/pandas_dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/pandas_dataframe.png -------------------------------------------------------------------------------- /img/split_apply_combine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryansmccoy/spreadsheets-to-dataframes/22b4a3393626a8df24e8f5a188b3407d20a6430f/img/split_apply_combine.png -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | Click 2 | 3 | pandas 4 | numpy 5 | scipy 6 | requests 7 | openpyxl 8 | cookiecutter 9 | sqlalchemy 10 | flask 11 | feedparser 12 | bs4 13 | # selenium 14 | 15 | # statsmodels 16 | # tldextract 17 | # pyflux 18 | # fbprophet 19 | lxml 20 | jupyter 21 | matplotlib 22 | 23 | # celery==3.1.25 24 | 25 | # alpha_vantage 26 | -------------------------------------------------------------------------------- /section1_challenge_1.py: -------------------------------------------------------------------------------- 1 | # Perform an Excel VLOOKUP with a Python Dictionary 2 | 3 | # Challenge 1 4 | # Modify the code below to match the Expected Output at the bottom 5 | 6 | import csv 7 | import os 8 | from pprint import pprint 9 | 10 | current_directory = os.getcwd() 11 | 12 | pycon_sponsors_filename = 'pycon_sponsors.csv' 13 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename) 14 | 15 | # print(pycon_sponsors_filepath) 16 | 17 | sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000}, 18 | {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000}, 19 | {'sponsor_level': 'MAINTAINING', 'amount': 60000}, 20 | {'sponsor_level': 'CONTRIBUTING', 'amount': 30000}, 21 | {'sponsor_level': 'SUPPORTING', 'amount': 15000}, 22 | {'sponsor_level': 'PARTNER', 'amount': 7500}, 23 | {'sponsor_level': 'PARTICIPATING', 'amount': 3750}, 24 | {'sponsor_level': 'ASSOCIATE', 'amount': 1500}] 25 | 26 | pprint(sponsor_levels) 27 | 28 | pycon_sponsors = [] 29 | 30 | # print(pycon_sponsors_filepath) 31 | 32 | with open(pycon_sponsors_filepath, 'r') as f: 33 | rows = csv.reader(f) 34 | 35 | header = next(f) 36 | 37 | for row_number, row in enumerate(rows): 38 | print("Row Number:\t", row_number, "Values:\t", row) 39 | 40 | """ 41 | Current Output: 42 | 43 | [{'amount': 150000, 'sponsor_level': 'VISIONARY'}, 44 | {'amount': 90000, 'sponsor_level': 'SUSTAINABILITY'}, 45 | {'amount': 60000, 'sponsor_level': 'MAINTAINING'}, 46 | {'amount': 30000, 'sponsor_level': 'CONTRIBUTING'}, 47 | {'amount': 15000, 'sponsor_level': 'SUPPORTING'}, 48 | {'amount': 7500, 'sponsor_level': 'PARTNER'}, 49 | {'amount': 3750, 'sponsor_level': 'PARTICIPATING'}, 50 | {'amount': 1500, 'sponsor_level': 'ASSOCIATE'}] 51 | 52 | Row 
Number: 0 Values: ['GOOG', 'ALPHABET INC.', 'VISIONARY'] 53 | Row Number: 1 Values: ['AMZN', 'AMAZON COM INC', 'SUSTAINABILITY'] 54 | Row Number: 2 Values: ['#N/A', 'BLOOMBERG', 'VISIONARY'] 55 | Row Number: 3 Values: ['COF', 'CAPITAL ONE FINANCIAL CORP', 'MAINTAINING'] 56 | Row Number: 4 Values: ['GLW', 'CORNING INC', 'MAINTAINING'] 57 | Row Number: 5 Values: ['ESTC', 'ELASTIC N.V.', 'PARTNER'] 58 | Row Number: 6 Values: ['FB', 'FACEBOOK INC', 'SUSTAINABILITY'] 59 | Row Number: 7 Values: ['#N/A', 'HUAWEI TECHNOLOGIES', 'SUSTAINABILITY'] 60 | Row Number: 8 Values: ['IBM', 'INTERNATIONAL BUSINESS MACHINES CORP', 'CONTRIBUTING'] 61 | Row Number: 9 Values: ['JPM', 'JPMORGAN CHASE & CO', 'SUPPORTING'] 62 | Row Number: 10 Values: ['MSFT', 'MICROSOFT CORP', 'VISIONARY'] 63 | Row Number: 11 Values: ['NFLX', 'NETFLIX INC', 'PARTNER'] 64 | Row Number: 12 Values: ['CRM', 'SALESFORCE.COM INC.', 'SUSTAINABILITY'] 65 | Row Number: 13 Values: ['WORK', 'SLACK TECHNOLOGIES INC.', 'MAINTAINING'] 66 | 67 | Expected Output: 68 | 69 | Company Number: 0 70 | DCompany: ALPHABET INC. 71 | Level: VISIONARY 72 | Donated: 150000 73 | Company Number: 1 74 | DCompany: AMAZON COM INC 75 | Level: SUSTAINABILITY 76 | Donated: 90000 77 | Company Number: 2 78 | DCompany: BLOOMBERG 79 | Level: VISIONARY 80 | Donated: 150000 81 | Company Number: 3 82 | DCompany: CAPITAL ONE FINANCIAL CORP 83 | Level: MAINTAINING 84 | Donated: 60000 85 | Company Number: 4 86 | DCompany: CORNING INC 87 | Level: MAINTAINING 88 | Donated: 60000 89 | Company Number: 5 90 | DCompany: ELASTIC N.V. 91 | Level: PARTNER 92 | Donated: 7500 93 | Company Number: 6 94 | DCompany: FACEBOOK INC 95 | Level: SUSTAINABILITY 96 | Donated: 90000 97 | Company Number: 7 98 | DCompany: HUAWEI TECHNOLOGIES 99 | Level: SUSTAINABILITY 100 | Donated: 90000 101 | Company Number: 8 102 | DCompany: INTERNATIONAL BUSINESS MACHINES CORP 103 | Level: CONTRIBUTING 104 | Donated: 30000 105 | Company Number: 9 106 | DCompany: JPMORGAN CHASE & CO 107 | Level: SUPPORTING 108 | Donated: 15000 109 | Company Number: 10 110 | DCompany: MICROSOFT CORP 111 | Level: VISIONARY 112 | Donated: 150000 113 | Company Number: 11 114 | DCompany: NETFLIX INC 115 | Level: PARTNER 116 | Donated: 7500 117 | Company Number: 12 118 | DCompany: SALESFORCE.COM INC. 119 | Level: SUSTAINABILITY 120 | Donated: 90000 121 | Company Number: 13 122 | DCompany: SLACK TECHNOLOGIES INC. 
123 | Level: MAINTAINING 124 | Donated: 60000 125 | 126 | """ 127 | -------------------------------------------------------------------------------- /section1_challenge_1_answer.py: -------------------------------------------------------------------------------- 1 | # Perform an Excel VLOOKUP with a Python Dictionary 2 | 3 | # Modify the code below to match the Expected Output at the bottom 4 | 5 | import csv 6 | import os 7 | from pprint import pprint 8 | 9 | current_directory = os.getcwd() 10 | 11 | pycon_sponsors_filename = 'pycon_sponsors.csv' 12 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename) 13 | 14 | print(pycon_sponsors_filepath) 15 | 16 | sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000}, 17 | {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000}, 18 | {'sponsor_level': 'MAINTAINING', 'amount': 60000}, 19 | {'sponsor_level': 'CONTRIBUTING', 'amount': 30000}, 20 | {'sponsor_level': 'SUPPORTING', 'amount': 15000}, 21 | {'sponsor_level': 'PARTNER', 'amount': 7500}, 22 | {'sponsor_level': 'PARTICIPATING', 'amount': 3750}, 23 | {'sponsor_level': 'ASSOCIATE', 'amount': 1500}] 24 | 25 | sponsor_vlookup = {} 26 | 27 | for sponsor_level in sponsor_levels: 28 | sponsor_vlookup[sponsor_level['sponsor_level']] = sponsor_level['amount'] 29 | 30 | pprint(sponsor_levels) 31 | 32 | pycon_sponsors = [] 33 | 34 | print(pycon_sponsors_filepath) 35 | 36 | with open(pycon_sponsors_filepath, 'r') as f: 37 | rows = csv.reader(f) 38 | 39 | header = next(f) 40 | 41 | for row_number, row in enumerate(rows): 42 | ticker, name, level = row 43 | print("Company Number:\t", row_number, "\n\tDCompany:", name, "\n\tLevel: ",level, "\n\tDonated:", sponsor_vlookup[row[2]], "\n") 44 | 45 | """ 46 | Company Number: 0 47 | DCompany: ALPHABET INC. 48 | Level: VISIONARY 49 | Donated: 150000 50 | Company Number: 1 51 | DCompany: AMAZON COM INC 52 | Level: SUSTAINABILITY 53 | Donated: 90000 54 | Company Number: 2 55 | DCompany: BLOOMBERG 56 | Level: VISIONARY 57 | Donated: 150000 58 | Company Number: 3 59 | DCompany: CAPITAL ONE FINANCIAL CORP 60 | Level: MAINTAINING 61 | Donated: 60000 62 | Company Number: 4 63 | DCompany: CORNING INC 64 | Level: MAINTAINING 65 | Donated: 60000 66 | Company Number: 5 67 | DCompany: ELASTIC N.V. 68 | Level: PARTNER 69 | Donated: 7500 70 | Company Number: 6 71 | DCompany: FACEBOOK INC 72 | Level: SUSTAINABILITY 73 | Donated: 90000 74 | Company Number: 7 75 | DCompany: HUAWEI TECHNOLOGIES 76 | Level: SUSTAINABILITY 77 | Donated: 90000 78 | Company Number: 8 79 | DCompany: INTERNATIONAL BUSINESS MACHINES CORP 80 | Level: CONTRIBUTING 81 | Donated: 30000 82 | Company Number: 9 83 | DCompany: JPMORGAN CHASE & CO 84 | Level: SUPPORTING 85 | Donated: 15000 86 | Company Number: 10 87 | DCompany: MICROSOFT CORP 88 | Level: VISIONARY 89 | Donated: 150000 90 | Company Number: 11 91 | DCompany: NETFLIX INC 92 | Level: PARTNER 93 | Donated: 7500 94 | Company Number: 12 95 | DCompany: SALESFORCE.COM INC. 96 | Level: SUSTAINABILITY 97 | Donated: 90000 98 | Company Number: 13 99 | DCompany: SLACK TECHNOLOGIES INC. 
100 | Level: MAINTAINING 101 | Donated: 60000 102 | """ 103 | -------------------------------------------------------------------------------- /section1_challenge_2.py: -------------------------------------------------------------------------------- 1 | # Perform an Excel VLOOKUP with a Python Dictionary 2 | 3 | # Challenge 2 4 | # Modify the code below to sum up all the donations by the companies in the list 5 | 6 | import csv 7 | import os 8 | from pprint import pprint 9 | 10 | current_directory = os.getcwd() 11 | 12 | pycon_sponsors_filename = 'pycon_sponsors.csv' 13 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename) 14 | 15 | # print(pycon_sponsors_filepath) 16 | 17 | sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000}, 18 | {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000}, 19 | {'sponsor_level': 'MAINTAINING', 'amount': 60000}, 20 | {'sponsor_level': 'CONTRIBUTING', 'amount': 30000}, 21 | {'sponsor_level': 'SUPPORTING', 'amount': 15000}, 22 | {'sponsor_level': 'PARTNER', 'amount': 7500}, 23 | {'sponsor_level': 'PARTICIPATING', 'amount': 3750}, 24 | {'sponsor_level': 'ASSOCIATE', 'amount': 1500}] 25 | 26 | sponsor_vlookup = {} 27 | 28 | for sponsor_level in sponsor_levels: 29 | sponsor_vlookup[sponsor_level['sponsor_level']] = sponsor_level['amount'] 30 | 31 | pprint(sponsor_levels) 32 | 33 | pycon_sum = [] 34 | 35 | # print(pycon_sponsors_filepath) 36 | 37 | with open(pycon_sponsors_filepath, 'r') as f: 38 | rows = csv.reader(f) 39 | 40 | header = next(f) 41 | 42 | for row_number, row in enumerate(rows): 43 | ticker, name, level = row 44 | print("Company Number:\t", row_number, "\n\tDCompany:", name, "\n\tLevel: ",level, "\n\tDonated:", sponsor_vlookup[row[2]], "\n") 45 | 46 | """ 47 | Current Output: 48 | 49 | [{'amount': 150000, 'sponsor_level': 'VISIONARY'}, 50 | {'amount': 90000, 'sponsor_level': 'SUSTAINABILITY'}, 51 | {'amount': 60000, 'sponsor_level': 'MAINTAINING'}, 52 | {'amount': 30000, 'sponsor_level': 'CONTRIBUTING'}, 53 | {'amount': 15000, 'sponsor_level': 'SUPPORTING'}, 54 | {'amount': 7500, 'sponsor_level': 'PARTNER'}, 55 | {'amount': 3750, 'sponsor_level': 'PARTICIPATING'}, 56 | {'amount': 1500, 'sponsor_level': 'ASSOCIATE'}] 57 | 58 | Company Number: 0 59 | DCompany: ALPHABET INC. 60 | Level: VISIONARY 61 | Donated: 150000 62 | Company Number: 1 63 | DCompany: AMAZON COM INC 64 | Level: SUSTAINABILITY 65 | Donated: 90000 66 | Company Number: 2 67 | DCompany: BLOOMBERG 68 | Level: VISIONARY 69 | Donated: 150000 70 | Company Number: 3 71 | DCompany: CAPITAL ONE FINANCIAL CORP 72 | Level: MAINTAINING 73 | Donated: 60000 74 | Company Number: 4 75 | DCompany: CORNING INC 76 | Level: MAINTAINING 77 | Donated: 60000 78 | Company Number: 5 79 | DCompany: ELASTIC N.V. 80 | Level: PARTNER 81 | Donated: 7500 82 | Company Number: 6 83 | DCompany: FACEBOOK INC 84 | Level: SUSTAINABILITY 85 | Donated: 90000 86 | Company Number: 7 87 | DCompany: HUAWEI TECHNOLOGIES 88 | Level: SUSTAINABILITY 89 | Donated: 90000 90 | Company Number: 8 91 | DCompany: INTERNATIONAL BUSINESS MACHINES CORP 92 | Level: CONTRIBUTING 93 | Donated: 30000 94 | Company Number: 9 95 | DCompany: JPMORGAN CHASE & CO 96 | Level: SUPPORTING 97 | Donated: 15000 98 | Company Number: 10 99 | DCompany: MICROSOFT CORP 100 | Level: VISIONARY 101 | Donated: 150000 102 | Company Number: 11 103 | DCompany: NETFLIX INC 104 | Level: PARTNER 105 | Donated: 7500 106 | Company Number: 12 107 | DCompany: SALESFORCE.COM INC. 
108 | Level: SUSTAINABILITY 109 | Donated: 90000 110 | Company Number: 13 111 | DCompany: SLACK TECHNOLOGIES INC. 112 | Level: MAINTAINING 113 | Donated: 60000 114 | 115 | """ 116 | """ 117 | Expected Output: 118 | 119 | Total Sum: 1050000 120 | 121 | """ 122 | -------------------------------------------------------------------------------- /section1_challenge_2_answer.py: -------------------------------------------------------------------------------- 1 | # Perform an Excel VLOOKUP with a Python Dictionary 2 | # Modify the code below to sum up all the donations by the companies in the list 3 | 4 | import csv 5 | import os 6 | from pprint import pprint 7 | 8 | current_directory = os.getcwd() 9 | 10 | pycon_sponsors_filename = 'pycon_sponsors.csv' 11 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename) 12 | 13 | print(pycon_sponsors_filepath) 14 | 15 | sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000}, 16 | {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000}, 17 | {'sponsor_level': 'MAINTAINING', 'amount': 60000}, 18 | {'sponsor_level': 'CONTRIBUTING', 'amount': 30000}, 19 | {'sponsor_level': 'SUPPORTING', 'amount': 15000}, 20 | {'sponsor_level': 'PARTNER', 'amount': 7500}, 21 | {'sponsor_level': 'PARTICIPATING', 'amount': 3750}, 22 | {'sponsor_level': 'ASSOCIATE', 'amount': 1500}] 23 | 24 | sponsor_vlookup = {} 25 | 26 | for sponsor_level in sponsor_levels: 27 | sponsor_vlookup[sponsor_level['sponsor_level']] = sponsor_level['amount'] 28 | 29 | pprint(sponsor_levels) 30 | 31 | pycon_sum = [] 32 | 33 | print(pycon_sponsors_filepath) 34 | 35 | with open(pycon_sponsors_filepath, 'r') as f: 36 | rows = csv.reader(f) 37 | 38 | header = next(f) 39 | 40 | for row_number, row in enumerate(rows): 41 | ticker, name, level = row 42 | print("Company Number:\t", row_number, "\n\tDCompany:", name, "\n\tLevel: ",level, "\n\tDonated:", sponsor_vlookup[row[2]], "\n") 43 | value = int(sponsor_vlookup[row[2]]) 44 | pycon_sum.append(value) 45 | 46 | print("Total Sum", sum(pycon_sum)) 47 | 48 | """ 49 | Output: 50 | 51 | 1050000 52 | 53 | """ 54 | -------------------------------------------------------------------------------- /section1_challenge_3.py: -------------------------------------------------------------------------------- 1 | # Perform an Excel VLOOKUP with a Python Dictionary 2 | 3 | # Challenge 3 4 | # Create a function that takes a filepath as a parameter and returns the sum of donations 5 | 6 | import csv 7 | import os 8 | from pprint import pprint 9 | 10 | current_directory = os.getcwd() 11 | 12 | pycon_sponsors_filename = 'pycon_sponsors.csv' 13 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename) 14 | 15 | print(pycon_sponsors_filepath) 16 | 17 | sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000}, 18 | {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000}, 19 | {'sponsor_level': 'MAINTAINING', 'amount': 60000}, 20 | {'sponsor_level': 'CONTRIBUTING', 'amount': 30000}, 21 | {'sponsor_level': 'SUPPORTING', 'amount': 15000}, 22 | {'sponsor_level': 'PARTNER', 'amount': 7500}, 23 | {'sponsor_level': 'PARTICIPATING', 'amount': 3750}, 24 | {'sponsor_level': 'ASSOCIATE', 'amount': 1500}] 25 | 26 | sponsor_vlookup = {} 27 | 28 | for sponsor_level in sponsor_levels: 29 | sponsor_vlookup[sponsor_level['sponsor_level']] = sponsor_level['amount'] 30 | 31 | pprint(sponsor_levels) 32 | 33 | pycon_sum = [] 34 | 35 | print(pycon_sponsors_filepath) 36 | 37 | with open(pycon_sponsors_filepath, 'r') as 
f:
38 |     rows = csv.reader(f)
39 | 
40 |     header = next(f)
41 | 
42 |     for row_number, row in enumerate(rows):
43 |         ticker, name, level = row
44 |         print("Company Number:\t", row_number, "\n\tDCompany:", name, "\n\tLevel: ", level, "\n\tDonated:", sponsor_vlookup[row[2]], "\n")
45 |         value = int(sponsor_vlookup[row[2]])
46 |         pycon_sum.append(value)
47 | 
48 | print("Total Sum", sum(pycon_sum))
49 | 
50 | """
51 | Output:
52 | 
53 | 1050000
54 | 
55 | """
56 | 
--------------------------------------------------------------------------------
/section1_challenge_3_answer.py:
--------------------------------------------------------------------------------
1 | # Perform an Excel VLOOKUP with a Python Dictionary
2 | 
3 | # Challenge 3
4 | # Create a function that takes a filepath as a parameter and returns the sum of donations
5 | 
6 | import csv
7 | import os
8 | from pprint import pprint
9 | 
10 | current_directory = os.getcwd()
11 | 
12 | pycon_sponsors_filename = 'pycon_sponsors.csv'
13 | pycon_sponsors_filepath = os.path.join(current_directory, "data", pycon_sponsors_filename)
14 | 
15 | print(pycon_sponsors_filepath)
16 | 
17 | def sum_donations(filepath):
18 | 
19 |     sponsor_levels = [{'sponsor_level': 'VISIONARY', 'amount': 150000},
20 |                       {'sponsor_level': 'SUSTAINABILITY', 'amount': 90000},
21 |                       {'sponsor_level': 'MAINTAINING', 'amount': 60000},
22 |                       {'sponsor_level': 'CONTRIBUTING', 'amount': 30000},
23 |                       {'sponsor_level': 'SUPPORTING', 'amount': 15000},
24 |                       {'sponsor_level': 'PARTNER', 'amount': 7500},
25 |                       {'sponsor_level': 'PARTICIPATING', 'amount': 3750},
26 |                       {'sponsor_level': 'ASSOCIATE', 'amount': 1500}]
27 | 
28 |     sponsor_vlookup = {}
29 | 
30 |     for sponsor_level in sponsor_levels:
31 |         sponsor_vlookup[sponsor_level['sponsor_level']] = sponsor_level['amount']
32 | 
33 |     pycon_sum = []
34 | 
35 |     with open(filepath, 'r') as f:
36 |         rows = csv.reader(f)
37 | 
38 |         header = next(rows)  # skip the header row
39 | 
40 |         for row_number, row in enumerate(rows):
41 |             ticker, name, level = row
42 |             value = int(sponsor_vlookup[level])
43 |             pycon_sum.append(value)
44 | 
45 |     return sum(pycon_sum)  # return the total donated, not the list of individual amounts
46 | 
47 | 
48 | """
49 | Usage:
50 | 
51 | total_donations = sum_donations(pycon_sponsors_filepath)
52 | 
53 | print("Total Donation:\t", total_donations)  # 1050000
54 | 
55 | """
56 | 
--------------------------------------------------------------------------------
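A quick aside on the pattern these challenge files keep rebuilding: the for loop that turns sponsor_levels into the sponsor_vlookup dictionary can be written as a single dict comprehension. A minimal sketch, assuming the same sponsor_levels list of dicts shown above (this snippet is an illustration, not one of the repo's files):

    # build the VLOOKUP-style mapping {'VISIONARY': 150000, ...} in one expression
    sponsor_vlookup = {level['sponsor_level']: level['amount'] for level in sponsor_levels}

    print(sponsor_vlookup['PARTNER'])  # 7500

Both forms produce the same dictionary; the comprehension simply states the key-to-value rule in one line.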
--------------------------------------------------------------------------------
/section2-02-real-world-example-refactored.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import zipfile
4 | import re  # regular expression
5 | 
6 | import requests
7 | 
8 | import pandas as pd
9 | 
10 | # fixes display of dataframes in Python Console
11 | pd.set_option('display.float_format', lambda x: f'{x:.5f}')
12 | pd.set_option('display.max_columns', 500)
13 | pd.set_option('display.max_rows', 500)
14 | pd.set_option('display.width', 600)
15 | 
16 | current_directory = os.getcwd()
17 | 
18 | 
19 | def extract_zip_contents(filepath):
20 |     zip_file_local_extract_path = filepath.replace(".zip", "")
21 | 
22 |     # skip extraction if the target folder already exists
23 |     if os.path.exists(zip_file_local_extract_path):
24 | 
25 |         print("Folder already Exists!")
26 | 
27 |     else:
28 |         try:
29 | 
30 |             z = zipfile.ZipFile(filepath)  # open the downloaded .zip itself, not the extract folder
31 | 
32 |             z.extractall(zip_file_local_extract_path)
33 | 
34 |             print("Extracting Contents: \t", zip_file_local_extract_path)
35 |         except Exception:
36 |             print("Issue Extracting, Going to Skip :)")
37 |             return None
38 | 
39 |     return zip_file_local_extract_path
40 | 
41 | 
42 | def download_filings(start_year, end_year, output_directory):
43 |     quarters = ['q1', 'q2', 'q3', 'q4']
44 | 
45 |     zip_filepaths = []
46 | 
47 |     for year in range(start_year, end_year):  # note: range() stops before end_year
48 |         for quarter in quarters:
49 | 
50 |             url = rf'https://www.sec.gov/files/dera/data/financial-statement-data-sets/{year}{quarter}.zip'
51 | 
52 |             try:
53 | 
54 |                 # we can get the filename (basename) of the url using basename
55 |                 basename = os.path.basename(url)
56 | 
57 |                 print(basename)
58 | 
59 |                 zip_file_local_filepath = os.path.join(output_directory, basename)
60 | 
61 |                 print(zip_file_local_filepath)
62 | 
63 |                 zip_filepaths.append(zip_file_local_filepath)
64 | 
65 |                 if not os.path.exists(zip_file_local_filepath):
66 | 
67 |                     print(f"Downloading: \t{url}")
68 | 
69 |                     r = requests.get(url)
70 | 
71 |                     if r.status_code == 200:
72 | 
73 |                         print("Download Complete")
74 | 
75 |                         with open(zip_file_local_filepath, 'wb') as fd:
76 |                             fd.write(r.content)
77 | 
78 |                     else:
79 |                         print("Got an Error Code!")
80 | 
81 |                 else:
82 |                     print("It appears Zip File already exists", zip_file_local_filepath)
83 | 
84 |             except Exception as E:
85 |                 print("Error Downloading", url, E)
86 | 
87 |     return zip_filepaths
88 | 
89 | 
90 | def transform_data(numbers_filepath, submissions_filepath, df_sic_list, df_symbol_cik, metric="Revenues", form_type='10-'):
91 |     print("Transforming ", numbers_filepath)
92 | 
93 |     df_numbers = pd.read_csv(numbers_filepath, delimiter="\t")
94 | 
95 |     df_submissions = pd.read_csv(submissions_filepath, delimiter="\t")
96 | 
97 |     # convert sic to string
98 |     df_submissions['sic'] = df_submissions['sic'].astype('Int64').astype('str')
99 | 
100 |     df_submissions = df_submissions[['adsh', 'cik', 'name', 'sic', 'countryba', 'stprba', 'fye', 'form', 'period', 'filed', 'instance']]
101 | 
102 |     df_symbol_cik['symbol'] = df_symbol_cik['symbol'].str.upper()
103 | 
104 |     # create list of dataframe column names
105 |     submissions_columns = df_submissions.columns.tolist()
106 | 
107 |     # going to merge two dataframes into one
108 |     df_submissions_symbols = pd.merge(df_submissions, df_symbol_cik)
109 | 
110 |     # merge sic codes onto submission dataframe
111 |     df_submissions_symbols = pd.merge(df_submissions_symbols, df_sic_list, on="sic")
112 | 
113 |     # we can drop columns by name using drop
114 |     df_submissions_symbols = df_submissions_symbols.drop(columns=['instance'])
115 | 
116 |     new_submissions_columns = ["symbol", "industry_title"] + submissions_columns
117 | 
118 |     df_submissions_symbols = df_submissions_symbols.reindex(columns=new_submissions_columns)
119 | 
120 |     df_submissions_symbols = df_submissions_symbols[df_submissions_symbols['form'].str.contains(form_type, flags=re.IGNORECASE, regex=True)]
121 | 
122 |     df_submission_numbers = pd.merge(df_numbers, df_submissions_symbols, left_on='adsh', right_on='adsh', how='inner')
123 | 
124 |     new_column_order = ['cik',
125 |                         'symbol',
126 |                         'name',
127 |                         'sic',
128 |                         'industry_title',
129 |                         'countryba',
130 |                         'stprba',
131 |                         'fye',
132 |                         'form',
133 |                         'period',
134 |                         'filed',
135 |                         'adsh',
136 |                         'tag',
137 |                         'version',
138 |                         'coreg',
139 |                         'ddate',
140 |                         'qtrs',
141 |                         'uom',
142 |                         'value'
143 |                         ]
144 | 
145 |     # reorder columns
146 |     df_submission_numbers = df_submission_numbers.reindex(columns=new_column_order)
147 | 
148 |     # Group by: split-apply-combine
149 |     if metric:
150 |         df_values = df_submission_numbers[df_submission_numbers['tag'].isin([metric])]
151 |     else:
152 |         df_values = df_submission_numbers.copy()
153 | 
154 |     df_values = df_values.dropna(subset=['value'])
155 | 
156 |     # only show companies with 4 quarters (1 year) worth of data
157 |     df_values = df_values[df_values['qtrs'] == 4]
158 |     df_values = df_values[(df_values['uom'] == "USD") | (df_values['uom'] == "EUR")]
159 | 
160 |     df_values = df_values.sort_values('ddate', ascending=True)
161 | 
162 |     group = []
163 | 
164 |     for (symbol, qtrs), df_group in df_values.groupby(["symbol", "qtrs"]):
165 |         df_group = df_group.assign(pct_change=df_group['value'].pct_change())  # assign avoids SettingWithCopyWarning on the group slice
166 |         group.append(df_group)
167 | 
168 |     df_values_pct = pd.concat(group)
169 | 
170 |     df_values_pct = df_values_pct.sort_values('ddate', ascending=False)
171 | 
172 |     print("Done Transforming ", numbers_filepath)
173 | 
174 |     return df_values_pct
175 | 
176 | 
177 | def filter_ticker_list(df_submissions_symbols):
178 |     pycon_sponsors = [{'symbol': 'GOOG', 'name': 'ALPHABET INC.', 'sponsor_level': 'VISIONARY'},
179 |                       {'symbol': 'AMZN', 'name': 'AMAZON COM INC', 'sponsor_level': 'SUSTAINABILITY'},
180 |                       {'symbol': '#N/A', 'name': 'BLOOMBERG', 'sponsor_level': 'VISIONARY'},
181 |                       {'symbol': 'COF', 'name': 'CAPITAL ONE FINANCIAL CORP', 'sponsor_level': 'MAINTAINING'},
182 |                       {'symbol': 'GLW', 'name': 'CORNING INC', 'sponsor_level': 'MAINTAINING'},
183 |                       {'symbol': 'ESTC', 'name': 'ELASTIC N.V.', 'sponsor_level': 'PARTNER'},
184 |                       {'symbol': 'FB', 'name': 'FACEBOOK INC', 'sponsor_level': 'SUSTAINABILITY'},
185 |                       {'symbol': '#N/A', 'name': 'HUAWEI TECHNOLOGIES', 'sponsor_level': 'SUSTAINABILITY'},
186 |                       {'symbol': 'IBM', 'name': 'INTERNATIONAL BUSINESS MACHINES CORP', 'sponsor_level': 'CONTRIBUTING'},
187 |                       {'symbol': 'JPM', 'name': 'JPMORGAN CHASE & CO', 'sponsor_level': 'SUPPORTING'},
188 |                       {'symbol': 'MSFT', 'name': 'MICROSOFT CORP', 'sponsor_level': 'VISIONARY'},
189 |                       {'symbol': 'NFLX', 'name': 'NETFLIX INC', 'sponsor_level': 'PARTNER'},
190 |                       {'symbol': 'CRM', 'name': 'SALESFORCE.COM INC.', 'sponsor_level': 'SUSTAINABILITY'},
191 |                       {'symbol': 'WORK', 'name': 'SLACK TECHNOLOGIES INC.', 'sponsor_level': 'MAINTAINING'}]
192 | 
193 |     df_companies = pd.DataFrame(pycon_sponsors)
194 | 
195 |     ticker_list_pycon_sponsors = df_companies['symbol'].tolist()
196 | 
197 |     df_selected_submissions = df_submissions_symbols[df_submissions_symbols['symbol'].isin(ticker_list_pycon_sponsors)]
198 | 
199 |     new_submissions_columns = ['cik',
200 |                                'symbol',
201 |                                'name',
202 |                                'sic',
203 |                                'industry_title',
204 |                                'countryba',
205 |                                'stprba',
206 |                                'fye',
207 |                                'form',
208 |                                'period',
209 |                                'filed',
210 |                                'adsh'
211 |                                ]
212 | 
213 |     df_selected_submissions = df_selected_submissions.reindex(columns=new_submissions_columns)
214 | 
215 |     return df_selected_submissions
216 | 
217 | 
218 | def main(start_year, end_year):
219 |     url = 'https://www.sec.gov/include/ticker.txt'
220 | 
221 |     df_symbol_cik = pd.read_csv(url, delimiter="\t", names=['symbol', 'cik'])
222 | 
223 |     # standard industrial classification
224 |     sic_url = r'https://www.sec.gov/info/edgar/siccodes.htm'
225 |     # we can extract table from html by passing in url
226 |     sics_tables = pd.read_html(sic_url)
227 |     df_sic_list = sics_tables[0]
228 | 
229 |     # rename columns to lower, no spaces, and rename sic_code to sic
230 |     df_sic_list.columns = df_sic_list.columns.str.lower().str.replace(" ", "_").str.replace("sic_code", "sic")
231 | 
232 |     # convert sic column to string
233 |     df_sic_list['sic'] = df_sic_list['sic'].astype('Int64').astype('str')
234 | 
235 |     output_directory = os.path.join(current_directory, "zip-data")
236 | 
237 |     # create directory for zip files
238 |     if os.path.exists(output_directory):
239 |         print("Folder already Exists!")
240 |     else:
241 |         print("Folder doesn't exist")
242 |         os.mkdir(output_directory)
243 |         print("Created Directory!")
244 | 
245 |     zip_filepaths = download_filings(start_year, end_year, output_directory)
246 | 
247 |     zip_folders = []
248 | 
249 |     for zip_filepath in zip_filepaths:
250 |         zip_folder = extract_zip_contents(zip_filepath)
251 | 
252 |         if zip_folder:
253 |             zip_folders.append(zip_folder)
254 | 
255 |     # get list of all extracted files
256 |     files = glob.glob(os.path.join(output_directory, "*", "*.*"))  # os.path.join keeps the pattern cross-platform
257 | 
258 |     num_files = [file for file in files if "num.txt" in file]
259 |     sub_files = [file for file in files if "sub.txt" in file]
260 | 
261 |     pre_files = [file for file in files if "pre.txt" in file]
262 |     tag_files = [file for file in files if "tag.txt" in file]
263 |     readme_files = [file for file in files if "readme.htm" in file]
264 | 
265 |     num_files.sort(reverse=True)
266 |     sub_files.sort(reverse=True)
267 | 
268 |     if len(num_files) == len(sub_files):
269 |         sub_num_files = list(zip(sub_files, num_files))
270 | 
271 |     filings = []
272 | 
273 |     for sub_file, num_file in sub_num_files[1:5]:  # process a small sample of quarters
274 |         df_companies_pct_chg = transform_data(num_file, sub_file, df_sic_list, df_symbol_cik, metric="Revenues", form_type='10-')
275 | 
276 |         filings.append(df_companies_pct_chg)
277 | 
278 |     df_all_filings = pd.concat(filings)
279 | 
280 |     # df_all_filings = df_all_filings.dropna(subset=['pct_change'])
281 | 
282 |     # df_all_filings = df_all_filings[df_all_filings['pct_change'] > 0]
283 |     #
284 |     # df_all_filings = df_all_filings.drop_duplicates(keep='first', subset=['cik']).sort_values('value', ascending=False)
285 | 
286 |     df_all_filings.to_csv('all_filings.csv')
287 | 
288 | 
289 | if __name__ == "__main__":
290 |     start_year = 2020
291 |     end_year = 2022
292 | 
293 |     main(start_year, end_year)
294 | 
--------------------------------------------------------------------------------
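The grouped percent-change loop in transform_data above (split the frame into symbol/qtrs groups, call pct_change on each, collect the pieces, then pd.concat) can also be expressed with pandas' built-in grouped pct_change. A minimal equivalent sketch, assuming a df_values frame with the same 'symbol', 'qtrs', 'ddate', and 'value' columns (an alternative, not the repo's code):

    # sort first, exactly as the loop does: pct_change compares consecutive rows within a group
    df_values = df_values.sort_values('ddate', ascending=True)

    # one grouped operation replaces the explicit loop, the list, and pd.concat
    df_values['pct_change'] = df_values.groupby(['symbol', 'qtrs'])['value'].pct_change()

    df_values_pct = df_values.sort_values('ddate', ascending=False)

The result matches the loop's output, and keeping the grouping inside pandas avoids mutating group slices and is typically faster on large filing sets.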
243 | print("Created Directory!") 244 | 245 | zip_filepaths = download_filings(start_year, end_year, output_directory) 246 | 247 | zip_folders = [] 248 | 249 | for zip_filepath in zip_filepaths: 250 | zip_folder = extract_zip_contents(zip_filepath) 251 | 252 | if zip_folder: 253 | zip_folders.append(zip_folder) 254 | 255 | # get list of all extracted files 256 | files = glob.glob(output_directory + "\\*\\*.*") 257 | 258 | num_files = [file for file in files if "num.txt" in file] 259 | sub_files = [file for file in files if "sub.txt" in file] 260 | 261 | pre_files = [file for file in files if "pre.txt" in file] 262 | tag_files = [file for file in files if "tag.txt" in file] 263 | readme_files = [file for file in files if "readme.htm" in file] 264 | 265 | num_files.sort(reverse=True) 266 | sub_files.sort(reverse=True) 267 | 268 | if len(num_files) == len(sub_files): 269 | sub_num_files = list(zip(sub_files, num_files)) 270 | 271 | filings = [] 272 | 273 | for sub_file, num_file in sub_num_files[1:5]: 274 | df_companies_pct_chg = transform_data(num_file, sub_file, df_sic_list, df_symbol_cik, metric="Revenues", form_type='10-') 275 | 276 | filings.append(df_companies_pct_chg) 277 | 278 | df_all_filings = pd.concat(filings) 279 | 280 | # df_all_filings = df_all_filings.dropna(subset=['pct_change']) 281 | 282 | # df_all_filings = df_all_filings[df_all_filings['pct_change'] > 0] 283 | # 284 | # df_all_filings = df_all_filings.drop_duplicates(keep='first', subset=['cik']).sort_values('value', ascending=False) 285 | 286 | df_all_filings.to_csv('all_filings.csv') 287 | 288 | 289 | if __name__ == "__main__": 290 | start_year = 2020 291 | end_year = 2022 292 | 293 | main(start_year, end_year) 294 | -------------------------------------------------------------------------------- /section2_challenge.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Visit Awesome Public Datasets on Github 4 | 5 | https://github.com/awesomedata/awesome-public-datasets 6 | 7 | # Find a dataset you find interesting, easily downloadable, and format Pandas can work with 8 | 9 | # Create a Github Username 10 | 11 | # Think of an awesome project Name 12 | 13 | # Create a Github Repository 14 | 15 | # Create a new project structure using Cookiecutter 16 | 17 | # Commit and push your new project to Github 18 | 19 | # Write Code to Download the Dataset using Requests 20 | 21 | # Write Code to Transform & Analyze the Dataset using Pandas 22 | 23 | # Write Code to Visualize Dataset using Your favorite Pandas Visualization Library 24 | 25 | # Write Code to Display your Data in a Browser Using Flask 26 | 27 | # Refactor & clean up your code to make it easy to maintain and share 28 | 29 | # Setup Airflow Schedule to automatically download, transform, and output your results of your Analysis 30 | 31 | # Repeat, but with a different dataset 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:spreadsheets_to_dataframes/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 0.1.0
3 | commit = True
4 | tag = True
5 | 
6 | [bumpversion:file:setup.py]
7 | search = version='{current_version}'
8 | replace = version='{new_version}'
9 | 
10 | [bumpversion:file:spreadsheets_to_dataframes/__init__.py]
11 | search = __version__ = '{current_version}'
12 | replace = __version__ = '{new_version}'
13 | 
14 | [bdist_wheel]
15 | universal = 1
16 | 
17 | [flake8]
18 | exclude = docs
19 | 
20 | [aliases]
21 | # Define setup.py command aliases here
22 | test = pytest
23 | 
24 | [tool:pytest]
25 | collect_ignore = ['setup.py']
26 | 
27 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """The setup script."""
5 | 
6 | from setuptools import setup, find_packages
7 | 
8 | with open('README.rst') as readme_file:
9 |     readme = readme_file.read()
10 | 
11 | with open('HISTORY.rst') as history_file:
12 |     history = history_file.read()
13 | 
14 | requirements = ['Click>=7.0', ]
15 | 
16 | setup_requirements = ['pytest-runner', ]
17 | 
18 | test_requirements = ['pytest>=3', ]
19 | 
20 | setup(
21 |     author="Ryan S. McCoy",
22 |     author_email='github@ryansmccoy.com',
23 |     python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
24 |     classifiers=[
25 |         'Development Status :: 2 - Pre-Alpha',
26 |         'Intended Audience :: Developers',
27 |         'License :: OSI Approved :: MIT License',
28 |         'Natural Language :: English',
29 |         "Programming Language :: Python :: 2",
30 |         'Programming Language :: Python :: 2.7',
31 |         'Programming Language :: Python :: 3',
32 |         'Programming Language :: Python :: 3.5',
33 |         'Programming Language :: Python :: 3.6',
34 |         'Programming Language :: Python :: 3.7',
35 |     ],
36 |     description="Examples from Presentation",
37 |     entry_points={
38 |         'console_scripts': [
39 |             'spreadsheets_to_dataframes=spreadsheets_to_dataframes.cli:main',
40 |         ],
41 |     },
42 |     install_requires=requirements,
43 |     license="MIT license",
44 |     long_description=readme + '\n\n' + history,
45 |     include_package_data=True,
46 |     keywords='spreadsheets_to_dataframes',
47 |     name='spreadsheets_to_dataframes',
48 |     packages=find_packages(include=['spreadsheets_to_dataframes', 'spreadsheets_to_dataframes.*']),
49 |     setup_requires=setup_requirements,
50 |     test_suite='tests',
51 |     tests_require=test_requirements,
52 |     url='https://github.com/ryansmccoy/spreadsheets_to_dataframes',
53 |     version='0.1.0',
54 |     zip_safe=False,
55 | )
56 | 
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27, py35, py36, py37, flake8
3 | 
4 | [travis]
5 | python =
6 |     3.7: py37
7 |     3.6: py36
8 |     3.5: py35
9 |     2.7: py27
10 | 
11 | [testenv:flake8]
12 | basepython = python
13 | deps = flake8
14 | commands = flake8 spreadsheets_to_dataframes
15 | 
16 | [testenv]
17 | setenv =
18 |     PYTHONPATH = {toxinidir}
19 | deps =
20 |     -r{toxinidir}/requirements_dev.txt
21 | ; If you want to make tox run the tests with the same versions, create a
22 | ; requirements.txt with the pinned versions and uncomment the following line:
23 | ;     -r{toxinidir}/requirements.txt
24 | commands =
25 |     pip install -U pip
26 |     pytest --basetemp={envtmpdir}
27 | 
28 | 
--------------------------------------------------------------------------------
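One loose end worth noting: setup.py above registers a console script pointing at spreadsheets_to_dataframes.cli:main, but no cli module appears in this listing. A minimal sketch of a Click-based cli.py that would satisfy that entry point (the option and message are hypothetical, not the repo's actual CLI):

    import click


    @click.command()
    @click.option('--name', default='world', help='Name to greet.')
    def main(name):
        """Console script for spreadsheets_to_dataframes."""
        click.echo(f'Hello, {name}! The entry point is wired up.')


    if __name__ == '__main__':
        main()

With a module like this in place, installing the package puts a spreadsheets_to_dataframes command on the PATH that dispatches to main().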