'.format(i=visitor_counter)
18 |
# The function simply returns a message with the
# current date and time.
def get_time_message():
    """Return a human-readable message with the current date and time.

    Returns:
        str: a two-line message of the form
             "\nThe date is YYYY-MM-DD\nThe time is HH:MM:SS".
    """
    # Take a single timestamp so the date and time parts cannot disagree
    # when the call happens to straddle a second/day boundary. (The original
    # also had a first strftime() call whose result was discarded; removed.)
    now = datetime.now()
    date = now.strftime('%Y-%m-%d')
    time = now.strftime('%H:%M:%S')
    # NOTE(review): the original f-string literal was corrupted during
    # extraction (markup appears stripped); reconstructed to preserve the
    # visible text — confirm against version control.
    message = f"\nThe date is {date}\nThe time is {time}"
    return message
27 |
28 |
29 | # Augmenting the basic "Hello world" with a message
30 | # that shows the date and time
# Augmenting the basic "Hello world" with a message
# that shows the date and time
@app.route("/")
def home():
    """Home page: a greeting followed by the current date and time."""
    message = get_time_message()
    # NOTE(review): the original return literal was corrupted during
    # extraction (HTML markup appears stripped). Reconstructed minimally to
    # preserve the visible text; restore the full markup from version control.
    return "\nHello World!" + message + "\n"

app.run(host='0.0.0.0', port=5000)
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/11-Flask/B-main3.py:
--------------------------------------------------------------------------------
1 | # Import necessary libraries
2 | import base64
3 | from io import BytesIO
4 |
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 | from flask import Flask, jsonify, request
8 | from sqlalchemy import create_engine, text
9 | import pymysql
10 |
11 | app = Flask(__name__)
12 |
13 | # This code creates a connection to the database
# Connection settings for the class MySQL server.
# NOTE(review): credentials are hardcoded in source; fine for a classroom
# demo, but move them to environment variables for anything real.
db_params = dict(
    host='db.ipeirotis.org',
    user='student',
    db='citibike_fall2017',
    password='dwdstudent2015',
    encoding='utf8mb4',
)

# Build the SQLAlchemy URL and create the engine (connection pool).
conn_string = 'mysql+pymysql://{user}:{password}@{host}/{db}?charset={encoding}'.format(**db_params)

engine = create_engine(conn_string)
22 |
23 |
@app.route('/citibike_api', methods=['GET'])
def citibike_stations():
    """
    API endpoint to get Citibike station details from the database.
    """
    query = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"

    # Run the query inside a managed connection so it is always released.
    with engine.connect() as con:
        stations_df = pd.read_sql(text(query), con=con)

    # Package the rows as a list of dicts under the key "stations",
    # then JSON-ify the dictionary as the API response.
    return jsonify({"stations": stations_df.to_dict(orient='records')})
42 |
43 |
@app.route('/station_map', methods=['GET'])
def station_map():
    """
    API endpoint to get a scatter plot of Citibike stations on a map.

    Returns:
        JSON of the form {"image": <base64-encoded PNG>}.
    """
    # Connect to the database, execute the query, and get back the results
    sql = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"
    with engine.connect() as connection:
        stations = pd.read_sql(text(sql), con=connection)

    fig, ax = plt.subplots()
    stations.plot(kind='scatter', x='lon', y='lat', ax=ax)

    # Render the figure to an in-memory PNG and base64-encode it so it can
    # travel inside a JSON payload.
    buf = BytesIO()
    fig.savefig(buf, format="png")
    data = base64.b64encode(buf.getbuffer()).decode("ascii")

    # Close the figure: without this every request leaks a matplotlib
    # figure and the server's memory grows without bound.
    plt.close(fig)

    # We JSON-ify our dictionary and return it as the API response
    return jsonify({"image": data})
68 |
@app.route('/station_image', methods=['GET'])
def station_image():
    """
    API endpoint to get a scatter plot of Citibike stations on a map,
    returned directly as an inline HTML <img> element.
    """
    # Connect to the database, execute the query, and get back the results
    sql = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"
    with engine.connect() as connection:
        stations = pd.read_sql(text(sql), con=connection)

    fig, ax = plt.subplots()
    stations.plot(kind='scatter', x='lon', y='lat', ax=ax)

    # Render the figure to an in-memory PNG and base64-encode it so it can
    # be embedded in the HTML output.
    buf = BytesIO()
    fig.savefig(buf, format="png")
    data = base64.b64encode(buf.getbuffer()).decode("ascii")

    # Close the figure so repeated requests do not leak memory.
    plt.close(fig)

    # NOTE(review): the original literal here was corrupted during extraction
    # (it read `return f""`, i.e. the markup was stripped). Reconstructed as
    # the standard "embed a matplotlib PNG in HTML" recipe — confirm against
    # version control.
    return f'<img src="data:image/png;base64,{data}"/>'
90 |
91 |
@app.route('/station_status')
def station_status():
    """
    API endpoint to get the status of a specific Citibike station.

    Query parameters:
        station_id (int): id of the station whose history is requested.

    Returns:
        JSON with the station_id and its status records over time, or a
        JSON error object when the parameter is missing or not an integer.
    """
    # Get the station ID from the URL parameters
    param = request.args.get('station_id')
    try:
        param_value = int(param)
    except (TypeError, ValueError):
        # TypeError: parameter absent (param is None); ValueError: parameter
        # is not an integer. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so catch only what int() can raise.
        return jsonify({"error": "No station_id parameter given or other problem"})

    # Parameterized query — the value is bound, never string-interpolated.
    sql = '''SELECT available_bikes,
                    available_docks,
                    capacity,
                    available_bikes / capacity AS percent_full,
                    communication_time
             FROM status_fall2017
             WHERE id = :station_id'''

    with engine.connect() as con:
        station_status = pd.read_sql(text(sql),
                                     con=con,
                                     params={"station_id": param_value})

    station_status_over_time = station_status.to_dict(orient='records')

    api_results = {
        "station_id": param_value,
        "status_over_time": station_status_over_time
    }

    # We JSON-ify our dictionary and return it as the API response
    return jsonify(api_results)
126 |
# Main page
@app.route("/")
def index():
    """
    Main page of the web application.
    """
    # NOTE(review): the original HTML template was destroyed during
    # extraction (all markup stripped; only the title "Citibike API"
    # survives). This is a minimal reconstruction so the route still serves
    # a valid page — restore the real template from version control.
    page = '''<html>
    <head><title>Citibike API</title></head>
    <body><h1>Citibike API</h1></body>
    </html>'''
    return page
--------------------------------------------------------------------------------
/12-UNIX_Basics/A-Basic_Unix_Shell_Commands.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Overview\n",
8 | "--------\n",
9 | "\n",
10 | "_Readings: The [Appendix A of Learn Python the Hard Way](http://learnpythonthehardway.org/book/appendixa.html) also discusses the material below._\n",
11 | "\n",
12 | "Modern data science is impossible without some understanding of the Unix command line. Unix is a family of computer operating systems including the Mac’s OS X and Linux (technically, Linux is a Unix clone); Windows has also Unix emulators, which allow running Unix commands. In our class, we use the Linux (specifically, the Ubuntu distribution), running on the Amazon EC2 cloud infrastructure.\n",
13 | "\n",
14 | "Let's start:\n",
15 | "\n",
16 | "(_**Note**: In IPython, to call a command line script, you add an exclamation mark before the command. That's why you will see all the commands in this notebook being preceded by a `!` character._)"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### Understanding the folder structure\n",
24 | "\n",
25 | "Basic concepts\n",
26 | "* Hierarchical directory structure\n",
27 | "* Absolute vs. relative directories\n",
28 | "* Parent (..) and current (.) directories\n",
29 | "\n",
30 | "\n",
31 | "### `pwd`\n",
32 | "\n",
33 | "Prints the current directory. Type `pwd` in the shell prompt. This will tell you your current directory. "
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": true,
41 | "scrolled": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "!pwd"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "### `ls`\n",
53 | "\n",
54 | "Lists the contents of a directory or provide information about the specified file. Typical usage: \n",
55 | "\n",
56 | "`ls [options] [files or directories]`\n",
57 | "\n",
58 | "If you want to know the contents of this directory, type `ls -A`. "
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": true
66 | },
67 | "outputs": [],
68 | "source": [
69 | "!ls"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "By default, `ls` simply lists the contents of the current directory. There are several options that when used in conjunction with ls give more detailed information about the files or directories being queried. Here are a sample:\n",
77 | "\n",
78 | "+ `-A`: list all of the contents of the queried directory, even hidden files.\n",
79 | "+ `-l`: detailed format, display additional info for all files and directories.\n",
80 | "+ `-R`: recursively list the contents of any subdirectories.\n",
81 | "+ `-t`: sort files by the time of the last modification.\n",
82 | "+ `-S`: sort files by size.\n",
83 | "+ `-r`: reverse any sort order.\n",
84 | "+ `-h`: when used in conjunction with `-l`, gives a more human-readable output.\n",
85 | "\n"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "Let's try now to execute `ls` with a different set of options:"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {
99 | "collapsed": true
100 | },
101 | "outputs": [],
102 | "source": [
103 | "!ls -lh"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "### `cd`\n",
111 | "\n",
112 | "Change the current directory. Usage: \n",
113 | "\n",
114 | "`cd [directory to move to]`\n",
115 | "\n",
116 | "For example, to change to the `/home/ubuntu` directory:"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "collapsed": true
124 | },
125 | "outputs": [],
126 | "source": [
127 | "!cd /home/ubuntu"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "If we want to run two commands in a row, we separate them using the `;` character. For example, to change to a directory and show its contents:"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": true
142 | },
143 | "outputs": [],
144 | "source": [
145 | "!cd /home/ubuntu; ls -l"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "### `mkdir`\n",
153 | "\n",
154 | "Creates a new folder. For example, to create a new folder named `DealingWithData` under the current folder, we type:\n"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "!mkdir DealingWithData\n",
166 | "!ls -lA"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "### `rmdir` \n",
174 | "\n",
175 | "Removes a folder. (The folder must be empty for the command to succeed.)"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "collapsed": true
183 | },
184 | "outputs": [],
185 | "source": [
186 | "!rmdir DealingWithData"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "### `cp` \n",
194 | "\n",
195 | "Copies a file. Usage:\n",
196 | "\n",
197 | "`cp [source file] [destination file]`\n",
198 | "\n",
199 | "It can also be used to copy multiple files into a directory.\n",
200 | "\n",
201 | "`cp [source file1] [source file2] ... [destination directory]`\n",
202 | "\n",
203 | "For example, to copy the file 'A-Basic_Unix_Shell_Commands.ipynb' and name the file NotebookA.ipynb"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": null,
209 | "metadata": {
210 | "collapsed": true
211 | },
212 | "outputs": [],
213 | "source": [
214 | "!cp A-Basic_Unix_Shell_Commands.ipynb NotebookA.ipynb\n",
215 | "!ls -l "
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {},
221 | "source": [
222 | "Or we can copy the file to another folder. For example, the following command copies the file `A-Basic_Unix_Shell_Commands.ipynb` to folder `DealingWithData` and names the new file `NotebookA.ipynb`"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {
229 | "collapsed": true
230 | },
231 | "outputs": [],
232 | "source": [
233 | "!mkdir DealingWithData\n",
234 | "!cp A-Basic_Unix_Shell_Commands.ipynb DealingWithData/NotebookA.ipynb\n",
235 | "!ls -lA DealingWithData"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "### `rm` \n",
243 | "\n",
244 | "The `rm` command is used to delete a file.\n",
245 | "\n",
246 | "rm -r : deletes a folder, recursively"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {
253 | "collapsed": true
254 | },
255 | "outputs": [],
256 | "source": [
257 | "!rm DealingWithData/NotebookA.ipynb\n",
258 | "!rm NotebookA.ipynb"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {
265 | "collapsed": true
266 | },
267 | "outputs": [],
268 | "source": [
269 | "#clean up\n",
270 | "!rmdir DealingWithData"
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {},
276 | "source": [
277 | "### `mv`\n",
278 | "\n",
279 | "The `mv` command is similar to `cp` but it moves the file instead of just copying it. Effectively it performs a `cp` command, followed by an `rm` for the original file"
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "## Exercise\n",
287 | "\n",
288 | "* Find the current directory, using the `pwd` command.\n",
289 | "* Create two new directories, `dir1` and `dir2` with the `mkdir` command. \n",
290 | "* Use `ls` to confirm\n",
291 | "* Copy the file `../2-Introduction_to_Python/data/baseball.csv` to `dir1` and name it `file1.csv`. (Note: The absolute path for the file is `/home/ubuntu/jupyter/NYU_Notes/2-Introduction_to_Python/data/baseball.csv`. We use the `..` notation to refer to the parent directory of the current one.)\n",
292 | "* Copy the file `../3-SQL/data/imdb.sql.gz` to dir2 and name it `file2.sql.gz`\n",
293 | "* Move each file to the other directory (`file1.csv` to `dir2` and `file2.sql.gz` to `dir1`) with the `mv` command.\n",
294 | "* Delete both directories with the `rm -r` command.\n"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": null,
300 | "metadata": {
301 | "collapsed": true
302 | },
303 | "outputs": [],
304 | "source": [
305 | "# your code here\n"
306 | ]
307 | }
308 | ],
309 | "metadata": {
310 | "kernelspec": {
311 | "display_name": "Python 3",
312 | "language": "python",
313 | "name": "python3"
314 | },
315 | "language_info": {
316 | "codemirror_mode": {
317 | "name": "ipython",
318 | "version": 3
319 | },
320 | "file_extension": ".py",
321 | "mimetype": "text/x-python",
322 | "name": "python",
323 | "nbconvert_exporter": "python",
324 | "pygments_lexer": "ipython3",
325 | "version": "3.5.2"
326 | }
327 | },
328 | "nbformat": 4,
329 | "nbformat_minor": 1
330 | }
331 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/B-Fetching_Data_Using_CURL.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Getting Data using CURL\n",
8 | "-----------------------\n",
9 | "\n",
10 | "We now move into a more interesting topic: How to get data from Internet sources. For that, we will use a command-line tool of Unix, called `curl`. (Later in class, we will learn how to achieve the same using Python, but for quick testing, curl is often the standard method used.) We will also use a tool called `jq` to interact with JSON output. (Do not worry, we will revisit both these later in class.)\n",
11 | "\n",
12 | "_Often, curl and jq do not come preinstalled, so the first time that we use them, we need to issue the appropriate command for installing it. To install it, simply type:_"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {
19 | "collapsed": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "!sudo apt-get -y install curl\n",
24 | "!sudo apt-get -y install jq"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "Let's start by retrieving a simple text file, which we will use later in the class, to illustrate how different shell commands work. The sample data file is hosted online. You can use terminal commands to copy this remote file. Simply type:"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "!curl -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0'"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "The columns in this tab-separated data correspond to [order id] [time of order] [user id] [ordered item], something similar to what might be encountered in practice. If you wish, you can copy-paste the data written above into a text editor, making sure there is a newline following each of the ordered item columns (the columns with alphabetic characters)."
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "To store the output to a file, we also add the `-o [output file]` in the command. (We are also going to see in the next session how to use _output redirection_ to store the output to a file.)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": true
62 | },
63 | "outputs": [],
64 | "source": [
65 | "!curl -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0' -o data/sample.txt"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": true
73 | },
74 | "outputs": [],
75 | "source": [
76 | "!ls data/"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "This will pull the file to the directory `/home/ubuntu/data/`, creating a new file called `sample.txt`. If we do not want to see any statistics about the download, we can use the `-s` option:"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "collapsed": true
91 | },
92 | "outputs": [],
93 | "source": [
94 | "!curl -s -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0' -o data/sample.txt"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "And let's clean up:"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": true
109 | },
110 | "outputs": [],
111 | "source": [
112 | "!rm data/sample.txt"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "Now, let's try to use curl to get access to some real data. A key component of today's data ecosystem is the existence of `Web APIs` which provide functionality for a variety of tasks.\n",
120 | "\n",
121 | "#### Where am I?\n",
122 | "\n",
123 | "For example, let's try to figure out programmatically the location of the computer where the iPython server is running. We can access the API call by issuing the following command:\n",
124 | "\n"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {
131 | "collapsed": true
132 | },
133 | "outputs": [],
134 | "source": [
135 | "!curl -s \"http://freegeoip.net/json/\" | jq ."
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "While this does not look nice to a human, for a computer it is a perfectly legitimate answer. This format is called \"JSON\", and is an efficient and very commonly used way to transfer data today on the Internet.\n",
143 | "(The `| jq .` part of the command controls how the output is presented.)"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "Now, let's examine a few more web APIs, just for fun:\n",
151 | "\n",
152 | "#### What's the weather?\n",
153 | "\n",
154 | "Now, let's use the OpenWeather API to get the weather details in our location. (The details of the API calls are available at http://openweathermap.org/api.)"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "!curl -s \"http://api.openweathermap.org/data/2.5/weather?\\\n",
166 | "&appid=ffb7b9808e07c9135bdcc7d1e867253d\\\n",
167 | "&q=New%20York,NY,USA\\\n",
168 | "&units=imperial\\\n",
169 | "&mode=json\" | jq ."
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "#### What's the sentiment?\n",
177 | "\n",
178 | "Now let's try to use a web service to automatically analyze the sentiment for a piece of text. (The service comes from the [IBM's Alchemy API](http://www.alchemyapi.com/api/sentiment/textc.html#textsentiment))"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {
185 | "collapsed": true
186 | },
187 | "outputs": [],
188 | "source": [
189 | "!curl -s \"http://access.alchemyapi.com/calls/text/TextGetTextSentiment\" \\\n",
190 | "-d \"outputMode=json\" \\\n",
191 | "-d \"apikey=4b46c7859a7be311b6f9389b12504e302cac0a55\" \\\n",
192 | "-d \"text=I hate this product! \" | jq ."
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "#### And a few synonyms\n",
200 | "\n",
201 | "And now just a demo of a web API that I created myself a few years back. It analyzes Wikipedia to figure out different ways that people use to refer to the same entity\n",
202 | "\n"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "collapsed": true
210 | },
211 | "outputs": [],
212 | "source": [
213 | "!curl -s \"http://wikisynonyms.ipeirotis.com/api/Donald_Trump\" | jq ."
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "## Exercise\n",
221 | "\n",
222 | "The following websites contain listing of many useful APIs\n",
223 | "\n",
224 | "* https://www.mashape.com \n",
225 | "* http://www.programmableweb.com/\n",
226 | "* http://www.mashery.com/\n",
227 | "* http://apigee.com/ \n",
228 | "\n",
229 | "Mashape is my own personal favorite in terms of user-friendliness and also has examples directly expressed using CURL. but the others are pretty nice as well. Your task: search through these websites and find a web API that does something that you like. Use CURL to issue a web API call to this service. "
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "collapsed": true
237 | },
238 | "outputs": [],
239 | "source": []
240 | }
241 | ],
242 | "metadata": {
243 | "kernelspec": {
244 | "display_name": "Python 3",
245 | "language": "python",
246 | "name": "python3"
247 | },
248 | "language_info": {
249 | "codemirror_mode": {
250 | "name": "ipython",
251 | "version": 3
252 | },
253 | "file_extension": ".py",
254 | "mimetype": "text/x-python",
255 | "name": "python",
256 | "nbconvert_exporter": "python",
257 | "pygments_lexer": "ipython3",
258 | "version": "3.5.2"
259 | }
260 | },
261 | "nbformat": 4,
262 | "nbformat_minor": 1
263 | }
264 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/D-Running_Tasks_In_The_Background.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Creating Scripts\n",
8 | "\n",
9 | "Now, let's try to create our first \"script\", which we can execute from the shell directly.\n",
10 | "\n",
11 | "* Create a file called `get_weather.py`\n",
12 | "* Type the commands from the earlier module\n",
13 | "```python\n",
14 | " #!/usr/bin/python3\n",
15 | " import requests\n",
16 | " freegeoip_url = 'http://freegeoip.net/json/'\n",
17 | " resp = requests.get(freegeoip_url)\n",
18 | " data = resp.json()\n",
19 | " lon = data[\"longitude\"]\n",
20 | " lat = data[\"latitude\"]\n",
21 | "\n",
22 | " openweathermap_url = \"http://api.openweathermap.org/data/2.5/weather\"\n",
23 | " parameters = {\n",
24 | " 'lat' : str(lat),\n",
25 | " 'lon' : str(lon),\n",
26 | " 'units' : 'imperial',\n",
27 | " 'mode' : 'json',\n",
28 | " 'appid' : 'ffb7b9808e07c9135bdcc7d1e867253d'\n",
29 | " }\n",
30 | " resp = requests.get(openweathermap_url, params=parameters)\n",
31 | " data = resp.json()\n",
32 | " print(\"Location:\", data['name'])\n",
33 | " print(\"Weather:\", data['weather'][0]['description'])\n",
34 | " print(\"Temperature:\", data['main']['temp'])\n",
35 | "```\n",
36 | "* Finally type `python3 get_weather.py` and see what happens."
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "#### Exercise\n",
44 | "\n",
45 | "* Modify the script, to print the location, weather and temperature in tab-separated columns.\n",
46 | "* Modify the script, to print a header at the beginning; then put the code in an infinite loop (`while True:...`) and get the code the execute for ever. Use the `import time` and `time.sleep(...)` to add a delay of a few seconds between continuous executions of the code.\n",
47 | "* Modify the script, write the output to a file instead of print to the screen.\n"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": []
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "### Running Jobs in the Background (until terminating the Terminal)\n",
60 | "\n",
61 | "Sometimes, we would like to start a task, and let it run in the background. To do so, we simply add the character `&` at the end of the command. For example, if we want to run our script from above and get the task to run in the background, we can type:\n",
62 | "\n",
63 | "`python3 get_weather.py &`\n",
64 | "\n",
65 | "### Running Jobs in the Background (even after terminating the Terminal)\n",
66 | "\n",
67 | "When we use the `&` operator, the task runs in the background, but stops running the moment we logout from our ssh session. To allow the task to continue running, even after we log out, we can use the `nohup` command, as follows:\n",
68 | "\n",
69 | "`nohup python3 get_weather.py &`\n",
70 | "\n",
71 | "### Redirecting the output\n",
72 | "\n",
73 | "If your script has an output on the screen, you often want to save this. To store the output of your screen to a file, when you put a script to run in the background, you use the `> filename.txt` command to store the output in the `filename.txt`. For example:\n",
74 | "\n",
75 | "`nohup python3 get_weather.py > weather.txt &`\n",
76 | "\n",
77 | "will store the output into the file weather.txt instead of printing it on the screen."
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "## Cron: Scheduling Tasks (vs running them continuously in the background)"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "The approach that we described above assumes that the task will be running forever in the background. However, most of the time our script is waiting, executing the `time.sleep` command. \n",
97 | "\n",
98 | "Instead of having our script sleeping, we can use the **cron** command to execute desired tasks (in the background) at designated times. So, let's remove the **while True** and the **time.sleep** commands from our script, and let's see how we can use cron instead.\n",
99 | "\n",
100 | "To use cron for task, add entries to your crontab file. Start the crontab editor from a terminal window:\n",
101 | "\n",
102 | "`sudo crontab -e`\n",
103 | "\n",
104 | "A crontab is a simple text file with a list of commands meant to be run at specified times and these jobs will run regardless of whether the user is actually logged into the system. \n",
105 | "\n",
106 | "\n",
107 | "\n",
108 | "### The structure of the crontab file\n",
109 | "\n",
110 | "This is how a cron job is laid out:\n",
111 | "\n",
112 | "minute (0-59), hour (0-23, 0 = midnight), day (1-31), month (1-12), weekday (0-6, 0 = Sunday), command\n",
113 | "\n",
114 | "and each line of the crontab file has the following format:\n",
115 | "\n",
116 | "`minute hour day_of_month month day_of_week command`\n",
117 | "\n",
118 | "Each of the parts is separated by a space, with the final part (the command) having one or more spaces in it. \n",
119 | "For example, you can run your script at 5 a.m every week with:\n",
120 | "\n",
121 | "`0 5 * * 1 /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
122 | "\n",
123 | "#### More examples\n",
124 | "\n",
125 | "`01 04 1 1 1 /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
126 | "\n",
127 | "The above example will run our script at 4:01am on January 1st plus every Monday in January. An asterisk (\\*) can be used so that every instance (every hour, every weekday, every month, etc.) of a time period is used. Code:\n",
128 | "\n",
129 | "\n",
130 | "`01 04 * * * /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
131 | "\n",
132 | "The above example will run /usr/bin/somedirectory/somecommand at 4:01am on every day of every month.\n",
133 | "\n",
134 | "Comma-separated values can be used to run more than one instance of a particular command within a time period. Dash-separated values can be used to run a command continuously. For example:\n",
135 | "\n",
136 | "`01,31 04,05 1-15 1,6 * /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
137 | "\n",
138 | "The above example will run /usr/bin/somedirectory/somecommand at 01 and 31 past the hours of 4:00am and 5:00am on the 1st through the 15th of every January and June.\n",
139 | "\n",
140 | "The `/usr/bin/python3 /home/ubuntu/get_temperature.py` text in the above examples indicates the task which will be run at the specified times. It is recommended that you use the full path to the desired commands as shown in the above examples. Enter which somecommand in the terminal to find the full path to somecommand. The crontab will begin running as soon as it is properly edited and saved.\n",
141 | "\n",
142 | "(See https://help.ubuntu.com/community/CronHowto for more details)\n"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "### Exercise\n",
150 | "\n",
151 | "* Use a cron job to keep track of the temperature in New York, running every minute. Use the redirect operator to store the temperature in a text file called /home/ubuntu/nyc-temperatures.txt, appending a new line for every measurement."
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {},
158 | "outputs": [],
159 | "source": []
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {},
164 | "source": [
165 | "* Use the `http://api.open-notify.org/iss-now.json` API and keep track of the location of the International Space Station (ISS) over time."
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": []
174 | }
175 | ],
176 | "metadata": {
177 | "colabVersion": "0.1",
178 | "kernelspec": {
179 | "display_name": "Python 3",
180 | "language": "python",
181 | "name": "python3"
182 | },
183 | "language_info": {
184 | "codemirror_mode": {
185 | "name": "ipython",
186 | "version": 3
187 | },
188 | "file_extension": ".py",
189 | "mimetype": "text/x-python",
190 | "name": "python",
191 | "nbconvert_exporter": "python",
192 | "pygments_lexer": "ipython3",
193 | "version": "3.6.6"
194 | }
195 | },
196 | "nbformat": 4,
197 | "nbformat_minor": 1
198 | }
199 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/cronhelp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Cron assignment, all done from a notebook\n",
8 | "The following cells are an example of how to do the cron assignment\n",
9 | "without having to go to the terminal, using notepad etc.\n",
10 |     "Everything can be done from notebook cells\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "#### First, start the cron daemon, to make sure it is running\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 17,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | " * Starting periodic command scheduler cron\r\n",
30 | " ...done.\r\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "!sudo service cron start"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "#### Second, create the getTemp shell file by \"echoing\" the command lines into a file\n",
43 | "Note: the use of \\'s to escape some of the special characters,\n",
44 | "but first remove an existing NYC-Temperatures.txt file \n"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 18,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "!rm /home/nwhite/NYC-Temperatures.txt\n"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 26,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "#!/bin/sh\r\n",
66 | "curl -s \"http://api.openweathermap.org/data/2.5/weather?zip=10012&mode=json&units=imperial&APPID=ffb7b9808e07c9135bdcc7d1e867253d \"|jq '.main.temp' \r\n"
67 | ]
68 | }
69 | ],
70 | "source": [
71 |     "# create the getTemp file and change its permissions to make it readable and executable\n",
72 | "# First put in the line for the \n",
73 | "# That tells linux to interpret this line with the bourne shell\n",
74 | "\n",
75 | "!rm /home/nwhite/getTemp\n",
76 | "!echo \"#!/bin/sh\" >/home/nwhite/getTemp\n",
77 | "\n",
78 | "# Next, add the line to retrieve the temperature from openweather\n",
79 | "#\n",
80 |     "# Put your APIKEY to replace the APPID below....\n",
81 | "#\n",
82 | "# now add the call to the openweathermap api\n",
83 | "# Note that we need to surround the curl URL in escaped single quotes\n",
84 | "!echo \"curl -s \\\"http://api.openweathermap.org/data/2.5/weather?zip=10012&mode=json&units=imperial&APPID=ffb7b9808e07c9135bdcc7d1e867253d \\\"|jq '.main.temp' \" >>/home/nwhite/getTemp \n",
85 | "\n",
86 | "# change permissions\n",
87 | "!chmod a+rx /home/nwhite/getTemp\n",
88 | "# look at the file....\n",
89 | "!cat /home/nwhite/getTemp\n"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "### We should now have a good getTemp file, so create the crontab entry ...\n",
97 |     "echo the crontab entry as input to crontab\n",
98 |     "`!echo \"*/10 * * * * /home/nwhite/getTemp\"|crontab`\n",
99 | "(You might start with 1 minute intervals to test)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 30,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\r\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "!echo \"*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\"|crontab\n",
117 | "#list the crontab entry to see if it is correct\n",
118 | "!crontab -l\n"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 31,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "76.5\r\n",
131 | "76.5\r\n",
132 | "76.5\r\n",
133 | "76.5\r\n",
134 | "76.5\r\n"
135 | ]
136 | }
137 | ],
138 | "source": [
139 |     "### Now look at the NYC-Temperatures file (Note it may take a few minutes to be created)\n",
140 |     "!cat /home/nwhite/NYC-Temperatures.txt\n",
141 |     "# in 10 minutes, you should see an entry !!!\n"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 32,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "name": "stdout",
151 | "output_type": "stream",
152 | "text": [
153 | "*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\r\n"
154 | ]
155 | }
156 | ],
157 | "source": [
158 | "!crontab -l"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 33,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "name": "stdout",
168 | "output_type": "stream",
169 | "text": [
170 | "\u001b[0;39m76.5\u001b[0m\r\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "!/home/nwhite/getTemp"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.6.5"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 2
207 | }
208 |
--------------------------------------------------------------------------------
/13-Network_Analysis/README.md:
--------------------------------------------------------------------------------
1 | See also https://github.com/khof312/networks_tutorial
2 |
--------------------------------------------------------------------------------
/13-Network_Analysis/images/MySQL_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/MySQL_scheme.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/RDBMS_vs_GRAPHDB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/RDBMS_vs_GRAPHDB.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/ex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/ex.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/free_movies.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/free_movies.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/graph.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/graph.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/graph.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/neo4j-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/neo4j-python.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/new_db_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/new_db_1.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/new_db_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/new_db_2.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/scheme.jpg
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/digits.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/iris_petal_sepal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/iris_petal_sepal.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/linear_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/linear_regression.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/ml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/ml.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/sklearn_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/sklearn_logo.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/svm1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/svm1.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/svm2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/svm2.png
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Canny Edge Detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Canny Edge Detection"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "In this chapter, we will learn about\n",
18 | "\n",
19 | "- Concept of Canny edge detection\n",
20 | "\n",
21 | "- OpenCV functions for that : cv2.Canny()\n",
22 | "\n",
23 | "#### Theory\n",
24 | "\n",
25 |     "Canny Edge Detection is a popular edge detection algorithm. It was developed by John F. Canny in 1986. It is a multi-stage algorithm and we will go through each stage.\n",
26 | "\n",
27 | "##### 1. Noise Reduction\n",
28 | "\n",
29 | "Since edge detection is susceptible to noise in the image, first step is to remove the noise in the image with a 5x5 Gaussian filter. We have already seen this in previous chapters.\n",
30 | "\n",
31 | "##### 2. Finding Intensity Gradient of the Image\n",
32 | "\n",
33 | "Smoothened image is then filtered with a Sobel kernel in both horizontal and vertical direction to get first derivative in horizontal direction (G_x) and vertical direction (G_y). From these two images, we can find edge gradient and direction for each pixel as follows:\n",
34 | "\n",
35 | "Edge\\_Gradient \\; (G) = \\sqrt{G_x^2 + G_y^2}\n",
36 | "\n",
37 | "Angle \\; (\\theta) = \\tan^{-1} \\bigg(\\frac{G_y}{G_x}\\bigg)\n",
38 | "\n",
39 | "Gradient direction is always perpendicular to edges. It is rounded to one of four angles representing vertical, horizontal and two diagonal directions.\n",
40 | "\n",
41 | "##### 3. Non-maximum Suppression \n",
42 | "\n",
43 | "After getting gradient magnitude and direction, a full scan of image is done to remove any unwanted pixels which may not constitute the edge. For this, at every pixel, pixel is checked if it is a local maximum in its neighborhood in the direction of gradient. Check the image below:"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {
49 | "collapsed": false
50 | },
51 | "source": [
52 | ""
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "source": [
61 | "Point A is on the edge ( in vertical direction). Gradient direction is normal to the edge. Point B and C are in gradient directions. So point A is checked with point B and C to see if it forms a local maximum. If so, it is considered for next stage, otherwise, it is suppressed ( put to zero).\n",
62 | "\n",
63 | "In short, the result you get is a binary image with “thin edges”.\n",
64 | "\n",
65 | "#### 4. Hysteresis Thresholding\n",
66 | "\n",
67 |     "This stage decides which of all the edges are really edges and which are not. For this, we need two threshold values, minVal and maxVal. Any edges with intensity gradient more than maxVal are sure to be edges and those below minVal are sure to be non-edges, so discarded. Those that lie between these two thresholds are classified edges or non-edges based on their connectivity. If they are connected to “sure-edge” pixels, they are considered to be part of edges. Otherwise, they are also discarded. See the image below:"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {
73 | "collapsed": true
74 | },
75 | "source": [
76 | ""
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "The edge A is above the maxVal, so considered as “sure-edge”. Although edge C is below maxVal, it is connected to edge A, so that also considered as valid edge and we get that full curve. But edge B, although it is above minVal and is in same region as that of edge C, it is not connected to any “sure-edge”, so that is discarded. So it is very important that we have to select minVal and maxVal accordingly to get the correct result.\n",
84 | "\n",
85 | "This stage also removes small pixels noises on the assumption that edges are long lines.\n",
86 | "\n",
87 | "So what we finally get is strong edges in the image.\n",
88 | "\n",
89 | "#### Canny Edge Detection in OpenCV\n",
90 | "\n",
91 | "OpenCV puts all the above in single function, cv2.Canny(). We will see how to use it. First argument is our input image. Second and third arguments are our minVal and maxVal respectively. Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By default it is 3. Last argument is L2gradient which specifies the equation for finding gradient magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it uses this function: Edge\\_Gradient (G) = |G_x| + |G_y|. By default, it is False."
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 1,
97 | "metadata": {
98 | "collapsed": true
99 | },
100 | "outputs": [],
101 | "source": [
102 | "import cv2\n",
103 | "import numpy as np\n",
104 | "from matplotlib import pyplot as plt\n",
105 | "\n",
106 | "img = cv2.imread('images/test.jpg',0)\n",
107 | "edges = cv2.Canny(img,100,200)\n",
108 | "\n",
109 | "plt.subplot(121),plt.imshow(img,cmap = 'gray')\n",
110 | "plt.title('Original Image'), plt.xticks([]), plt.yticks([])\n",
111 | "plt.subplot(122),plt.imshow(edges,cmap = 'gray')\n",
112 | "plt.title('Edge Image'), plt.xticks([]), plt.yticks([])\n",
113 | "\n",
114 | "plt.show()"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "See the result below:\n",
122 | ""
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {
129 | "collapsed": true
130 | },
131 | "outputs": [],
132 | "source": []
133 | }
134 | ],
135 | "metadata": {
136 | "kernelspec": {
137 | "display_name": "Python 2",
138 | "language": "python",
139 | "name": "python2"
140 | },
141 | "language_info": {
142 | "codemirror_mode": {
143 | "name": "ipython",
144 | "version": 2
145 | },
146 | "file_extension": ".py",
147 | "mimetype": "text/x-python",
148 | "name": "python",
149 | "nbconvert_exporter": "python",
150 | "pygments_lexer": "ipython2",
151 | "version": "2.7.3"
152 | }
153 | },
154 | "nbformat": 4,
155 | "nbformat_minor": 0
156 | }
157 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Geometric Transformations of Images.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Geometric Transformations of Images"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Goals\n",
15 | "\n",
16 | "- Learn to apply different geometric transformation to images like translation, rotation, affine transformation etc.\n",
17 | "\n",
18 | "- You will see these functions: cv2.getPerspectiveTransform"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "collapsed": false
25 | },
26 | "source": [
27 | "#### Transformations\n",
28 | "OpenCV provides two transformation functions, cv2.warpAffine and cv2.warpPerspective, with which you can have all kinds of transformations. cv2.warpAffine takes a 2x3 transformation matrix while cv2.warpPerspective takes a 3x3 transformation matrix as input.\n",
29 | "\n",
30 | "#### Scaling\n",
31 | "Scaling is just resizing of the image. OpenCV comes with a function cv2.resize() for this purpose. The size of the image can be specified manually, or you can specify the scaling factor. Different interpolation methods are used. Preferable interpolation methods are cv2.INTER_AREA for shrinking and cv2.INTER_CUBIC (slow) & cv2.INTER_LINEAR for zooming. By default, interpolation method used is cv2.INTER_LINEAR for all resizing purposes. You can resize an input image either of following methods:"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "import cv2\n",
43 | "import numpy as np\n",
44 | "\n",
45 | "img = cv2.imread('images/opencv_test1.jpg')\n",
46 | "\n",
47 | "res = cv2.resize(img,None,fx=2, fy=2, interpolation = cv2.INTER_CUBIC)\n",
48 | "\n",
49 | "#OR\n",
50 | "\n",
51 | "height, width = img.shape[:2]\n",
52 | "res = cv2.resize(img,(2*width, 2*height), interpolation = cv2.INTER_CUBIC)"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "#### Translation \n",
60 | "Translation is the shifting of object’s location. If you know the shift in (x,y) direction, let it be (t_x,t_y), you can create the transformation matrix \\textbf{M} as follows:\n",
61 | "\n",
62 | "M = \\begin{bmatrix} 1 & 0 & t_x \\\\ 0 & 1 & t_y \\end{bmatrix}\n",
63 | "\n",
64 |     "You can make it into a Numpy array of type np.float32 and pass it into cv2.warpAffine() function. See below example for a shift of (100,50):"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": true
72 | },
73 | "outputs": [],
74 | "source": [
75 | "import cv2\n",
76 | "import numpy as np\n",
77 | "\n",
78 | "img = cv2.imread('images/opencv_test1.jpg',0)\n",
79 | "rows,cols = img.shape\n",
80 | "\n",
81 | "M = np.float32([[1,0,100],[0,1,50]])\n",
82 | "dst = cv2.warpAffine(img,M,(cols,rows))\n",
83 | "\n",
84 | "cv2.imshow('img',dst)\n",
85 | "cv2.waitKey(0)\n",
86 | "cv2.destroyAllWindows()"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "Warning\n",
94 | "\n",
95 | "Third argument of the cv2.warpAffine() function is the size of the output image, which should be in the form of (width, height). Remember width = number of columns, and height = number of rows."
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "See the result below:"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | ""
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "source": [
118 | "#### Rotation\n",
119 | "Rotation of an image for an angle \\theta is achieved by the transformation matrix of the form\n",
120 | "\n",
121 | "M = \\begin{bmatrix} cos\\theta & -sin\\theta \\\\ sin\\theta & cos\\theta \\end{bmatrix}\n",
122 | "\n",
123 | "But OpenCV provides scaled rotation with adjustable center of rotation so that you can rotate at any location you prefer. Modified transformation matrix is given by\n",
124 | "\n",
125 | "\\begin{bmatrix} \\alpha & \\beta & (1- \\alpha ) \\cdot center.x - \\beta \\cdot center.y \\\\ - \\beta & \\alpha & \\beta \\cdot center.x + (1- \\alpha ) \\cdot center.y \\end{bmatrix}\n",
126 | "\n",
127 | "where:\n",
128 | "\n",
129 | "\\begin{array}{l} \\alpha = scale \\cdot \\cos \\theta , \\\\ \\beta = scale \\cdot \\sin \\theta \\end{array}\n",
130 | "\n",
131 | "To find this transformation matrix, OpenCV provides a function, cv2.getRotationMatrix2D. Check below example which rotates the image by 90 degree with respect to center without any scaling."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "img = cv2.imread('images/opencv_test1.jpg',0)\n",
143 | "rows,cols = img.shape\n",
144 | "\n",
145 | "M = cv2.getRotationMatrix2D((cols/2,rows/2),90,1)\n",
146 | "dst = cv2.warpAffine(img,M,(cols,rows))"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | ""
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "#### Affine Transformation\n",
161 | "In affine transformation, all parallel lines in the original image will still be parallel in the output image. To find the transformation matrix, we need three points from input image and their corresponding locations in output image. Then cv2.getAffineTransform will create a 2x3 matrix which is to be passed to cv2.warpAffine.\n",
162 | "\n",
163 | "Check below example, and also look at the points I selected (which are marked in Green color):"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {
170 | "collapsed": false
171 | },
172 | "outputs": [],
173 | "source": [
174 | "img = cv2.imread('images/opencv_test1.jpg')\n",
175 | "rows,cols,ch = img.shape\n",
176 | "\n",
177 | "pts1 = np.float32([[50,50],[200,50],[50,200]])\n",
178 | "pts2 = np.float32([[10,100],[200,50],[100,250]])\n",
179 | "\n",
180 | "M = cv2.getAffineTransform(pts1,pts2)\n",
181 | "\n",
182 | "dst = cv2.warpAffine(img,M,(cols,rows))\n",
183 | "\n",
184 | "plt.subplot(121),plt.imshow(img),plt.title('Input')\n",
185 | "plt.subplot(122),plt.imshow(dst),plt.title('Output')\n",
186 | "plt.show()"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "See the result:\n",
194 | ""
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "#### Perspective Transformation\n",
202 | "For perspective transformation, you need a 3x3 transformation matrix. Straight lines will remain straight even after the transformation. To find this transformation matrix, you need 4 points on the input image and corresponding points on the output image. Among these 4 points, 3 of them should not be collinear. Then transformation matrix can be found by the function cv2.getPerspectiveTransform. Then apply cv2.warpPerspective with this 3x3 transformation matrix.\n",
203 | "\n",
204 | "See the code below:"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {
211 | "collapsed": false
212 | },
213 | "outputs": [],
214 | "source": [
215 | "img = cv2.imread('sudokusmall.png')\n",
216 | "rows,cols,ch = img.shape\n",
217 | "\n",
218 | "pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])\n",
219 | "pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])\n",
220 | "\n",
221 | "M = cv2.getPerspectiveTransform(pts1,pts2)\n",
222 | "\n",
223 | "dst = cv2.warpPerspective(img,M,(300,300))\n",
224 | "\n",
225 | "plt.subplot(121),plt.imshow(img),plt.title('Input')\n",
226 | "plt.subplot(122),plt.imshow(dst),plt.title('Output')\n",
227 | "plt.show()\n"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "Result:\n",
235 | ""
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {
242 | "collapsed": true
243 | },
244 | "outputs": [],
245 | "source": []
246 | }
247 | ],
248 | "metadata": {
249 | "kernelspec": {
250 | "display_name": "Python 2",
251 | "language": "python",
252 | "name": "python2"
253 | },
254 | "language_info": {
255 | "codemirror_mode": {
256 | "name": "ipython",
257 | "version": 2
258 | },
259 | "file_extension": ".py",
260 | "mimetype": "text/x-python",
261 | "name": "python",
262 | "nbconvert_exporter": "python",
263 | "pygments_lexer": "ipython2",
264 | "version": "2.7.3"
265 | }
266 | },
267 | "nbformat": 4,
268 | "nbformat_minor": 0
269 | }
270 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Histograms in OpenCV - 2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Histograms - 2: Histogram Equalization"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Goal\n",
15 | "In this section,\n",
16 | "\n",
17 | "- We will learn the concepts of histogram equalization and use it to improve the contrast of our images.\n",
18 | "\n",
19 | "### Theory\n",
20 | "So what is histogram ? You can consider histogram as a graph or plot, which gives you an overall idea about the intensity distribution of an image. It is a plot with pixel values (ranging from 0 to 255, not always) in X-axis and corresponding number of pixels in the image on Y-axis.\n",
21 | "\n",
22 |     "It is just another way of understanding the image. By looking at the histogram of an image, you get intuition about contrast, brightness, intensity distribution etc of that image. Almost all image processing tools today provide features on histograms. Below is an image from Cambridge in Color website, and I recommend you to visit the site for more details."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "source": [
31 | ""
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "I would recommend you to read the wikipedia page on Histogram Equalization for more details about it. It has a very good explanation with worked out examples, so that you would understand almost everything after reading that. Instead, here we will see its Numpy implementation. After that, we will see OpenCV function."
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {
45 | "collapsed": true
46 | },
47 | "outputs": [],
48 | "source": [
49 | "import cv2\n",
50 | "import numpy as np\n",
51 | "from matplotlib import pyplot as plt\n",
52 | "\n",
53 | "img = cv2.imread('images/test.jpg',0)\n",
54 | "\n",
55 | "hist,bins = np.histogram(img.flatten(),256,[0,256])\n",
56 | "\n",
57 | "cdf = hist.cumsum()\n",
58 | "cdf_normalized = cdf * hist.max()/ cdf.max()\n",
59 | "\n",
60 | "plt.plot(cdf_normalized, color = 'b')\n",
61 | "plt.hist(img.flatten(),256,[0,256], color = 'r')\n",
62 | "plt.xlim([0,256])\n",
63 | "plt.legend(('cdf','histogram'), loc = 'upper left')\n",
64 | "plt.show()"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | ""
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {
77 | "collapsed": true
78 | },
79 | "source": [
80 | "You can see histogram lies in brighter region. We need the full spectrum. For that, we need a transformation function which maps the input pixels in brighter region to output pixels in full region. That is what histogram equalization does.\n",
81 | "\n",
82 | "Now we find the minimum histogram value (excluding 0) and apply the histogram equalization equation as given in wiki page. But I have used here, the masked array concept array from Numpy. For masked array, all operations are performed on non-masked elements. You can read more about it from Numpy docs on masked arrays."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 2,
88 | "metadata": {
89 | "collapsed": true
90 | },
91 | "outputs": [],
92 | "source": [
93 | "cdf_m = np.ma.masked_equal(cdf,0)\n",
94 | "cdf_m = (cdf_m - cdf_m.min())*255/(cdf_m.max()-cdf_m.min())\n",
95 | "cdf = np.ma.filled(cdf_m,0).astype('uint8')"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {
101 | "collapsed": true
102 | },
103 | "source": [
104 | "Now we have the look-up table that gives us the information on what is the output pixel value for every input pixel value. So we just apply the transform."
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "metadata": {
111 | "collapsed": true
112 | },
113 | "outputs": [],
114 | "source": [
115 | "img2 = cdf[img]"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {
121 | "collapsed": true
122 | },
123 | "source": [
124 | "Now we calculate its histogram and cdf as before ( you do it) and result looks like below :"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "source": [
133 | ""
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {
139 | "collapsed": true
140 | },
141 | "source": [
142 | "Another important feature is that, even if the image was a darker image (instead of a brighter one we used), after equalization we will get almost the same image as we got. As a result, this is used as a “reference tool” to make all images with same lighting conditions. This is useful in many cases. For example, in face recognition, before training the face data, the images of faces are histogram equalized to make them all with same lighting conditions.\n",
143 | "\n",
144 | "### Histograms Equalization in OpenCV\n",
145 | "OpenCV has a function to do this, cv2.equalizeHist(). Its input is just grayscale image and output is our histogram equalized image.\n",
146 | "\n",
147 | "Below is a simple code snippet showing its usage for same image we used :"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 4,
153 | "metadata": {
154 | "collapsed": false
155 | },
156 | "outputs": [
157 | {
158 | "data": {
159 | "text/plain": [
160 | "True"
161 | ]
162 | },
163 | "execution_count": 4,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "img = cv2.imread('images/test.jpg',0)\n",
170 | "equ = cv2.equalizeHist(img)\n",
171 | "res = np.hstack((img,equ)) #stacking images side-by-side\n",
172 | "cv2.imwrite('images/res.png',res)"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "So now you can take different images with different light conditions, equalize it and check the results.\n",
180 | "\n",
181 | "Histogram equalization is good when histogram of the image is confined to a particular region. It won’t work good in places where there is large intensity variations where histogram covers a large region, ie both bright and dark pixels are present. Please check the SOF links in Additional Resources.\n",
182 | "\n",
183 | "### CLAHE (Contrast Limited Adaptive Histogram Equalization)\n",
184 | "The first histogram equalization we just saw, considers the global contrast of the image. In many cases, it is not a good idea. For example, below image shows an input image and its result after global histogram equalization."
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "collapsed": true
191 | },
192 | "source": [
193 | ""
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "It is true that the background contrast has improved after histogram equalization. But compare the face of statue in both images. We lost most of the information there due to over-brightness. It is because its histogram is not confined to a particular region as we saw in previous cases (Try to plot histogram of input image, you will get more intuition).\n",
201 | "\n",
202 | "So to solve this problem, adaptive histogram equalization is used. In this, image is divided into small blocks called “tiles” (tileSize is 8x8 by default in OpenCV). Then each of these blocks are histogram equalized as usual. So in a small area, histogram would confine to a small region (unless there is noise). If noise is there, it will be amplified. To avoid this, contrast limiting is applied. If any histogram bin is above the specified contrast limit (by default 40 in OpenCV), those pixels are clipped and distributed uniformly to other bins before applying histogram equalization. After equalization, to remove artifacts in tile borders, bilinear interpolation is applied.\n",
203 | "\n",
204 | "Below code snippet shows how to apply CLAHE in OpenCV:"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 5,
210 | "metadata": {
211 | "collapsed": false
212 | },
213 | "outputs": [
214 | {
215 | "data": {
216 | "text/plain": [
217 | "True"
218 | ]
219 | },
220 | "execution_count": 5,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "import numpy as np\n",
227 | "import cv2\n",
228 | "\n",
229 | "img = cv2.imread('images/test.png',0)\n",
230 | "\n",
231 | "# create a CLAHE object (Arguments are optional).\n",
232 | "clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))\n",
233 | "cl1 = clahe.apply(img)\n",
234 | "\n",
235 | "cv2.imwrite('images/clahe_2.jpg',cl1)"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | ""
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "collapsed": true
250 | },
251 | "outputs": [],
252 | "source": []
253 | }
254 | ],
255 | "metadata": {
256 | "kernelspec": {
257 | "display_name": "Python 2",
258 | "language": "python",
259 | "name": "python2"
260 | },
261 | "language_info": {
262 | "codemirror_mode": {
263 | "name": "ipython",
264 | "version": 2
265 | },
266 | "file_extension": ".py",
267 | "mimetype": "text/x-python",
268 | "name": "python",
269 | "nbconvert_exporter": "python",
270 | "pygments_lexer": "ipython2",
271 | "version": "2.7.3"
272 | }
273 | },
274 | "nbformat": 4,
275 | "nbformat_minor": 0
276 | }
277 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Image Gradients.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Image Gradients"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "\n",
18 | "In this chapter, we will learn to:\n",
19 | "\n",
20 | "- Find Image gradients, edges etc\n",
21 | "\n",
22 | "- We will see following functions : cv2.Sobel(), cv2.Scharr(), cv2.Laplacian() etc\n",
23 | "\n",
24 | "#### Theory\n",
25 | "\n",
26 | "OpenCV provides three types of gradient filters or High-pass filters, Sobel, Scharr and Laplacian. We will see each one of them."
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "collapsed": false
33 | },
34 | "source": [
35 | "#### 1. Sobel and Scharr Derivatives\n",
36 | "Sobel operators is a joint Gausssian smoothing plus differentiation operation, so it is more resistant to noise. You can specify the direction of derivatives to be taken, vertical or horizontal (by the arguments, yorder and xorder respectively). You can also specify the size of kernel by the argument ksize. If ksize = -1, a 3x3 Scharr filter is used which gives better results than 3x3 Sobel filter. Please see the docs for kernels used.\n",
37 | "\n",
38 | "#### 2. Laplacian Derivatives\n",
39 | "It calculates the Laplacian of the image given by the relation, \\Delta src = \\frac{\\partial ^2{src}}{\\partial x^2} + \\frac{\\partial ^2{src}}{\\partial y^2} where each derivative is found using Sobel derivatives. If ksize = 1, then following kernel is used for filtering:\n",
40 | "\n",
41 | "kernel = \\begin{bmatrix} 0 & 1 & 0 \\\\ 1 & -4 & 1 \\\\ 0 & 1 & 0 \\end{bmatrix}\n",
42 | "\n",
43 | "#### Code\n",
44 | "Below code shows all operators in a single diagram. All kernels are of 5x5 size. Depth of output image is passed -1 to get the result in np.uint8 type."
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 1,
50 | "metadata": {
51 | "collapsed": true
52 | },
53 | "outputs": [],
54 | "source": [
55 | "import cv2\n",
56 | "import numpy as np\n",
57 | "from matplotlib import pyplot as plt\n",
58 | "\n",
59 | "img = cv2.imread('images/test.jpg',0)\n",
60 | "\n",
61 | "laplacian = cv2.Laplacian(img,cv2.CV_64F)\n",
62 | "sobelx = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)\n",
63 | "sobely = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5)\n",
64 | "\n",
65 | "plt.subplot(2,2,1),plt.imshow(img,cmap = 'gray')\n",
66 | "plt.title('Original'), plt.xticks([]), plt.yticks([])\n",
67 | "plt.subplot(2,2,2),plt.imshow(laplacian,cmap = 'gray')\n",
68 | "plt.title('Laplacian'), plt.xticks([]), plt.yticks([])\n",
69 | "plt.subplot(2,2,3),plt.imshow(sobelx,cmap = 'gray')\n",
70 | "plt.title('Sobel X'), plt.xticks([]), plt.yticks([])\n",
71 | "plt.subplot(2,2,4),plt.imshow(sobely,cmap = 'gray')\n",
72 | "plt.title('Sobel Y'), plt.xticks([]), plt.yticks([])\n",
73 | "\n",
74 | "plt.show()"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "source": [
83 | "Result:\n",
84 | ""
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "#### One Important Matter!\n",
92 | "In our last example, output datatype is cv2.CV_8U or np.uint8. But there is a slight problem with that. Black-to-White transition is taken as Positive slope (it has a positive value) while White-to-Black transition is taken as a Negative slope (It has negative value). So when you convert data to np.uint8, all negative slopes are made zero. In simple words, you miss that edge.\n",
93 | "\n",
94 | "If you want to detect both edges, better option is to keep the output datatype to some higher forms, like cv2.CV_16S, cv2.CV_64F etc, take its absolute value and then convert back to cv2.CV_8U. Below code demonstrates this procedure for a horizontal Sobel filter and difference in results."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 2,
100 | "metadata": {
101 | "collapsed": true
102 | },
103 | "outputs": [],
104 | "source": [
105 | "import cv2\n",
106 | "import numpy as np\n",
107 | "from matplotlib import pyplot as plt\n",
108 | "\n",
109 | "img = cv2.imread('images/test.png',0)\n",
110 | "\n",
111 | "# Output dtype = cv2.CV_8U\n",
112 | "sobelx8u = cv2.Sobel(img,cv2.CV_8U,1,0,ksize=5)\n",
113 | "\n",
114 | "# Output dtype = cv2.CV_64F. Then take its absolute and convert to cv2.CV_8U\n",
115 | "sobelx64f = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)\n",
116 | "abs_sobel64f = np.absolute(sobelx64f)\n",
117 | "sobel_8u = np.uint8(abs_sobel64f)\n",
118 | "\n",
119 | "plt.subplot(1,3,1),plt.imshow(img,cmap = 'gray')\n",
120 | "plt.title('Original'), plt.xticks([]), plt.yticks([])\n",
121 | "plt.subplot(1,3,2),plt.imshow(sobelx8u,cmap = 'gray')\n",
122 | "plt.title('Sobel CV_8U'), plt.xticks([]), plt.yticks([])\n",
123 | "plt.subplot(1,3,3),plt.imshow(sobel_8u,cmap = 'gray')\n",
124 | "plt.title('Sobel abs(CV_64F)'), plt.xticks([]), plt.yticks([])\n",
125 | "\n",
126 | "plt.show()"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "Check the result below:\n",
134 | ""
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": true
142 | },
143 | "outputs": [],
144 | "source": []
145 | }
146 | ],
147 | "metadata": {
148 | "kernelspec": {
149 | "display_name": "Python 2",
150 | "language": "python",
151 | "name": "python2"
152 | },
153 | "language_info": {
154 | "codemirror_mode": {
155 | "name": "ipython",
156 | "version": 2
157 | },
158 | "file_extension": ".py",
159 | "mimetype": "text/x-python",
160 | "name": "python",
161 | "nbconvert_exporter": "python",
162 | "pygments_lexer": "ipython2",
163 | "version": "2.7.3"
164 | }
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 0
168 | }
169 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Image Pyramids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Image Pyramids"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "In this chapter,\n",
18 | "\n",
19 | "- We will learn about Image Pyramids\n",
20 | "\n",
21 | "- We will use Image pyramids to create a new fruit, “Orapple”\n",
22 | "\n",
23 | "- We will see these functions: cv2.pyrUp(), cv2.pyrDown()\n",
24 | "\n",
25 | "\n",
26 | "#### Theory\n",
27 | "Normally, we used to work with an image of constant size. But in some occassions, we need to work with images of different resolution of the same image. For example, while searching for something in an image, like face, we are not sure at what size the object will be present in the image. In that case, we will need to create a set of images with different resolution and search for object in all the images. These set of images with different resolution are called Image Pyramids (because when they are kept in a stack with biggest image at bottom and smallest image at top look like a pyramid).\n",
28 | "\n",
29 | "There are two kinds of Image Pyramids. 1) Gaussian Pyramid and 2) Laplacian Pyramids\n",
30 | "\n",
31 | "Higher level (Low resolution) in a Gaussian Pyramid is formed by removing consecutive rows and columns in Lower level (higher resolution) image. Then each pixel in higher level is formed by the contribution from 5 pixels in underlying level with gaussian weights. By doing so, a M \\times N image becomes M/2 \\times N/2 image. So area reduces to one-fourth of original area. It is called an Octave. The same pattern continues as we go upper in pyramid (ie, resolution decreases). Similarly while expanding, area becomes 4 times in each level. We can find Gaussian pyramids using cv2.pyrDown() and cv2.pyrUp() functions."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 3,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "import cv2\n",
43 | "img = cv2.imread('images/test.jpg')\n",
44 | "lower_reso = cv2.pyrDown(img)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Below is the 4 levels in an image pyramid."
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "source": [
60 | ""
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "source": [
69 | "Now you can go down the image pyramid with cv2.pyrUp() function."
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {
76 | "collapsed": true
77 | },
78 | "outputs": [],
79 | "source": [
80 | "higher_reso2 = cv2.pyrUp(img)"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "Remember, higher_reso2 is not equal to higher_reso, because once you decrease the resolution, you loose the information. Below image is 3 level down the pyramid created from smallest image in previous case. Compare it with original image:"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {
93 | "collapsed": true
94 | },
95 | "source": [
96 | ""
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "Laplacian Pyramids are formed from the Gaussian Pyramids. There is no exclusive function for that. Laplacian pyramid images are like edge images only. Most of its elements are zeros. They are used in image compression. A level in Laplacian Pyramid is formed by the difference between that level in Gaussian Pyramid and expanded version of its upper level in Gaussian Pyramid. The three levels of a Laplacian level will look like below (contrast is adjusted to enhance the contents):"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | ""
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {
116 | "collapsed": true
117 | },
118 | "source": [
119 | "#### Image Blending using Pyramids\n",
120 | "One application of Pyramids is Image Blending. For example, in image stitching, you will need to stack two images together, but it may not look good due to discontinuities between images. In that case, image blending with Pyramids gives you seamless blending without leaving much data in the images. One classical example of this is the blending of two fruits, Orange and Apple. See the result now itself to understand what I am saying:"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | ""
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "collapsed": true
134 | },
135 | "source": [
136 | "Please check first reference in additional resources, it has full diagramatic details on image blending, Laplacian Pyramids etc. Simply it is done as follows:\n",
137 | "\n",
138 | "1. Load the two images of apple and orange\n",
139 | "\n",
140 | "2. Find the Gaussian Pyramids for apple and orange (in this particular example, number of levels is 6)\n",
141 | "\n",
142 | "3. From Gaussian Pyramids, find their Laplacian Pyramids\n",
143 | "\n",
144 | "4. Now join the left half of apple and right half of orange in each levels of Laplacian Pyramids\n",
145 | "\n",
146 | "5. Finally from this joint image pyramids, reconstruct the original image.\n",
147 | "\n",
148 | "\n",
149 | "Below is the full code. (For sake of simplicity, each step is done separately which may take more memory. You can optimize it if you want so)."
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": [
160 | "import cv2\n",
161 | "import numpy as np,sys\n",
162 | "\n",
163 | "A = cv2.imread('apple.jpg')\n",
164 | "B = cv2.imread('orange.jpg')\n",
165 | "\n",
166 | "# generate Gaussian pyramid for A\n",
167 | "G = A.copy()\n",
168 | "gpA = [G]\n",
169 | "for i in xrange(6):\n",
170 | " G = cv2.pyrDown(G)\n",
171 | " gpA.append(G)\n",
172 | "\n",
173 | "# generate Gaussian pyramid for B\n",
174 | "G = B.copy()\n",
175 | "gpB = [G]\n",
176 | "for i in xrange(6):\n",
177 | " G = cv2.pyrDown(G)\n",
178 | " gpB.append(G)\n",
179 | "\n",
180 | "# generate Laplacian Pyramid for A\n",
181 | "lpA = [gpA[5]]\n",
182 | "for i in xrange(5,0,-1):\n",
183 | " GE = cv2.pyrUp(gpA[i])\n",
184 | " L = cv2.subtract(gpA[i-1],GE)\n",
185 | " lpA.append(L)\n",
186 | "\n",
187 | "# generate Laplacian Pyramid for B\n",
188 | "lpB = [gpB[5]]\n",
189 | "for i in xrange(5,0,-1):\n",
190 | " GE = cv2.pyrUp(gpB[i])\n",
191 | " L = cv2.subtract(gpB[i-1],GE)\n",
192 | " lpB.append(L)\n",
193 | "\n",
194 | "# Now add left and right halves of images in each level\n",
195 | "LS = []\n",
196 | "for la,lb in zip(lpA,lpB):\n",
197 | " rows,cols,dpt = la.shape\n",
198 | " ls = np.hstack((la[:,0:cols/2], lb[:,cols/2:]))\n",
199 | " LS.append(ls)\n",
200 | "\n",
201 | "# now reconstruct\n",
202 | "ls_ = LS[0]\n",
203 | "for i in xrange(1,6):\n",
204 | " ls_ = cv2.pyrUp(ls_)\n",
205 | " ls_ = cv2.add(ls_, LS[i])\n",
206 | "\n",
207 | "# image with direct connecting each half\n",
208 | "real = np.hstack((A[:,:cols/2],B[:,cols/2:]))\n",
209 | "\n",
210 | "cv2.imwrite('Pyramid_blending2.jpg',ls_)\n",
211 | "cv2.imwrite('Direct_blending.jpg',real)"
212 | ]
213 | }
214 | ],
215 | "metadata": {
216 | "kernelspec": {
217 | "display_name": "Python 2",
218 | "language": "python",
219 | "name": "python2"
220 | },
221 | "language_info": {
222 | "codemirror_mode": {
223 | "name": "ipython",
224 | "version": 2
225 | },
226 | "file_extension": ".py",
227 | "mimetype": "text/x-python",
228 | "name": "python",
229 | "nbconvert_exporter": "python",
230 | "pygments_lexer": "ipython2",
231 | "version": "2.7.3"
232 | }
233 | },
234 | "nbformat": 4,
235 | "nbformat_minor": 0
236 | }
237 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Morphological Transformations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Morphological Transformations"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "\n",
18 | "In this chapter,\n",
19 | "- We will learn different morphological operations like Erosion, Dilation, Opening, Closing etc.\n",
20 | "\n",
21 | "- We will see different functions like : cv2.erode(), cv2.dilate(), cv2.morphologyEx() etc.\n",
22 | "\n",
23 | "#### Theory\n",
24 | "Morphological transformations are some simple operations based on the image shape. It is normally performed on binary images. It needs two inputs, one is our original image, second one is called structuring element or kernel which decides the nature of operation. Two basic morphological operators are Erosion and Dilation. Then its variant forms like Opening, Closing, Gradient etc also comes into play. We will see them one-by-one with help of following image:"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "source": [
33 | "Result:\n",
34 | ""
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "source": [
43 | "#### 1. Erosion\n",
44 | "The basic idea of erosion is just like soil erosion only, it erodes away the boundaries of foreground object (Always try to keep foreground in white). So what does it do? The kernel slides through the image (as in 2D convolution). A pixel in the original image (either 1 or 0) will be considered 1 only if all the pixels under the kernel is 1, otherwise it is eroded (made to zero).\n",
45 | "\n",
46 | "So what happends is that, all the pixels near boundary will be discarded depending upon the size of kernel. So the thickness or size of the foreground object decreases or simply white region decreases in the image. It is useful for removing small white noises (as we have seen in colorspace chapter), detach two connected objects etc.\n",
47 | "\n",
48 | "Here, as an example, I would use a 5x5 kernel with full of ones. Let’s see it how it works:"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": true
56 | },
57 | "outputs": [],
58 | "source": [
59 | "import cv2\n",
60 | "import numpy as np\n",
61 | "\n",
62 | "img = cv2.imread('j.png',0)\n",
63 | "kernel = np.ones((5,5),np.uint8)\n",
64 | "erosion = cv2.erode(img,kernel,iterations = 1)"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {
70 | "collapsed": true
71 | },
72 | "source": [
73 | "Result:\n",
74 | ""
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "#### 2. Dilation\n",
82 | "It is just opposite of erosion. Here, a pixel element is ‘1’ if atleast one pixel under the kernel is ‘1’. So it increases the white region in the image or size of foreground object increases. Normally, in cases like noise removal, erosion is followed by dilation. Because, erosion removes white noises, but it also shrinks our object. So we dilate it. Since noise is gone, they won’t come back, but our object area increases. It is also useful in joining broken parts of an object."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "collapsed": true
90 | },
91 | "outputs": [],
92 | "source": [
93 | "dilation = cv2.dilate(img,kernel,iterations = 1)"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {},
99 | "source": [
100 | "Result :\n",
101 | ""
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {
107 | "collapsed": false
108 | },
109 | "source": [
110 | "#### 3. Opening\n",
111 | "Opening is just another name of erosion followed by dilation. It is useful in removing noise, as we explained above. Here we use the function, cv2.morphologyEx()"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [],
121 | "source": [
122 | "opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "Result :\n",
130 | ""
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "#### 4. Closing\n",
138 | "Closing is reverse of Opening, Dilation followed by Erosion. It is useful in closing small holes inside the foreground objects, or small black points on the object."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {
145 | "collapsed": false
146 | },
147 | "outputs": [],
148 | "source": [
149 | "closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {
155 | "collapsed": true
156 | },
157 | "source": [
158 | "Result:\n",
159 | "\n",
160 | ""
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "#### 5. Morphological Gradient\n",
168 | "It is the difference between dilation and erosion of an image.\n",
169 | "\n",
170 | "The result will look like the outline of the object."
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "collapsed": true
178 | },
179 | "outputs": [],
180 | "source": [
181 | "gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {
187 | "collapsed": true
188 | },
189 | "source": [
190 | "Result:\n",
191 | "\n",
192 | ""
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "#### 6. Top Hat\n",
200 | "It is the difference between input image and Opening of the image. Below example is done for a 9x9 kernel."
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {
207 | "collapsed": true
208 | },
209 | "outputs": [],
210 | "source": [
211 | "tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "Result:\n",
219 | "\n",
220 | ""
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "#### 7. Black Hat\n",
228 | "It is the difference between the closing of the input image and input image."
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "collapsed": true
236 | },
237 | "outputs": [],
238 | "source": [
239 | "blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "Result:\n",
247 | "\n",
248 | ""
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "#### Structuring Element\n",
256 | "We manually created a structuring elements in the previous examples with help of Numpy. It is rectangular shape. But in some cases, you may need elliptical/circular shaped kernels. So for this purpose, OpenCV has a function, cv2.getStructuringElement(). You just pass the shape and size of the kernel, you get the desired kernel."
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": null,
262 | "metadata": {
263 | "collapsed": true
264 | },
265 | "outputs": [],
266 | "source": [
267 | "# Rectangular Kernel\n",
268 | ">>> cv2.getStructuringElement(cv2.MORPH_RECT,(5,5))\n",
269 | "array([[1, 1, 1, 1, 1],\n",
270 | " [1, 1, 1, 1, 1],\n",
271 | " [1, 1, 1, 1, 1],\n",
272 | " [1, 1, 1, 1, 1],\n",
273 | " [1, 1, 1, 1, 1]], dtype=uint8)\n",
274 | "\n",
275 | "# Elliptical Kernel\n",
276 | ">>> cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))\n",
277 | "array([[0, 0, 1, 0, 0],\n",
278 | " [1, 1, 1, 1, 1],\n",
279 | " [1, 1, 1, 1, 1],\n",
280 | " [1, 1, 1, 1, 1],\n",
281 | " [0, 0, 1, 0, 0]], dtype=uint8)\n",
282 | "\n",
283 | "# Cross-shaped Kernel\n",
284 | ">>> cv2.getStructuringElement(cv2.MORPH_CROSS,(5,5))\n",
285 | "array([[0, 0, 1, 0, 0],\n",
286 | " [0, 0, 1, 0, 0],\n",
287 | " [1, 1, 1, 1, 1],\n",
288 | " [0, 0, 1, 0, 0],\n",
289 | " [0, 0, 1, 0, 0]], dtype=uint8)"
290 | ]
291 | }
292 | ],
293 | "metadata": {
294 | "kernelspec": {
295 | "display_name": "Python 2",
296 | "language": "python",
297 | "name": "python2"
298 | },
299 | "language_info": {
300 | "codemirror_mode": {
301 | "name": "ipython",
302 | "version": 2
303 | },
304 | "file_extension": ".py",
305 | "mimetype": "text/x-python",
306 | "name": "python",
307 | "nbconvert_exporter": "python",
308 | "pygments_lexer": "ipython2",
309 | "version": "2.7.3"
310 | }
311 | },
312 | "nbformat": 4,
313 | "nbformat_minor": 0
314 | }
315 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/airports.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e448270d8affa34d4c9d63df6fa68efa39d96f30c6b42ea59801b48a6c06fcf8
3 | size 937067
4 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/movie_metadata.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ee65e153a601b2fe6ff4f4db87cabf715d304635bb7a662a0f7fd6db21c621bc
3 | size 1494688
4 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/table.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ff00fe10e7cf76cb612803671844233f51f667ba3fcc0446ebe8a3f536a14898
3 | size 91149
4 |
--------------------------------------------------------------------------------
/18-Elastic/images/01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/01.png
--------------------------------------------------------------------------------
/18-Elastic/images/download.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/download.jpg
--------------------------------------------------------------------------------
/18-Elastic/images/elastic_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/elastic_logo.png
--------------------------------------------------------------------------------
/18-Elastic/images/g01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g01.png
--------------------------------------------------------------------------------
/18-Elastic/images/g02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g02.png
--------------------------------------------------------------------------------
/18-Elastic/images/g03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g03.png
--------------------------------------------------------------------------------
/18-Elastic/images/g04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g04.png
--------------------------------------------------------------------------------
/18-Elastic/images/g05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g05.png
--------------------------------------------------------------------------------
/18-Elastic/images/g06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g06.png
--------------------------------------------------------------------------------
/18-Elastic/images/g07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g07.png
--------------------------------------------------------------------------------
/18-Elastic/images/g08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g08.png
--------------------------------------------------------------------------------
/18-Elastic/images/g09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g09.png
--------------------------------------------------------------------------------
/18-Elastic/images/g10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g10.png
--------------------------------------------------------------------------------
/18-Elastic/images/g11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g11.png
--------------------------------------------------------------------------------
/18-Elastic/images/g12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g12.png
--------------------------------------------------------------------------------
/18-Elastic/images/k01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k01.png
--------------------------------------------------------------------------------
/18-Elastic/images/k02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k02.png
--------------------------------------------------------------------------------
/18-Elastic/images/k03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k03.png
--------------------------------------------------------------------------------
/18-Elastic/images/k04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k04.png
--------------------------------------------------------------------------------
/18-Elastic/images/k05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k05.png
--------------------------------------------------------------------------------
/18-Elastic/images/k06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k06.png
--------------------------------------------------------------------------------
/18-Elastic/images/k07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k07.png
--------------------------------------------------------------------------------
/18-Elastic/images/l01_ex01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l01_ex01.png
--------------------------------------------------------------------------------
/18-Elastic/images/l01_ex03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l01_ex03.png
--------------------------------------------------------------------------------
/18-Elastic/images/l02_ex01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l02_ex01.png
--------------------------------------------------------------------------------
/18-Elastic/images/l02_ex03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l02_ex03.png
--------------------------------------------------------------------------------
/18-Elastic/images/solr_vs_elasticsearch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/solr_vs_elasticsearch.jpg
--------------------------------------------------------------------------------
/21-Slack/S1-Slack_GetPermissions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Getting the Access Code for Slack\n",
8 | "\n",
9 | "The code below illustrates how we connect to the Slack API to request an authorization token for our app. Remember that we have to register our app with Slack first, and get the `client_id`.\n",
10 | "\n",
11 | "#### Creating a Slack App\n",
12 | "\n",
13 | "* Go to https://api.slack.com/apps and create your app. You will need the \"Client ID\" and the \"Client Secret\" that is created for you.\n",
14 | "* Select the **\"OAuth & Permissions\"** tab from the left-hand side and add a \"Redirect URL\" for your app. The redirect URL ensures (for security) that the app can only talk to your own web server. Add `http://<your-host>:5000/slack` as your redirect URL.\n",
15 | "* Select the \"Bot Users\" tab from the left-hand side and add a bot username for your app."
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "\n",
23 | "#### Get the Web Server up and running\n",
24 | "\n",
25 | "* See the Python Script `webserver.py` that is in this folder. \n",
26 | "* **IMPORTANT**: Modify the CLIENT_ID, CLIENT_SECRET, and REDIRECT variables in `slack_app.json` to match those of your own Slack app.\n",
27 | "* You now need to start the server, so that it can receive the authentication code for the user. "
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# the command below will run the webserver from the notebook\n",
37 | "# Notice that the server is configured to stop running \n",
38 | "# after receiving the first authorization grant and storing the access token\n",
39 | "%run webserver.py"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "# Alternatively, you can launch the webserver from the Terminal issuing the command\n",
49 | "#\n",
50 | "# python3 webserver.py\n"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "# Alternatively, if you want to run the server in the background, type:\n",
60 | "#\n",
61 | "# nohup ./webserver.py &\n",
62 | "#\n",
63 | "# which will put the server to run in the background\n",
64 | "#\n",
65 | "# If you need to stop the background server, you can issue the following\n",
66 | "# command from the terminal, which will stop any process that contains \n",
67 | "# `WebServer` as part of its name\n",
68 | "#\n",
69 | "# kill $(pgrep WebServer)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "Now, we are ready to execute the authentication flow, which is illustrated in the picture below."
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | ""
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "#### Launch the user authentication process (Steps 1-4 in the picture above)\n",
91 | "\n",
92 | "* Now go to `http://<your-host>:5000/install` and click the \"Install Slack Bot\" URL. You will be asked to login to your Slack account, and grant permissions to the bot to use your account credentials/permissions. This is the Step 1 in the picture above.\n",
93 | "\n",
94 | "* Once you grant permissions, then the Slack server will call the `http://<your-host>:5000/slack` URL and send the authentication code to the redirect URL. This is Step 2 in the picture above.\n",
95 | "\n",
96 | "* At that point, our web server will call back the Slack Service API saying \"hey, I got the code, here is the verification (my `client_secret`) that I am indeed the correct app. Can you send me the access token for the user?\" (See the `WebServer.py` code, under the \"/slack\" route.) This is Step 3 in the picture above.\n",
97 | "\n",
98 | "* The Slack service API will send us back the \"access token\" for the authenticated user. We will save this in the file `slack_secret.json`. This is Step 4 in the picture above.\n",
99 | "\n",
100 | "* The webserver will stop running after a successful authorization, and the cell above will stop showing a `[*]`"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "#### Done with the authentication, the authentication data is now stored locally.\n",
108 | "\n",
109 | "After we authorize the app, our web server has received from Slack the authentication token, and it was stored in the `slack_secret.json` file. Now, we are ready to proceed with Steps 5 and 6, which are the regular API calls to the Slack API."
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "import json\n",
119 | "\n",
120 | "# Read the access token from the file\n",
121 | "OAUTH_FILE = 'slack_secret.json'\n",
122 | "f = open(OAUTH_FILE, 'r') \n",
123 | "content = f.read()\n",
124 | "f.close()\n",
125 | "auth_info = json.loads(content)\n",
126 | "\n",
127 | "auth_info"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 1,
133 | "metadata": {
134 | "collapsed": true
135 | },
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "Requirement already satisfied: slackclient in /usr/local/lib/python3.5/dist-packages\n",
142 | "Requirement already satisfied: websocket-client<1.0a0,>=0.35 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
143 | "Requirement already satisfied: requests<3.0a0,>=2.11 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
144 | "Requirement already satisfied: six<2.0a0,>=1.10 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
145 | "Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
146 | "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
147 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
148 | "Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n"
149 | ]
150 | }
151 | ],
152 | "source": [
153 | "# We will just use the library to test that the code that we get back\n",
154 | "# allows us to connect to the Slack API\n",
155 | "!sudo -H python3 -m pip install slackclient"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "from slackclient import SlackClient\n",
165 | "sc = SlackClient(auth_info[\"access_token\"])"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "response = sc.api_call(\"users.info\", user=auth_info[\"user_id\"])\n",
175 | "user = response['user']"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "print(\"The username of the authenticated user is\", user.get('name'))\n",
185 | "print(\"The email of the authenticated user is\", user.get('profile').get('email'))\n",
186 | "print(\"The real name of the authenticated user is\", user.get('profile').get('real_name'))"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {
193 | "collapsed": true
194 | },
195 | "outputs": [],
196 | "source": []
197 | }
198 | ],
199 | "metadata": {
200 | "kernelspec": {
201 | "display_name": "Python 3",
202 | "language": "python",
203 | "name": "python3"
204 | },
205 | "language_info": {
206 | "codemirror_mode": {
207 | "name": "ipython",
208 | "version": 3
209 | },
210 | "file_extension": ".py",
211 | "mimetype": "text/x-python",
212 | "name": "python",
213 | "nbconvert_exporter": "python",
214 | "pygments_lexer": "ipython3",
215 | "version": "3.5.2"
216 | }
217 | },
218 | "nbformat": 4,
219 | "nbformat_minor": 1
220 | }
221 |
--------------------------------------------------------------------------------
/21-Slack/S2-Slack-EventProcessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Building a Chat Bot\n",
8 | "\n",
9 | "In the examples below, we will build a very simple application that creates a simple chat bot. The bot is monitoring the actions that are happening on Slack, and posts some acknowledgement messages every time someone types a message.\n",
10 | "\n",
11 | "\n",
12 | "#### Authentication\n",
13 | "\n",
14 | "We start first by authenticating our application. (See notes in S1). We read the `slack_secret.json` file, and we instantiate then our SlackClient."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# Read the access token from the file\n",
24 | "import json\n",
25 | "\n",
26 | "secrets_file = 'slack_secret.json'\n",
27 | "f = open(secrets_file, 'r') \n",
28 | "content = f.read()\n",
29 | "f.close()\n",
30 | "\n",
31 | "auth_info = json.loads(content)\n",
32 | "token = auth_info[\"access_token\"]"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "# If you happen not to have the slackclient library installed, \n",
42 | "# you can uncomment and run the code below\n",
43 | "!sudo -H python3 -m pip install -U slackclient"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "from slackclient import SlackClient\n",
53 | "sc = SlackClient(token)\n",
54 | "response = sc.api_call(\"users.info\", user=auth_info[\"user_id\"])\n",
55 | "user = response['user']\n",
56 | "print(\"The username of the authenticated user is\", user.get('name'))\n",
57 | "print(\"The email of the authenticated user is\", user.get('profile').get('email'))\n",
58 | "print(\"The real name of the authenticated user is\", user.get('profile').get('real_name'))"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "### Implementing the bot\n",
66 | "\n",
67 | "The code below continuously monitors the various events on Slack. Of course, the bot can be programmed to react to any type of event. In this particular example, the bot monitors for user messages, and posts a \"thank you\" note to each user into the #bots channel."
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "import time\n",
77 | "import re\n",
78 | "\n",
79 | "if sc.rtm_connect():\n",
80 | " # We are going to be polling the Slack API for recent events continuously\n",
81 | " while True:\n",
82 | " # We are going to wait 1 second between monitoring attempts\n",
83 | " time.sleep(1)\n",
84 | " # If there are any new events, we will get a response. If there are no events, the response will be empty\n",
85 | " response = sc.rtm_read()\n",
86 | " for item in response:\n",
87 | " event_type = item.get(\"type\")\n",
88 | " # If the event is a message and the message is written by a user (and not a bot)\n",
89 | " if event_type == 'message' and item.get(\"user\")!=None:\n",
90 | " print(item)\n",
91 | " print(\"=========================\")\n",
92 | " message = \"Thank you user {u} for participating in channel {c}\".format(u=item[\"user\"], c=item[\"channel\"])\n",
93 | " sc.api_call(\"chat.postMessage\", channel=\"#bots\", text=message)"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {
99 | "collapsed": true
100 | },
101 | "source": [
102 | "#### Exercises \n",
103 | "\n",
104 | "* Change the code above to retrieve the actual user name of the user, instead of the user id\n",
105 | "* Change the code above to retrieve the actual name of the channel instead of the channel id\n",
106 | "* Change the code above to post the answer to the channel where the user posted, instead of the '#bots' channel"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": []
122 | }
123 | ],
124 | "metadata": {
125 | "kernelspec": {
126 | "display_name": "Python 3",
127 | "language": "python",
128 | "name": "python3"
129 | },
130 | "language_info": {
131 | "codemirror_mode": {
132 | "name": "ipython",
133 | "version": 3
134 | },
135 | "file_extension": ".py",
136 | "mimetype": "text/x-python",
137 | "name": "python",
138 | "nbconvert_exporter": "python",
139 | "pygments_lexer": "ipython3",
140 | "version": "3.5.2"
141 | }
142 | },
143 | "nbformat": 4,
144 | "nbformat_minor": 1
145 | }
146 |
--------------------------------------------------------------------------------
/21-Slack/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/1.jpg
--------------------------------------------------------------------------------
/21-Slack/images/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/10.jpg
--------------------------------------------------------------------------------
/21-Slack/images/11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/11.jpg
--------------------------------------------------------------------------------
/21-Slack/images/12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/12.jpg
--------------------------------------------------------------------------------
/21-Slack/images/13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/13.jpg
--------------------------------------------------------------------------------
/21-Slack/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/14.jpg
--------------------------------------------------------------------------------
/21-Slack/images/15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/15.jpg
--------------------------------------------------------------------------------
/21-Slack/images/16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/16.jpg
--------------------------------------------------------------------------------
/21-Slack/images/17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/17.jpg
--------------------------------------------------------------------------------
/21-Slack/images/18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/18.jpg
--------------------------------------------------------------------------------
/21-Slack/images/19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/19.jpg
--------------------------------------------------------------------------------
/21-Slack/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/2.jpg
--------------------------------------------------------------------------------
/21-Slack/images/20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/20.jpg
--------------------------------------------------------------------------------
/21-Slack/images/21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/21.jpg
--------------------------------------------------------------------------------
/21-Slack/images/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/3.jpg
--------------------------------------------------------------------------------
/21-Slack/images/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/4.jpg
--------------------------------------------------------------------------------
/21-Slack/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/5.jpg
--------------------------------------------------------------------------------
/21-Slack/images/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/6.jpg
--------------------------------------------------------------------------------
/21-Slack/images/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/7.jpg
--------------------------------------------------------------------------------
/21-Slack/images/8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/8.jpg
--------------------------------------------------------------------------------
/21-Slack/images/9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/9.jpg
--------------------------------------------------------------------------------
/21-Slack/slack_app.json:
--------------------------------------------------------------------------------
1 | {
2 | "CLIENT_ID" : "PUT_YOUR_OWN_CLIENT_ID",
3 | "CLIENT_SECRET" : "PUT_YOUR_OWN_CLIENT_SECRET",
4 | "REDIRECT" : "PUT_YOUR_OWN_REDIRECT_URL",
5 | "PERMISSIONS" : "client"
6 | }
7 |
--------------------------------------------------------------------------------
/21-Slack/slack_secret.json:
--------------------------------------------------------------------------------
1 | {"ok":false,"error":"invalid_code"}
2 |
--------------------------------------------------------------------------------
/21-Slack/templates/install_slack_app.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
12 |
13 |
--------------------------------------------------------------------------------
/21-Slack/webserver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | 
3 | # Flask is a webserver library
4 | from flask import Flask, request, render_template
5 | 
6 | # We will use the requests library to issue a request to Slack
7 | # and the json library to parse it
8 | import requests
9 | import json
10 | 
11 | SLACK_URL = "https://slack.com/oauth/authorize"  # Slack's OAuth authorization endpoint
12 | 
13 | # Edit this file to add your own client details in the slack_app.json file
14 | CONFIG_FILE = 'slack_app.json'
15 | # This is the location where we will store the authentication data from Slack
16 | OAUTH_FILE = 'slack_secret.json'
17 | 
18 | # Initialize the Flask web server
19 | # We create a folder "plots" where we are going to store
20 | # plots to post them (later on) as messages to Slack channels
21 | webserver = Flask("SlackOAuth", static_folder='plots')
22 | 
23 | # This URL will just have a link that the user clicks to install
24 | # the Slack bot (step 1 of the OAuth flow: send the user to Slack's authorize page)
25 | @webserver.route("/install")
26 | def install_bot():
27 | url = (SLACK_URL +  # base authorize URL defined at the top of the file
28 | '?response_type=code' +  # ask Slack for an authorization code
29 | '&client_id='+ CLIENT_ID +  # identifies our registered Slack app
30 | '&scope=' + PERMISSIONS +  # permissions the app requests
31 | '&redirect_uri=' + REDIRECT )  # Slack will send the code back to this URL
32 | 
33 | return render_template("install_slack_app.html", url=url)  # render a page containing the install link
34 | 
35 | # This is the place where the webserver will receive the call from Slack
36 | # The call from Slack will have a parameter "code" (the OAuth authorization code)
37 | @webserver.route("/slack")
38 | def oauth_helper():
39 | code = request.args.get('code')  # authorization code passed by Slack in the query string
40 | 
41 | # Now that we got the code
42 | # we request the access token from Slack. Notice that we
43 | # use the client_secret to prove that the app is the real one
44 | # that was registered with the Slack API
45 | url = "https://slack.com/api/oauth.access"
46 | params = {"grant_type": "authorization_code",
47 | "client_id": CLIENT_ID,
48 | "client_secret": CLIENT_SECRET,
49 | "code": code,
50 | "redirect_uri": REDIRECT}
51 | resp = requests.get(url, params=params)  # exchange the authorization code for an access token
52 | data = json.loads(resp.text)  # NOTE(review): parsed but never used below — presumably kept for debugging
53 | 
54 | # We store the code in a file as the webserver does not interact with the
55 | # rest of the Python code, and we also want to reuse the code in the future
56 | # (Typically, we would store the access_token in a database.)
57 | f = open(OAUTH_FILE, 'w') # Store the raw JSON response (which contains the access token) as a file
58 | f.write(resp.text + '\n')
59 | f.close()
60 | 
61 | # If we start the server just to get the code, it is safe (and convenient)
62 | # to shut down the web server after this request.
63 | # stop_server()
64 | 
65 | # What we return here has no real impact on the functionality of the code
66 | # Normally, we would just redirect the user to a "Thank you" page.
67 | return 'Code: '+code+'
Response:'+resp.text+''  # NOTE(review): HTML tags appear stripped from this literal by the dump — confirm against the original source
68 | 
69 | def stop_server():  # ask the development server to shut down after the current request
70 | shutdown_after_request = request.environ.get('werkzeug.server.shutdown')  # shutdown hook provided by the Werkzeug dev server
71 | shutdown_after_request()  # NOTE(review): this is None (and raises TypeError) outside the Werkzeug dev server — confirm
72 | return
73 | 
74 | # This allows us to serve files (in our case, images)
75 | # that we create on the server.
76 | @webserver.route('/plots/')
77 | def static_proxy(path):  # NOTE(review): route appears garbled in this dump — likely '/plots/<path:path>' originally, otherwise `path` is never supplied
78 | return webserver.send_static_file(path)  # serve the file from the 'plots' static folder configured above
79 | 
80 | 
81 | if __name__ == '__main__':
82 | 
83 | # We open the CONFIG file here and read the details for the app
84 | f = open(CONFIG_FILE, 'r')
85 | content = f.read()
86 | f.close()
87 | config= json.loads(content)
88 | CLIENT_ID = config['CLIENT_ID']  # these become module-level globals read by the route handlers above
89 | CLIENT_SECRET = config['CLIENT_SECRET']
90 | REDIRECT = config['REDIRECT']
91 | PERMISSIONS = config['PERMISSIONS']
92 | webserver.run(host='0.0.0.0', port=5000, debug=True)  # listen on all interfaces; debug mode is for classroom use only
93 | 
94 | 
95 | 
96 | 
97 | 
--------------------------------------------------------------------------------
/COURSES.md:
--------------------------------------------------------------------------------
1 | ## Related Courses
2 | * [Data Analytics for Business](http://inseaddataanalytics.github.io/INSEADAnalytics/), [Projects](http://inseaddataanalytics.github.io/INSEADAnalytics/ProjectsMenu.html)
3 | * [Udacity Intro to Data Science](https://www.udacity.com/course/ud359)
4 | * [Udacity Data Wrangling with MongoDB](https://www.udacity.com/course/ud032)
5 | * [Harvard CS 109](http://cs109.org/schedule.php)
6 | * [CU Boulder Research Computing Fall 2013 meetup](https://github.com/ResearchComputing/Meetup-Fall-2013)
7 | * [Udacity Exploratory Data Analysis](https://www.udacity.com/course/ud651)
8 | * [USC COMM 620 Data Retrieval and Processing Techniques](http://www-bcf.usc.edu/~ljian/courses/large_data_syllabus.html)
9 | * [NYU/Stern Practical Data Science](http://jattenberg.github.io/PDS-Fall-2013/)
10 | * [Columbia Data Science](http://columbiadatascience.com/2012/08/29/syllabus/)
11 | * [UW-IS school Introduction to Data Science](http://www.jblumenstock.com/teaching/course=infx598)
12 | * [MIT’s How to Process, Analyze and Visualize Data](http://dataiap.github.io/dataiap/) [github](https://github.com/dataiap/dataiap)
13 | * [MIT’s ASCII to Answers](http://db.csail.mit.edu/6.885/) [github](https://github.com/mitdbg/asciiclass)
14 |
15 |
--------------------------------------------------------------------------------
/DATA_SOURCES.md:
--------------------------------------------------------------------------------
1 | ## Datasets on BigQuery
2 |
3 | * [NYU datasets](https://console.cloud.google.com/bigquery?invt=AbzacQ&project=nyu-datasets&inv=1)
4 | * Citibike
5 | * DOH restaurant inspection data
6 | * Facebook
7 | * IMDB
8 | * NYPD complaints
9 | * ...(others)
10 | * [Public Datasets on BigQuery](https://console.cloud.google.com/bigquery(cameo:browse)?invt=AbzacQ&project=bigquery-public-data&filter=solution-type:dataset)
11 | * [Wharton Research Data Services (WRDS)](https://wrds-www.wharton.upenn.edu/)
12 |
13 | ## Interesting data sources
14 | * [Public Datasets on Google BigQuery](https://console.cloud.google.com/marketplace/browse;page=1?filter=solution-type:dataset&filter=price:free&hl=en-GB&project=nyu-datasets)
15 | * Yelp APIs: [Yelp Fusion](https://www.yelp.com/developers/documentation/v3) and [Yelp GraphQL](https://www.yelp.com/developers/graphql/guides/intro)
16 | * [US Census API](https://www.census.gov/data/developers/data-sets.html) and [US Census Data](http://www.census.gov/data.html)
17 | * [Twitter API](https://developer.twitter.com/en/docs.html)
18 | * [Spotify API](https://developer.spotify.com/documentation/web-api/)
19 | * [NYC Open Data](https://data.cityofnewyork.us/)
20 | * [U.S. Government’s open data](https://www.data.gov/)
21 | * [The New York Times Data APIs](http://developer.nytimes.com/docs)
22 | * [Quandl: Finance and Economics Data](http://www.quandl.com/)
23 | * [Lending Club](https://www.lendingclub.com/info/download-data.action)
24 | * [World Bank Data](http://data.worldbank.org/)
25 | * [Unicef Data](http://www.unicef.org/sowc09/statistics/tables.php)
26 | * [Yahoo Labs Data](http://webscope.sandbox.yahoo.com/catalog.php)
27 | * [World Health Organization](http://www.who.int/research/en/)
28 | * [Weather Underground Data API](http://www.wunderground.com/weather/api/?MR=1)
29 | * [Google Public Data Explorer](http://www.google.com/publicdata/directory)
30 | * [NASDAQ](https://data.nasdaq.com/)
31 | * [CBOE Futures Exchange](http://cfe.cboe.com/Data/)
32 | * [Enron Email Dataset](http://www.cs.cmu.edu/~enron/)
33 |
34 |
35 | ## Aggregate lists of data sets
36 | * [Kaggle Datasets](https://www.kaggle.com/datasets)
37 | * [Data Collaboratives](http://datacollaboratives.org/explorer.html)
38 | * [30 Places to Find Open Data on the Web, by Romy Misra of Visual.ly](http://blog.visual.ly/data-sources/)
39 | * [Interesting datasets and APIs, by Prof. James Bagrow](http://bagrow.com/dsv/datasets.html)
40 | * [Datasets for Data Mining and Data Science, by KDnugets](http://www.kdnuggets.com/datasets/index.html)
41 | * [Research-quality data sets, by Hilary Mason](http://bitly.com/bundles/hmason/1)
42 | * [Reddit list of interesting data sets](http://www.reddit.com/r/datasets/)
43 | * [Finding Data on the Internet by Revolution Analytics](http://www.inside-r.org/howto/finding-data-internet)
44 |
45 | ## Discussions on data sources
46 | * [What data people are searching for](http://static.googleusercontent.com/media/www.google.com/en/us/googleblogs/pdfs/google_public_data_march2010.pdf)
47 | * [Discussion for accessing finance data](http://stackoverflow.com/questions/10040954/alternative-to-google-finance-api)
48 | * [Documentation for Yahoo! Finance](http://www.gummy-stuff.org/Yahoo-data.htm)
49 | * [Open Data Discussion](http://opendata.stackexchange.com/questions?sort=votes)
50 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Outside NYU, the content is shared under a Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license. For more details see https://creativecommons.org/licenses/by-nc/4.0/
2 |
3 | Inside NYU, any usage of the material by instructors other than the authors is strictly prohibited, and any violators will be prosecuted to the fullest extent of law.
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repository contains notes for various classes and seminars that I teach at NYU. They are focused on teaching programming for data science to non-CS majors. The emphasis is on offering live examples that students can use directly to complete their goals.
2 |
3 | ## Accessing your Data Science Environment
4 |
5 | * [Accessing your Data Science Environment](https://docs.google.com/document/d/1A5Y53eqBRRlrVMV-yLrpA9-3xZ3jQmv9i6qhOU5gn44/edit?usp=sharing)
6 |
7 | We set up and deploy our data science environment (effectively, Jupyter with Python and R support, plus MySQL) using docker. As our default option, we allow students to connect to a JupyterHub server that runs on Kubernetes. We also give the option to students to run the same environment locally on their laptops, or deploy the Docker image on AWS or Google Cloud.
8 |
9 | ## Data Sets
10 |
11 | * [List of interesting data sets](DATA_SOURCES.md)
12 |
13 | ## Related Courses
14 |
15 | * [List of related courses](COURSES.md)
16 |
17 | ## License
18 |
19 | * See [LICENSE](LICENSE)
20 |
--------------------------------------------------------------------------------
/jupyterhub/Makefile:
--------------------------------------------------------------------------------
# Build and publish the Docker images for the JupyterHub deployment.
# Requires DOCKER_REGISTRY to be set in the environment; IMG_VERSION is the
# tag suffix and defaults to "latest".
SHELL = /bin/bash
DOCKER_PATH=docker/
IMG_VERSION?=latest

.PHONY: build-image push-image build-single-user push-single-user check-env

# Fail fast with a clear message when the target registry is not configured.
check-env:
ifndef DOCKER_REGISTRY
	$(error DOCKER_REGISTRY not set)
endif

# Generic build rule; callers set IMAGE (and optionally ARG for extra
# `docker build` flags). The resulting tag is REGISTRY:IMAGE-VERSION.
build-image: check-env
	docker build -f ${DOCKER_PATH}/${IMAGE}/Dockerfile ${ARG} -t $(DOCKER_REGISTRY):$(IMAGE)-$(IMG_VERSION) .

push-image: check-env build-image
	docker push $(DOCKER_REGISTRY):$(IMAGE)-$(IMG_VERSION)

# Convenience wrappers for the "single-user" notebook image.
build-single-user:
	make IMAGE="single-user" build-image

push-single-user: build-single-user
	make IMAGE="single-user" push-image
--------------------------------------------------------------------------------
/jupyterhub/README.md:
--------------------------------------------------------------------------------
1 | This is the Class Tools infrastructure specification and management tools.
2 |
3 | - [Prerequisites](#prerequisites)
4 | - [Build the notebook image](#build-the-notebook-image)
5 | - [Setup JupyterHub on your Kubernetes cluster](#setup-jupyterhub-on-your-kubernetes-cluster)
6 | - [Chart configuration](#chart-configuration)
7 | - [Reference](#reference)
8 |
9 | ### Prerequisites
10 |
11 | Make sure you have installed
12 |
13 | * [Docker](https://www.docker.com/) >= *17.x.x*
14 | * [Google Cloud SDK](https://cloud.google.com/sdk/)
15 | * [kubectl](https://kubernetes.io/docs/user-guide/kubectl/)
16 |
17 | ### Build the notebook image
18 |
19 | Specify the following environment variables:
20 |
21 | * `DOCKER_REGISTRY` : the registry and repository to push the images, e.g. `me/my-docker-hub-repo`,
22 | * `IMG_VERSION` : the suffix to append to each image. Each tag will be in the form `name-IMG_VERSION`. Defaults to *latest*.
23 |
24 | To build and push the single-user Jupyter Notebook image to the docker repo specified above, run
25 |
26 | ```bash
27 | make push-single-user
28 | ```
29 |
30 | in the project's root directory.
31 |
32 | You can also run
33 |
34 | ```bash
35 | make build-single-user
36 | ```
37 |
38 | to just build the docker image locally.
39 |
40 |
41 | ### Setup JupyterHub on your Kubernetes cluster
42 |
43 | First, you need to install Helm. See [these instructions](https://github.com/kubernetes/helm/blob/master/docs/install.md)
44 | for details on how to do this.
45 |
46 | To initialize Helm, execute
47 | ```bash
48 | kubectl --namespace kube-system create sa tiller
49 | kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller
50 | helm init --service-account tiller
51 | ```
52 |
53 | **IMPORTANT**: Only execute the second command if the kubernetes cluster you are deploying to is RBAC-enabled.
54 |
55 | Once the Helm initialization is done, install the JupyterHub helm repository to Helm, by running:
56 | ```bash
57 | helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
58 | helm repo update
59 | ```
60 |
61 | ### Chart configuration
62 |
63 | Run
64 |
65 | ```bash
66 | cp deployment/helm/config.yaml.example deployment/helm/config.yaml
67 | ```
68 |
69 | and replace the placeholders inside the `config.yaml` file with their desirable values.
70 |
71 | * `proxy.secretToken`: Quoting from [[1]](#reference):
72 | > A 64-byte cryptographically secure randomly generated string used to secure communications between the hub and the configurable-http-proxy.
73 | >
74 | > This must be generated with `openssl rand -hex 32`.
75 | >
76 | > Changing this value will cause the proxy and hub pods to restart. It is good security practice to rotate these values over time. If this secret leaks, immediately change it to something else, or user data can be compromised
77 |
78 | * `singleuser.image`: The docker image you built during the first phase of the setup process.
79 | * `singleuser.image.name`: The repository in which the image is hosted.
80 | * `singleuser.image.tag`: The tag of the target notebook image.
81 |
82 | For more configuration options, see [[1]](#reference).
83 |
84 | Once you have setup the `config.yaml` file, run
85 | ```bash
86 | helm install jupyterhub/jupyterhub --version=v0.5 \
87 | --name=RELEASE-NAME --namespace=NAMESPACE-NAME \
88 | -f path/to/config.yaml [--set=rbac.enabled=false]
89 | ```
90 |
91 | where:
92 |
93 | * `--name` is a deployment identifier used by helm
94 | * `--namespace` is the name of the namespace in which JupyterHub will be deployed. If it does not exist, it will
95 | be created for you.
96 |
97 | **NOTE**: If the cluster you are deploying to is not RBAC-enabled, then you need to also use the `--set` flag
98 | in the above command.
99 |
100 | After the above command executes, check the status of the deployment by running
101 | ```bash
102 | kubectl get pods --namespace NAMESPACE-NAME
103 | ```
104 |
105 | When both the proxy and the hub pods have a status of 'Running', you are good to go.
106 |
107 | **NOTE**: You will also have to allow TCP traffic to the hub proxy's port on your cloud provider's firewall. Run
108 | ```bash
109 | kubectl get services --namespace NAMESPACE-NAME
110 | ```
111 |
112 | and look for the `proxy-public` service. Allow TCP traffic to the port which targets port 80 of the proxy. For instance,
113 | say the output of the `get services` command were:
114 | ```
115 | hub ClusterIP . . . 8081/TCP
116 | proxy-api ClusterIP . . . 8001/TCP
117 | proxy-http ClusterIP . . . 8000/TCP
118 | proxy-public LoadBalancer . . . 80:31870/TCP,443:31182/TCP
119 | ```
120 |
121 | We would have to allow traffic to `tcp:31870` on our firewall in order to be able to access the proxy.
122 |
123 | ### Reference
124 |
125 | [1] [Helm Chart Configuration](https://zero-to-jupyterhub.readthedocs.io/en/latest/reference.html#id1)
126 |
--------------------------------------------------------------------------------
/jupyterhub/deployment/helm/config.yaml.example:
--------------------------------------------------------------------------------
# Helm chart values template for the JupyterHub deployment.
# Copy to config.yaml and replace the placeholders (see jupyterhub/README.md).
proxy:
  # 64-byte cryptographically secure random hex string; generate it with
  # `openssl rand -hex 32`.
  secretToken: "YOUR_TOKEN_HERE"
singleuser:
  image:
    # Docker repository and tag of the single-user notebook image built
    # with `make push-single-user`.
    name: HUB_REPOSITORY
    tag: IMAGE_TAG
7 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/Dockerfile:
--------------------------------------------------------------------------------
# Single-user Jupyter notebook image spawned by JupyterHub for each student.
FROM ubuntu:latest

# Force UTF-8 everywhere so Python and Jupyter handle non-ASCII text correctly.
ENV LANGUAGE en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV PYTHONIOENCODING UTF-8
ENV NB_USER ubuntu

RUN useradd -ms /bin/bash ubuntu

# System packages: toolchain and headers needed to build the scientific
# Python stack, plus git/curl/wget and MySQL client support.
# NOTE(review): libcurl4-openssl-dev and libffi-dev appear twice in this
# list (harmless to apt, but one copy could be dropped).
RUN apt-get -y update && \
    apt-get -y dist-upgrade && \
    apt-get -y upgrade && \
    apt-get -y install \
    sudo \
    build-essential \
    python3-dev \
    python3-pip \
    ca-certificates \
    curl \
    git \
    gfortran \
    libblas-dev \
    liblapack-dev \
    libssl-dev \
    libffi-dev \
    libcurl4-openssl-dev \
    libgdal-dev \
    wget \
    jq \
    language-pack-en \
    libcurl4-openssl-dev \
    libffi-dev \
    libzmq3-dev \
    libxml2-dev \
    libxslt-dev \
    python3-lxml \
    zlib1g-dev \
    python3-mysqldb && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# install latest version of pip
RUN pip3 install -U pip

# TODO: Move the Python libraries to a requirements.txt file?

# install basic Python libraries to run Jupyter
RUN pip3 install -U \
    notebook==5.2.* \
    jupyterhub==0.8.* \
    ipython

# add libraries used in intro to python exercise
RUN pip3 install -U jellyfish \
    ngram

# add standard data science libraries
RUN pip3 install -U \
    numpy \
    scipy \
    matplotlib \
    pandas \
    statsmodels \
    scikit-learn

# add libraries for teaching web APIs
RUN pip3 install -U \
    requests \
    requests_oauthlib \
    Flask \
    slackclient

# add libraries for NLP
RUN pip3 install -U \
    spacy \
    nltk \
    gensim

# add libraries for visualization/mapping
RUN pip3 install -U \
    seaborn \
    bokeh \
    folium \
    geopandas \
    geopy

# add libraries for finance
RUN pip3 install -U \
    googlefinance \
    yahoo-finance \
    quandl

# misc libraries
RUN pip3 install -U \
    boto \
    boto3 \
    elasticsearch \
    networkx \
    py2neo \
    pymongo \
    selenium \
    tweepy

# FILE_PATH is supplied by the Makefile's `docker build` invocation and
# points at the directory holding this image's support files.
ARG FILE_PATH
# Add a notebook profile.
COPY $FILE_PATH/jupyter_notebook_config.py /etc/jupyter/
RUN echo "c.NotebookApp.notebook_dir = '/notebooks'" >> /etc/jupyter/jupyter_notebook_config.py
RUN echo "c.NotebookApp.allow_root = True" >> /etc/jupyter/jupyter_notebook_config.py
RUN echo "$NB_USER ALL=NOPASSWD: ALL" >> /etc/sudoers

# Pre-clone the class notes and data sets into the image.
WORKDIR /notebooks
RUN ["git", "clone", "--verbose", "https://github.com/ipeirotis/dealing_with_data.git", "/notebooks"]
# VOLUME /notebooks

WORKDIR /data
RUN ["git", "clone", "--verbose", "https://github.com/ipeirotis/data.git", "/data"]
# VOLUME /data

RUN pip3 install ipython-sql sql_magic mysqlclient

EXPOSE 8888
LABEL org.jupyter.service="jupyter"
# World-writable so the unprivileged notebook user can modify the checkouts.
RUN chmod -R 777 /notebooks
RUN chmod -R 777 /data

CMD ["start-notebook.sh"]

# Add local files as late as possible to avoid cache busting
# NOTE(review): the script must carry the execute bit in the build context
# for the CMD above to work — confirm it is committed as executable.
COPY $FILE_PATH/start-notebook.sh /usr/local/bin/

USER $NB_USER
133 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Notebook server configuration for the single-user Docker image: listen on
# all interfaces on port 8888 and optionally generate a self-signed TLS cert.

from jupyter_core.paths import jupyter_data_dir
import subprocess
import os
import errno
import stat

# `get_config()` is injected into the namespace by Jupyter when it loads this
# file as a configuration file; it is not a regular import.
c = get_config()
c.NotebookApp.ip = '*'  # listen on all interfaces (the container runs behind the hub proxy)
c.NotebookApp.port = 8888
c.NotebookApp.open_browser = False

# Generate a self-signed certificate when the GEN_CERT environment variable
# is set (any value enables it).
if 'GEN_CERT' in os.environ:
    dir_name = jupyter_data_dir()
    pem_file = os.path.join(dir_name, 'notebook.pem')
    try:
        os.makedirs(dir_name)
    except OSError as exc:  # Python >2.5
        # Ignore "directory already exists"; re-raise anything else.
        if exc.errno == errno.EEXIST and os.path.isdir(dir_name):
            pass
        else:
            raise
    # Generate a certificate if one doesn't exist on disk.
    # NOTE(review): `-new -x509` writes a fresh key+cert pair on every start,
    # so an existing notebook.pem is overwritten rather than reused — confirm
    # that is intended.
    subprocess.check_call(['openssl', 'req', '-new',
                           '-newkey', 'rsa:2048',
                           '-days', '365',
                           '-nodes', '-x509',
                           '-subj', '/C=XX/ST=XX/L=XX/O=generated/CN=generated',
                           '-keyout', pem_file,
                           '-out', pem_file])
    # Restrict access to the file: key and cert share one file, so make it
    # readable/writable by the owner only.
    os.chmod(pem_file, stat.S_IRUSR | stat.S_IWUSR)
    c.NotebookApp.certfile = pem_file
37 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/start-notebook.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Container entrypoint: when spawned by JupyterHub (detected via the API
# token it injects), exec the single-user server; otherwise run a plain
# standalone notebook server. Any arguments are forwarded untouched.

set -e

if [[ -n "${JUPYTERHUB_API_TOKEN}" ]]; then
    # launched by JupyterHub, use single-user entrypoint.
    # "$@" (not the unquoted $* of the original) preserves arguments that
    # contain whitespace as single words.
    exec jupyterhub-singleuser "$@"
else
    exec jupyter notebook "$@"
fi
13 |
--------------------------------------------------------------------------------
/start_jupyter.sh:
--------------------------------------------------------------------------------
1 | export PATH=$PATH:/usr/local/bin/geckodriver
2 | jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10 --notebook-dir=/home/ubuntu/jupyter > /tmp/jupyter.out 2>&1 &
3 |
4 |
--------------------------------------------------------------------------------
/stop_jupyter.sh:
--------------------------------------------------------------------------------
1 | kill $(pgrep jupyter)
2 |
--------------------------------------------------------------------------------
/sync_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Refresh the shared /data directory with a fresh clone of the upstream
# GitHub data repository, then re-point ~/sync_data.sh at the copy tracked
# inside the NYU_Notes checkout.

sudo rm -rf /data
sudo mkdir -p /data
sudo git clone https://github.com/ipeirotis/data.git /data

# Replace this (possibly stale) copy of the script with a symlink to the
# repository-tracked version. -f keeps the script from failing if the file
# has already been removed (the original bare `rm` errored in that case).
rm -f /home/ubuntu/sync_data.sh
ln -s /home/ubuntu/jupyter/NYU_Notes/sync_data.sh /home/ubuntu/sync_data.sh
9 |
--------------------------------------------------------------------------------
/sync_notebooks.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Replace the NYU_Notes notebook folder with a fresh clone from GitHub,
# preserving the previous copy under a timestamped name, after asking the
# user for confirmation.

echo "############################################################"
# Fixed: the original banner was missing the word "with", making the
# two-line message ungrammatical.
echo "This will replace the content of NYU_Notes with"
echo "the most recent content from the Github repository"
echo ""
echo "The existing NYU_Notes folder will be renamed"
echo "NYU_Notes_"$(date '+%Y-%b-%d_%H%M')
echo "and preserved until you delete it. "
echo "############################################################"
echo ""
read -p "Are you sure that you want to proceed? (Y/N) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
    cd /home/ubuntu/jupyter
    # NOTE(review): `date` is evaluated again here, so the displayed backup
    # name can differ by a minute from the actual one — confirm acceptable.
    mv NYU_Notes NYU_Notes_$(date '+%Y-%b-%d_%H%M')
    git clone https://github.com/ipeirotis/dealing_with_data.git NYU_Notes
    cd
fi

# Replace this copy of the script with a symlink to the repository-tracked
# version; -f keeps the script from failing if the file is already gone.
rm -f /home/ubuntu/sync_notebooks.sh
ln -s /home/ubuntu/jupyter/NYU_Notes/sync_notebooks.sh /home/ubuntu/sync_notebooks.sh
24 |
--------------------------------------------------------------------------------
/test_notebooks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import subprocess\n",
11 | "import tempfile\n",
12 | "\n",
13 | "import nbformat\n",
14 | "\n",
15 | "def notebook_run(path):\n",
16 | " \"\"\"Execute a notebook via nbconvert and collect output.\n",
17 | " :returns (parsed nb object, execution errors)\n",
18 | " \"\"\"\n",
19 | "\n",
20 | " with tempfile.NamedTemporaryFile(suffix=\".ipynb\") as fout:\n",
21 | " args = [\"jupyter\", \"nbconvert\", \"--to\", \"notebook\", \"--execute\",\n",
22 | " \"--ExecutePreprocessor.timeout=60\",\n",
23 | " \"--output\", fout.name, path]\n",
24 | " subprocess.check_call(args)\n",
25 | "\n",
26 | " fout.seek(0)\n",
27 | " nb = nbformat.read(fout.name, nbformat.current_nbformat)\n",
28 | "\n",
29 | " errors = [output for cell in nb.cells if \"outputs\" in cell\n",
30 | " for output in cell[\"outputs\"]\\\n",
31 | " if output.output_type == \"error\"]\n",
32 | "\n",
33 | " return nb, errors"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 2,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "from os import listdir\n",
43 | "from os.path import isfile, join, isdir\n",
44 | "\n",
45 | "def test_notebooks(path):\n",
46 | " \n",
47 | " notebooks = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith('.ipynb')]\n",
48 | " directories = [f for f in listdir(path) if isdir(join(path, f))]\n",
49 | " \n",
50 | " for notebook in sorted(notebooks):\n",
51 | " print(notebook)\n",
52 | " nb, errors = notebook_run(join(path, notebook))\n",
53 | " assert errors == []\n",
54 | " \n",
55 | " for directory in sorted(directories):\n",
56 | " print(directory)"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | "A-Introduction_to_iPython_Notebooks.ipynb\n"
69 | ]
70 | },
71 | {
72 | "ename": "CalledProcessError",
73 | "evalue": "Command '['jupyter', 'nbconvert', '--to', 'notebook', '--execute', '--ExecutePreprocessor.timeout=60', '--output', '/tmp/tmpiy8tr2a2.ipynb', '01-Introduction_to_Python/A-Introduction_to_iPython_Notebooks.ipynb']' returned non-zero exit status 1.",
74 | "output_type": "error",
75 | "traceback": [
76 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
77 | "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
78 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_notebooks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'01-Introduction_to_Python'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
79 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtest_notebooks\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnotebook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnotebooks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnotebook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mnb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnotebook_run\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnotebook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
80 | "\u001b[0;32m\u001b[0m in \u001b[0;36mnotebook_run\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\"--ExecutePreprocessor.timeout=60\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \"--output\", fout.name, path]\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0msubprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mfout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
81 | "\u001b[0;32m/usr/lib/python3.6/subprocess.py\u001b[0m in \u001b[0;36mcheck_call\u001b[0;34m(*popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcmd\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[0mcmd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpopenargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mCalledProcessError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mretcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
82 | "\u001b[0;31mCalledProcessError\u001b[0m: Command '['jupyter', 'nbconvert', '--to', 'notebook', '--execute', '--ExecutePreprocessor.timeout=60', '--output', '/tmp/tmpiy8tr2a2.ipynb', '01-Introduction_to_Python/A-Introduction_to_iPython_Notebooks.ipynb']' returned non-zero exit status 1."
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "test_notebooks('01-Introduction_to_Python')"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "name": "stdout",
97 | "output_type": "stream",
98 | "text": [
99 | "D-MySQL_and_Python.ipynb\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "test_notebooks('02-SQL')"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": []
113 | }
114 | ],
115 | "metadata": {
116 | "kernelspec": {
117 | "display_name": "Python 3",
118 | "language": "python",
119 | "name": "python3"
120 | },
121 | "language_info": {
122 | "codemirror_mode": {
123 | "name": "ipython",
124 | "version": 3
125 | },
126 | "file_extension": ".py",
127 | "mimetype": "text/x-python",
128 | "name": "python",
129 | "nbconvert_exporter": "python",
130 | "pygments_lexer": "ipython3",
131 | "version": "3.6.6"
132 | }
133 | },
134 | "nbformat": 4,
135 | "nbformat_minor": 2
136 | }
137 |
--------------------------------------------------------------------------------
/upgrade_linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo apt-get -y update
3 | sudo apt-get -y dist-upgrade
4 | sudo apt-get -y upgrade
5 | sudo apt-get -y autoremove
6 |
--------------------------------------------------------------------------------
/upgrade_python.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo -H pip freeze --local | grep -v '^\-e' | cut -d = -f 1 | tee >(xargs -n1 sudo -H python3 -m pip install -U) | grep -v "Requirement"
3 |
--------------------------------------------------------------------------------