'.format(i=visitor_counter)
18 |
# The function simply returns a message with the
# current date and time.
def get_time_message():
    """Return a human-readable message with the current date and time.

    Returns:
        str: a two-line message of the form
             "\nThe date is YYYY-MM-DD\nThe time is HH:MM:SS".
    """
    # Take a single timestamp so the date and time parts cannot disagree
    # when the call happens to straddle a second/day boundary. (The original
    # also had a first strftime() call whose result was discarded; removed.)
    now = datetime.now()
    date = now.strftime('%Y-%m-%d')
    time = now.strftime('%H:%M:%S')
    # NOTE(review): the original f-string literal was corrupted during
    # extraction (markup appears stripped); reconstructed to preserve the
    # visible text — confirm against version control.
    message = f"\nThe date is {date}\nThe time is {time}"
    return message
27 |
28 |
29 | # Augmenting the basic "Hello world" with a message
30 | # that shows the date and time
# Augmenting the basic "Hello world" with a message
# that shows the date and time
@app.route("/")
def home():
    """Home page: a greeting followed by the current date and time."""
    message = get_time_message()
    # NOTE(review): the original return literal was corrupted during
    # extraction (HTML markup appears stripped). Reconstructed minimally to
    # preserve the visible text; restore the full markup from version control.
    return "\nHello World!" + message + "\n"

app.run(host='0.0.0.0', port=5000)
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/11-Flask/B-main3.py:
--------------------------------------------------------------------------------
1 | # Import necessary libraries
2 | import base64
3 | from io import BytesIO
4 |
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 | from flask import Flask, jsonify, request
8 | from sqlalchemy import create_engine, text
9 | import pymysql
10 |
11 | app = Flask(__name__)
12 |
13 | # This code creates a connection to the database
# Connection settings for the class MySQL server.
# NOTE(review): credentials are hardcoded in source; fine for a classroom
# demo, but move them to environment variables for anything real.
db_params = dict(
    host='db.ipeirotis.org',
    user='student',
    db='citibike_fall2017',
    password='dwdstudent2015',
    encoding='utf8mb4',
)

# Build the SQLAlchemy URL and create the engine (connection pool).
conn_string = 'mysql+pymysql://{user}:{password}@{host}/{db}?charset={encoding}'.format(**db_params)

engine = create_engine(conn_string)
22 |
23 |
@app.route('/citibike_api', methods=['GET'])
def citibike_stations():
    """
    API endpoint to get Citibike station details from the database.
    """
    query = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"

    # Run the query inside a managed connection so it is always released.
    with engine.connect() as con:
        stations_df = pd.read_sql(text(query), con=con)

    # Package the rows as a list of dicts under the key "stations",
    # then JSON-ify the dictionary as the API response.
    return jsonify({"stations": stations_df.to_dict(orient='records')})
42 |
43 |
@app.route('/station_map', methods=['GET'])
def station_map():
    """
    API endpoint to get a scatter plot of Citibike stations on a map.

    Returns:
        JSON of the form {"image": <base64-encoded PNG>}.
    """
    # Connect to the database, execute the query, and get back the results
    sql = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"
    with engine.connect() as connection:
        stations = pd.read_sql(text(sql), con=connection)

    fig, ax = plt.subplots()
    stations.plot(kind='scatter', x='lon', y='lat', ax=ax)

    # Render the figure to an in-memory PNG and base64-encode it so it can
    # travel inside a JSON payload.
    buf = BytesIO()
    fig.savefig(buf, format="png")
    data = base64.b64encode(buf.getbuffer()).decode("ascii")

    # Close the figure: without this every request leaks a matplotlib
    # figure and the server's memory grows without bound.
    plt.close(fig)

    # We JSON-ify our dictionary and return it as the API response
    return jsonify({"image": data})
68 |
@app.route('/station_image', methods=['GET'])
def station_image():
    """
    API endpoint to get a scatter plot of Citibike stations on a map,
    returned directly as an inline HTML <img> element.
    """
    # Connect to the database, execute the query, and get back the results
    sql = "SELECT DISTINCT id, name, capacity, lat, lon FROM status_fall2017"
    with engine.connect() as connection:
        stations = pd.read_sql(text(sql), con=connection)

    fig, ax = plt.subplots()
    stations.plot(kind='scatter', x='lon', y='lat', ax=ax)

    # Render the figure to an in-memory PNG and base64-encode it so it can
    # be embedded in the HTML output.
    buf = BytesIO()
    fig.savefig(buf, format="png")
    data = base64.b64encode(buf.getbuffer()).decode("ascii")

    # Close the figure so repeated requests do not leak memory.
    plt.close(fig)

    # NOTE(review): the original literal here was corrupted during extraction
    # (it read `return f""`, i.e. the markup was stripped). Reconstructed as
    # the standard "embed a matplotlib PNG in HTML" recipe — confirm against
    # version control.
    return f'<img src="data:image/png;base64,{data}"/>'
90 |
91 |
@app.route('/station_status')
def station_status():
    """
    API endpoint to get the status of a specific Citibike station.

    Query parameters:
        station_id (int): id of the station whose history is requested.

    Returns:
        JSON with the station_id and its status records over time, or a
        JSON error object when the parameter is missing or not an integer.
    """
    # Get the station ID from the URL parameters
    param = request.args.get('station_id')
    try:
        param_value = int(param)
    except (TypeError, ValueError):
        # TypeError: parameter absent (param is None); ValueError: parameter
        # is not an integer. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so catch only what int() can raise.
        return jsonify({"error": "No station_id parameter given or other problem"})

    # Parameterized query — the value is bound, never string-interpolated.
    sql = '''SELECT available_bikes,
                    available_docks,
                    capacity,
                    available_bikes / capacity AS percent_full,
                    communication_time
             FROM status_fall2017
             WHERE id = :station_id'''

    with engine.connect() as con:
        station_status = pd.read_sql(text(sql),
                                     con=con,
                                     params={"station_id": param_value})

    station_status_over_time = station_status.to_dict(orient='records')

    api_results = {
        "station_id": param_value,
        "status_over_time": station_status_over_time
    }

    # We JSON-ify our dictionary and return it as the API response
    return jsonify(api_results)
126 |
# Main page
@app.route("/")
def index():
    """
    Main page of the web application.
    """
    # NOTE(review): the original HTML template was destroyed during
    # extraction (all markup stripped; only the title "Citibike API"
    # survives). This is a minimal reconstruction so the route still serves
    # a valid page — restore the real template from version control.
    page = '''<html>
    <head><title>Citibike API</title></head>
    <body><h1>Citibike API</h1></body>
    </html>'''
    return page
--------------------------------------------------------------------------------
/12-UNIX_Basics/A-Basic_Unix_Shell_Commands.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Overview\n",
8 | "--------\n",
9 | "\n",
10 | "_Readings: The [Appendix A of Learn Python the Hard Way](http://learnpythonthehardway.org/book/appendixa.html) also discusses the material below._\n",
11 | "\n",
12 | "Modern data science is impossible without some understanding of the Unix command line. Unix is a family of computer operating systems including the Mac’s OS X and Linux (technically, Linux is a Unix clone); Windows has also Unix emulators, which allow running Unix commands. In our class, we use the Linux (specifically, the Ubuntu distribution), running on the Amazon EC2 cloud infrastructure.\n",
13 | "\n",
14 | "Let's start:\n",
15 | "\n",
16 | "(_**Note**: In IPython, to call a command line script, you add an exclamation mark before the command. That's why you will see all the commands in this notebook being preceded by a `!` character._)"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### Understanding the folder structure\n",
24 | "\n",
25 | "Basic concepts\n",
26 | "* Hierarchical directory structure\n",
27 | "* Absolute vs. relative directories\n",
28 | "* Parent (..) and current (.) directories\n",
29 | "\n",
30 | "\n",
31 | "### `pwd`\n",
32 | "\n",
33 | "Prints the current directory. Type `pwd` in the shell prompt. This will tell you your current directory. "
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": true,
41 | "scrolled": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "!pwd"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "### `ls`\n",
53 | "\n",
54 | "Lists the contents of a directory or provide information about the specified file. Typical usage: \n",
55 | "\n",
56 | "`ls [options] [files or directories]`\n",
57 | "\n",
58 | "If you want to know the contents of this directory, type `ls -A`. "
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": true
66 | },
67 | "outputs": [],
68 | "source": [
69 | "!ls"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "By default, `ls` simply lists the contents of the current directory. There are several options that when used in conjunction with ls give more detailed information about the files or directories being queried. Here are a sample:\n",
77 | "\n",
78 | "+ `-A`: list all of the contents of the queried directory, even hidden files.\n",
79 | "+ `-l`: detailed format, display additional info for all files and directories.\n",
80 | "+ `-R`: recursively list the contents of any subdirectories.\n",
81 | "+ `-t`: sort files by the time of the last modification.\n",
82 | "+ `-S`: sort files by size.\n",
83 | "+ `-r`: reverse any sort order.\n",
84 | "+ `-h`: when used in conjunction with `-l`, gives a more human-readable output.\n",
85 | "\n"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "Let's try now to execute `ls` with a different set of options:"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {
99 | "collapsed": true
100 | },
101 | "outputs": [],
102 | "source": [
103 | "!ls -lh"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "### `cd`\n",
111 | "\n",
112 | "Change the current directory. Usage: \n",
113 | "\n",
114 | "`cd [directory to move to]`\n",
115 | "\n",
116 | "For example, to change to the `/home/ubuntu` directory:"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "collapsed": true
124 | },
125 | "outputs": [],
126 | "source": [
127 | "!cd /home/ubuntu"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "If we want to run two commands in a row, we separate them using the `;` character. For example, to change to a directory and show its contents:"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": true
142 | },
143 | "outputs": [],
144 | "source": [
145 | "!cd /home/ubuntu; ls -l"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "### `mkdir`\n",
153 | "\n",
154 | "Creates a new folder. For example, to create a new folder named `DealingWithData` under the current folder, we type:\n"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "!mkdir DealingWithData\n",
166 | "!ls -lA"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {},
172 | "source": [
173 | "### `rmdir` \n",
174 | "\n",
175 | "Removes a folder. (The folder must be empty for the command to succeed.)"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "collapsed": true
183 | },
184 | "outputs": [],
185 | "source": [
186 | "!rmdir DealingWithData"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "### `cp` \n",
194 | "\n",
195 | "Copies a file. Usage:\n",
196 | "\n",
197 | "`cp [source file] [destination file]`\n",
198 | "\n",
199 | "It can also be used to copy multiple files into a directory.\n",
200 | "\n",
201 | "`cp [source file1] [source file2] ... [destination directory]`\n",
202 | "\n",
203 | "For example, to copy the file 'A-Basic_Unix_Shell_Commands.ipynb' and name the file NotebookA.ipynb"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": null,
209 | "metadata": {
210 | "collapsed": true
211 | },
212 | "outputs": [],
213 | "source": [
214 | "!cp A-Basic_Unix_Shell_Commands.ipynb NotebookA.ipynb\n",
215 | "!ls -l "
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {},
221 | "source": [
222 | "Or we can copy the file to another folder. For example, the following command copies the file `A-Basic_Unix_Shell_Commands.ipynb` to folder `DealingWithData` and names the new file `NotebookA.ipynb`"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {
229 | "collapsed": true
230 | },
231 | "outputs": [],
232 | "source": [
233 | "!mkdir DealingWithData\n",
234 | "!cp A-Basic_Unix_Shell_Commands.ipynb DealingWithData/NotebookA.ipynb\n",
235 | "!ls -lA DealingWithData"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "### `rm` \n",
243 | "\n",
244 | "The `rm` command is used to delete a file.\n",
245 | "\n",
246 | "rm -r : deletes a folder, recursively"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {
253 | "collapsed": true
254 | },
255 | "outputs": [],
256 | "source": [
257 | "!rm DealingWithData/NotebookA.ipynb\n",
258 | "!rm NotebookA.ipynb"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {
265 | "collapsed": true
266 | },
267 | "outputs": [],
268 | "source": [
269 | "#clean up\n",
270 | "!rmdir DealingWithData"
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {},
276 | "source": [
277 | "### `mv`\n",
278 | "\n",
279 | "The `mv` command is similar to `cp` but it moves the file instead of just copying it. Effectively it performs a `cp` command, followed by an `rm` for the original file"
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "## Exercise\n",
287 | "\n",
288 | "* Find the current directory, using the `pwd` command.\n",
289 | "* Create two new directories, `dir1` and `dir2` with the `mkdir` command. \n",
290 | "* Use `ls` to confirm\n",
291 | "* Copy the file `../2-Introduction_to_Python/data/baseball.csv` to `dir1` and name it `file1.csv`. (Note: The absolute path for the file is `/home/ubuntu/jupyter/NYU_Notes/2-Introduction_to_Python/data/baseball.csv`. We use the `..` notation to refer to the parent directory of the current one.)\n",
292 | "* Copy the file `../3-SQL/data/imdb.sql.gz` to dir2 and name it `file2.sql.gz`\n",
293 | "* Move each file to the other directory (`file1.csv` to `dir2` and `file2.sql.gz` to `dir1`) with the `mv` command.\n",
294 | "* Delete both directories with the `rm -r` command.\n"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": null,
300 | "metadata": {
301 | "collapsed": true
302 | },
303 | "outputs": [],
304 | "source": [
305 | "# your code here\n"
306 | ]
307 | }
308 | ],
309 | "metadata": {
310 | "kernelspec": {
311 | "display_name": "Python 3",
312 | "language": "python",
313 | "name": "python3"
314 | },
315 | "language_info": {
316 | "codemirror_mode": {
317 | "name": "ipython",
318 | "version": 3
319 | },
320 | "file_extension": ".py",
321 | "mimetype": "text/x-python",
322 | "name": "python",
323 | "nbconvert_exporter": "python",
324 | "pygments_lexer": "ipython3",
325 | "version": "3.5.2"
326 | }
327 | },
328 | "nbformat": 4,
329 | "nbformat_minor": 1
330 | }
331 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/B-Fetching_Data_Using_CURL.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Getting Data using CURL\n",
8 | "-----------------------\n",
9 | "\n",
10 | "We now move into a more interesting topic: How to get data from Internet sources. For that, we will use a command-line tool of Unix, called `curl`. (Later in class, we will learn how to achieve the same using Python, but for quick testing, curl is often the standard method used.) We will also use a tool called `jq` to interact with JSON output. (Do not worry, we will revisit both these later in class.)\n",
11 | "\n",
12 | "_Often, curl and jq do not come preinstalled, so the first time that we use them, we need to issue the appropriate command for installing it. To install it, simply type:_"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {
19 | "collapsed": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "!sudo apt-get -y install curl\n",
24 | "!sudo apt-get -y install jq"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "Let's start by retrieving a simple text file, which we will use later in the class, to illustrate how different shell commands work. The sample data file is hosted online. You can use terminal commands to copy this remote file. Simply type:"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "!curl -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0'"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "The columns in this tab-separated data correspond to [order id] [time of order] [user id] [ordered item], something similar to what might be encountered in practice. If you wish, you can copy-paste the data written above into a text editor, making sure there is a newline following each of the ordered item columns (the columns with alphabetic characters)."
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "To store the output to a file, we also add the `-o [output file]` in the command. (We are also going to see in the next session how to use _output redirection_ to store the output to a file.)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": true
62 | },
63 | "outputs": [],
64 | "source": [
65 | "!curl -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0' -o data/sample.txt"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": true
73 | },
74 | "outputs": [],
75 | "source": [
76 | "!ls data/"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "This will pull the file to the directory `/home/ubuntu/data/`, creating a new file called `sample.txt`. If we do not want to see any statistics about the download, we can use the `-s` option:"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "collapsed": true
91 | },
92 | "outputs": [],
93 | "source": [
94 | "!curl -s -L 'https://www.dropbox.com/s/w6sov31z68v5e8v/sample.txt?dl=0' -o data/sample.txt"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "And let's clean up:"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": true
109 | },
110 | "outputs": [],
111 | "source": [
112 | "!rm data/sample.txt"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "Now, let's try to use curl to get access to some real data. A key component of today's data ecosystem is the existence of `Web APIs` which provide functionality for a variety of tasks.\n",
120 | "\n",
121 | "#### Where am I?\n",
122 | "\n",
123 | "For example, let's try to figure out programmatically the location of the computer where the iPython server is running. We can access the API call by issuing the following command:\n",
124 | "\n"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {
131 | "collapsed": true
132 | },
133 | "outputs": [],
134 | "source": [
135 | "!curl -s \"http://freegeoip.net/json/\" | jq ."
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "While this does not look nice to a human, for a computer it is a perfectly legitimate answer. This format is called \"JSON\", and is an efficient and very commonly used way to transfer data today on the Internet.\n",
143 | "(The `| jq .` part of the command controls how the output is presented.)"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "Now, let's examine a few more web APIs, just for fun:\n",
151 | "\n",
152 | "#### What's the weather?\n",
153 | "\n",
154 | "Now, let's use the OpenWeather API to get the weather details in our location. (The details of the API calls are available at http://openweathermap.org/api.)"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "!curl -s \"http://api.openweathermap.org/data/2.5/weather?\\\n",
166 | "&appid=ffb7b9808e07c9135bdcc7d1e867253d\\\n",
167 | "&q=New%20York,NY,USA\\\n",
168 | "&units=imperial\\\n",
169 | "&mode=json\" | jq ."
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "#### What's the sentiment?\n",
177 | "\n",
178 | "Now let's try to use a web service to automatically analyze the sentiment for a piece of text. (The service comes from the [IBM's Alchemy API](http://www.alchemyapi.com/api/sentiment/textc.html#textsentiment))"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {
185 | "collapsed": true
186 | },
187 | "outputs": [],
188 | "source": [
189 | "!curl -s \"http://access.alchemyapi.com/calls/text/TextGetTextSentiment\" \\\n",
190 | "-d \"outputMode=json\" \\\n",
191 | "-d \"apikey=4b46c7859a7be311b6f9389b12504e302cac0a55\" \\\n",
192 | "-d \"text=I hate this product! \" | jq ."
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "#### And a few synonyms\n",
200 | "\n",
201 | "And now just a demo of a web API that I created myself a few years back. It analyzes Wikipedia to figure out different ways that people use to refer to the same entity\n",
202 | "\n"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "collapsed": true
210 | },
211 | "outputs": [],
212 | "source": [
213 | "!curl -s \"http://wikisynonyms.ipeirotis.com/api/Donald_Trump\" | jq ."
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "## Exercise\n",
221 | "\n",
222 | "The following websites contain listing of many useful APIs\n",
223 | "\n",
224 | "* https://www.mashape.com \n",
225 | "* http://www.programmableweb.com/\n",
226 | "* http://www.mashery.com/\n",
227 | "* http://apigee.com/ \n",
228 | "\n",
229 | "Mashape is my own personal favorite in terms of user-friendliness and also has examples directly expressed using CURL. but the others are pretty nice as well. Your task: search through these websites and find a web API that does something that you like. Use CURL to issue a web API call to this service. "
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "collapsed": true
237 | },
238 | "outputs": [],
239 | "source": []
240 | }
241 | ],
242 | "metadata": {
243 | "kernelspec": {
244 | "display_name": "Python 3",
245 | "language": "python",
246 | "name": "python3"
247 | },
248 | "language_info": {
249 | "codemirror_mode": {
250 | "name": "ipython",
251 | "version": 3
252 | },
253 | "file_extension": ".py",
254 | "mimetype": "text/x-python",
255 | "name": "python",
256 | "nbconvert_exporter": "python",
257 | "pygments_lexer": "ipython3",
258 | "version": "3.5.2"
259 | }
260 | },
261 | "nbformat": 4,
262 | "nbformat_minor": 1
263 | }
264 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/D-Running_Tasks_In_The_Background.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Creating Scripts\n",
8 | "\n",
9 | "Now, let's try to create our first \"script\", which we can execute from the shell directly.\n",
10 | "\n",
11 | "* Create a file called `get_weather.py`\n",
12 | "* Type the commands from the earlier module\n",
13 | "```python\n",
14 | " #!/usr/bin/python3\n",
15 | " import requests\n",
16 | " freegeoip_url = 'http://freegeoip.net/json/'\n",
17 | " resp = requests.get(freegeoip_url)\n",
18 | " data = resp.json()\n",
19 | " lon = data[\"longitude\"]\n",
20 | " lat = data[\"latitude\"]\n",
21 | "\n",
22 | " openweathermap_url = \"http://api.openweathermap.org/data/2.5/weather\"\n",
23 | " parameters = {\n",
24 | " 'lat' : str(lat),\n",
25 | " 'lon' : str(lon),\n",
26 | " 'units' : 'imperial',\n",
27 | " 'mode' : 'json',\n",
28 | " 'appid' : 'ffb7b9808e07c9135bdcc7d1e867253d'\n",
29 | " }\n",
30 | " resp = requests.get(openweathermap_url, params=parameters)\n",
31 | " data = resp.json()\n",
32 | " print(\"Location:\", data['name'])\n",
33 | " print(\"Weather:\", data['weather'][0]['description'])\n",
34 | " print(\"Temperature:\", data['main']['temp'])\n",
35 | "```\n",
36 | "* Finally type `python3 get_weather.py` and see what happens."
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "#### Exercise\n",
44 | "\n",
45 | "* Modify the script, to print the location, weather and temperature in tab-separated columns.\n",
46 | "* Modify the script, to print a header at the beginning; then put the code in an infinite loop (`while True:...`) and get the code the execute for ever. Use the `import time` and `time.sleep(...)` to add a delay of a few seconds between continuous executions of the code.\n",
47 | "* Modify the script, write the output to a file instead of print to the screen.\n"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": []
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "### Running Jobs in the Background (until terminating the Terminal)\n",
60 | "\n",
61 | "Sometimes, we would like to start a task, and let it run in the background. To do so, we simply add the character `&` at the end of the command. For example, if we want to run our script from above and get the task to run in the background, we can type:\n",
62 | "\n",
63 | "`python3 get_weather.py &`\n",
64 | "\n",
65 | "### Running Jobs in the Background (even after terminating the Terminal)\n",
66 | "\n",
67 | "When we use the `&` operator, the task runs in the background, but stops running the moment we logout from our ssh session. To allow the task to continue running, even after we log out, we can use the `nohup` command, as follows:\n",
68 | "\n",
69 | "`nohup python3 get_weather.py &`\n",
70 | "\n",
71 | "### Redirecting the output\n",
72 | "\n",
73 | "If your script has an output on the screen, you often want to save this. To store the output of your screen to a file, when you put a script to run in the background, you use the `> filename.txt` command to store the output in the `filename.txt`. For example:\n",
74 | "\n",
75 | "`nohup python3 get_weather.py > weather.txt &`\n",
76 | "\n",
77 | "will store the output into the file weather.txt instead of printing it on the screen."
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "## Cron: Scheduling Tasks (vs running them continuously in the background)"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "The approach that we described above assumes that the task will be running forever in the background. However, most of the time our script is waiting, executing the `time.sleep` command. \n",
97 | "\n",
98 | "Instead of having our script sleeping, we can use the **cron** command to execute desired tasks (in the background) at designated times. So, let's remove the **while True** and the **time.sleep** commands from our script, and let's see how we can use cron instead.\n",
99 | "\n",
100 | "To use cron for task, add entries to your crontab file. Start the crontab editor from a terminal window:\n",
101 | "\n",
102 | "`sudo crontab -e`\n",
103 | "\n",
104 | "A crontab is a simple text file with a list of commands meant to be run at specified times and these jobs will run regardless of whether the user is actually logged into the system. \n",
105 | "\n",
106 | "\n",
107 | "\n",
108 | "### The structure of the crontab file\n",
109 | "\n",
110 | "This is how a cron job is laid out:\n",
111 | "\n",
112 | "minute (0-59), hour (0-23, 0 = midnight), day (1-31), month (1-12), weekday (0-6, 0 = Sunday), command\n",
113 | "\n",
114 | "and each line of the crontab file has the following format:\n",
115 | "\n",
116 | "`minute hour day_of_month month day_of_week command`\n",
117 | "\n",
118 | "Each of the parts is separated by a space, with the final part (the command) having one or more spaces in it. \n",
119 | "For example, you can run your script at 5 a.m every week with:\n",
120 | "\n",
121 | "`0 5 * * 1 /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
122 | "\n",
123 | "#### More examples\n",
124 | "\n",
125 | "`01 04 1 1 1 /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
126 | "\n",
127 | "The above example will run our script at 4:01am on January 1st plus every Monday in January. An asterisk (\\*) can be used so that every instance (every hour, every weekday, every month, etc.) of a time period is used. Code:\n",
128 | "\n",
129 | "\n",
130 | "`01 04 * * * /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
131 | "\n",
132 | "The above example will run /usr/bin/somedirectory/somecommand at 4:01am on every day of every month.\n",
133 | "\n",
134 | "Comma-separated values can be used to run more than one instance of a particular command within a time period. Dash-separated values can be used to run a command continuously. For example:\n",
135 | "\n",
136 | "`01,31 04,05 1-15 1,6 * /usr/bin/python3 /home/ubuntu/get_temperature.py`\n",
137 | "\n",
138 | "The above example will run /usr/bin/somedirectory/somecommand at 01 and 31 past the hours of 4:00am and 5:00am on the 1st through the 15th of every January and June.\n",
139 | "\n",
140 | "The `/usr/bin/python3 /home/ubuntu/get_temperature.py` text in the above examples indicates the task which will be run at the specified times. It is recommended that you use the full path to the desired commands as shown in the above examples. Enter which somecommand in the terminal to find the full path to somecommand. The crontab will begin running as soon as it is properly edited and saved.\n",
141 | "\n",
142 | "(See https://help.ubuntu.com/community/CronHowto for more details)\n"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "### Exercise\n",
150 | "\n",
151 | "* Use a cron job to keep track of the temperature in New York, running every minute. Use the redirect operator to store the temperature in a text file called /home/ubuntu/nyc-temperatures.txt, appending a new line for every measurement."
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {},
158 | "outputs": [],
159 | "source": []
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {},
164 | "source": [
165 | "* Use the `http://api.open-notify.org/iss-now.json` API and keep track of the location of the International Space Station (ISS) over time."
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": []
174 | }
175 | ],
176 | "metadata": {
177 | "colabVersion": "0.1",
178 | "kernelspec": {
179 | "display_name": "Python 3",
180 | "language": "python",
181 | "name": "python3"
182 | },
183 | "language_info": {
184 | "codemirror_mode": {
185 | "name": "ipython",
186 | "version": 3
187 | },
188 | "file_extension": ".py",
189 | "mimetype": "text/x-python",
190 | "name": "python",
191 | "nbconvert_exporter": "python",
192 | "pygments_lexer": "ipython3",
193 | "version": "3.6.6"
194 | }
195 | },
196 | "nbformat": 4,
197 | "nbformat_minor": 1
198 | }
199 |
--------------------------------------------------------------------------------
/12-UNIX_Basics/cronhelp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Cron assignment, all done from a notebook\n",
8 | "The following cells are an example of how to do the cron assignment\n",
9 | "without having to go to the terminal, using notepad etc.\n",
10 |     "Everything can be done from notebook cells\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "#### First, start the cron daemon, to make sure it is running\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 17,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | " * Starting periodic command scheduler cron\r\n",
30 | " ...done.\r\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "!sudo service cron start"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "#### Second, create the getTemp shell file by \"echoing\" the command lines into a file\n",
43 | "Note: the use of \\'s to escape some of the special characters,\n",
44 | "but first remove an existing NYC-Temperatures.txt file \n"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 18,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "!rm /home/nwhite/NYC-Temperatures.txt\n"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 26,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "#!/bin/sh\r\n",
66 | "curl -s \"http://api.openweathermap.org/data/2.5/weather?zip=10012&mode=json&units=imperial&APPID=ffb7b9808e07c9135bdcc7d1e867253d \"|jq '.main.temp' \r\n"
67 | ]
68 | }
69 | ],
70 | "source": [
71 |     "# create the getTemp file and change its permissions to make it readable and executable\n",
72 | "# First put in the line for the \n",
73 | "# That tells linux to interpret this line with the bourne shell\n",
74 | "\n",
75 | "!rm /home/nwhite/getTemp\n",
76 | "!echo \"#!/bin/sh\" >/home/nwhite/getTemp\n",
77 | "\n",
78 | "# Next, add the line to retrieve the temperature from openweather\n",
79 | "#\n",
80 |     "# Put your APIKEY to replace the APPID below....\n",
81 | "#\n",
82 | "# now add the call to the openweathermap api\n",
83 | "# Note that we need to surround the curl URL in escaped single quotes\n",
84 | "!echo \"curl -s \\\"http://api.openweathermap.org/data/2.5/weather?zip=10012&mode=json&units=imperial&APPID=ffb7b9808e07c9135bdcc7d1e867253d \\\"|jq '.main.temp' \" >>/home/nwhite/getTemp \n",
85 | "\n",
86 | "# change permissions\n",
87 | "!chmod a+rx /home/nwhite/getTemp\n",
88 | "# look at the file....\n",
89 | "!cat /home/nwhite/getTemp\n"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "### We should now have a good getTemp file, so create the crontab entry ...\n",
97 |     "echo the crontab entry as input to crontab\n",
98 |     "`!echo \"*/10 * * * * /home/nwhite/getTemp\"|crontab`\n",
99 | "(You might start with 1 minute intervals to test)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 30,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\r\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "!echo \"*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\"|crontab\n",
117 | "#list the crontab entry to see if it is correct\n",
118 | "!crontab -l\n"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 31,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "76.5\r\n",
131 | "76.5\r\n",
132 | "76.5\r\n",
133 | "76.5\r\n",
134 | "76.5\r\n"
135 | ]
136 | }
137 | ],
138 | "source": [
139 |     "### Now look at the NYC-Temperatures file (Note it may take a few minutes to be created)\n",
140 |     "!cat /home/nwhite/NYC-Temperatures.txt\n",
141 |     "# in 10 minutes, you should see an entry !!!\n"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 32,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "name": "stdout",
151 | "output_type": "stream",
152 | "text": [
153 | "*/10 * * * * /home/nwhite/getTemp >>/home/nwhite/NYC-Temperatures.txt\r\n"
154 | ]
155 | }
156 | ],
157 | "source": [
158 | "!crontab -l"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 33,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "name": "stdout",
168 | "output_type": "stream",
169 | "text": [
170 | "\u001b[0;39m76.5\u001b[0m\r\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "!/home/nwhite/getTemp"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.6.5"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 2
207 | }
208 |
--------------------------------------------------------------------------------
/13-Network_Analysis/README.md:
--------------------------------------------------------------------------------
1 | See also https://github.com/khof312/networks_tutorial
2 |
--------------------------------------------------------------------------------
/13-Network_Analysis/images/MySQL_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/MySQL_scheme.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/RDBMS_vs_GRAPHDB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/RDBMS_vs_GRAPHDB.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/ex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/ex.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/free_movies.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/free_movies.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/graph.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/graph.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/graph.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/neo4j-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/neo4j-python.png
--------------------------------------------------------------------------------
/13-Network_Analysis/images/new_db_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/new_db_1.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/new_db_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/new_db_2.jpg
--------------------------------------------------------------------------------
/13-Network_Analysis/images/scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/13-Network_Analysis/images/scheme.jpg
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/digits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/digits.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/iris_petal_sepal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/iris_petal_sepal.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/linear_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/linear_regression.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/ml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/ml.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/sklearn_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/sklearn_logo.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/svm1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/svm1.png
--------------------------------------------------------------------------------
/15-Predictive_Modeling/images/svm2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/15-Predictive_Modeling/images/svm2.png
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Canny Edge Detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Canny Edge Detection"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "In this chapter, we will learn about\n",
18 | "\n",
19 | "- Concept of Canny edge detection\n",
20 | "\n",
21 | "- OpenCV functions for that : cv2.Canny()\n",
22 | "\n",
23 | "#### Theory\n",
24 | "\n",
25 |     "Canny Edge Detection is a popular edge detection algorithm. It was developed by John F. Canny in 1986. It is a multi-stage algorithm and we will go through each stage.\n",
26 | "\n",
27 | "##### 1. Noise Reduction\n",
28 | "\n",
29 | "Since edge detection is susceptible to noise in the image, first step is to remove the noise in the image with a 5x5 Gaussian filter. We have already seen this in previous chapters.\n",
30 | "\n",
31 | "##### 2. Finding Intensity Gradient of the Image\n",
32 | "\n",
33 | "Smoothened image is then filtered with a Sobel kernel in both horizontal and vertical direction to get first derivative in horizontal direction (G_x) and vertical direction (G_y). From these two images, we can find edge gradient and direction for each pixel as follows:\n",
34 | "\n",
35 | "Edge\\_Gradient \\; (G) = \\sqrt{G_x^2 + G_y^2}\n",
36 | "\n",
37 | "Angle \\; (\\theta) = \\tan^{-1} \\bigg(\\frac{G_y}{G_x}\\bigg)\n",
38 | "\n",
39 | "Gradient direction is always perpendicular to edges. It is rounded to one of four angles representing vertical, horizontal and two diagonal directions.\n",
40 | "\n",
41 | "##### 3. Non-maximum Suppression \n",
42 | "\n",
43 | "After getting gradient magnitude and direction, a full scan of image is done to remove any unwanted pixels which may not constitute the edge. For this, at every pixel, pixel is checked if it is a local maximum in its neighborhood in the direction of gradient. Check the image below:"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {
49 | "collapsed": false
50 | },
51 | "source": [
52 | ""
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "source": [
61 | "Point A is on the edge ( in vertical direction). Gradient direction is normal to the edge. Point B and C are in gradient directions. So point A is checked with point B and C to see if it forms a local maximum. If so, it is considered for next stage, otherwise, it is suppressed ( put to zero).\n",
62 | "\n",
63 | "In short, the result you get is a binary image with “thin edges”.\n",
64 | "\n",
65 | "#### 4. Hysteresis Thresholding\n",
66 | "\n",
67 |     "This stage decides which of all the edges are really edges and which are not. For this, we need two threshold values, minVal and maxVal. Any edges with intensity gradient more than maxVal are sure to be edges and those below minVal are sure to be non-edges, so discarded. Those that lie between these two thresholds are classified edges or non-edges based on their connectivity. If they are connected to “sure-edge” pixels, they are considered to be part of edges. Otherwise, they are also discarded. See the image below:"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {
73 | "collapsed": true
74 | },
75 | "source": [
76 | ""
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "The edge A is above the maxVal, so considered as “sure-edge”. Although edge C is below maxVal, it is connected to edge A, so that also considered as valid edge and we get that full curve. But edge B, although it is above minVal and is in same region as that of edge C, it is not connected to any “sure-edge”, so that is discarded. So it is very important that we have to select minVal and maxVal accordingly to get the correct result.\n",
84 | "\n",
85 | "This stage also removes small pixels noises on the assumption that edges are long lines.\n",
86 | "\n",
87 | "So what we finally get is strong edges in the image.\n",
88 | "\n",
89 | "#### Canny Edge Detection in OpenCV\n",
90 | "\n",
91 | "OpenCV puts all the above in single function, cv2.Canny(). We will see how to use it. First argument is our input image. Second and third arguments are our minVal and maxVal respectively. Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By default it is 3. Last argument is L2gradient which specifies the equation for finding gradient magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it uses this function: Edge\\_Gradient (G) = |G_x| + |G_y|. By default, it is False."
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 1,
97 | "metadata": {
98 | "collapsed": true
99 | },
100 | "outputs": [],
101 | "source": [
102 | "import cv2\n",
103 | "import numpy as np\n",
104 | "from matplotlib import pyplot as plt\n",
105 | "\n",
106 | "img = cv2.imread('images/test.jpg',0)\n",
107 | "edges = cv2.Canny(img,100,200)\n",
108 | "\n",
109 | "plt.subplot(121),plt.imshow(img,cmap = 'gray')\n",
110 | "plt.title('Original Image'), plt.xticks([]), plt.yticks([])\n",
111 | "plt.subplot(122),plt.imshow(edges,cmap = 'gray')\n",
112 | "plt.title('Edge Image'), plt.xticks([]), plt.yticks([])\n",
113 | "\n",
114 | "plt.show()"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "See the result below:\n",
122 | ""
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {
129 | "collapsed": true
130 | },
131 | "outputs": [],
132 | "source": []
133 | }
134 | ],
135 | "metadata": {
136 | "kernelspec": {
137 | "display_name": "Python 2",
138 | "language": "python",
139 | "name": "python2"
140 | },
141 | "language_info": {
142 | "codemirror_mode": {
143 | "name": "ipython",
144 | "version": 2
145 | },
146 | "file_extension": ".py",
147 | "mimetype": "text/x-python",
148 | "name": "python",
149 | "nbconvert_exporter": "python",
150 | "pygments_lexer": "ipython2",
151 | "version": "2.7.3"
152 | }
153 | },
154 | "nbformat": 4,
155 | "nbformat_minor": 0
156 | }
157 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Geometric Transformations of Images.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Geometric Transformations of Images"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Goals\n",
15 | "\n",
16 | "- Learn to apply different geometric transformation to images like translation, rotation, affine transformation etc.\n",
17 | "\n",
18 | "- You will see these functions: cv2.getPerspectiveTransform"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "collapsed": false
25 | },
26 | "source": [
27 | "#### Transformations\n",
28 | "OpenCV provides two transformation functions, cv2.warpAffine and cv2.warpPerspective, with which you can have all kinds of transformations. cv2.warpAffine takes a 2x3 transformation matrix while cv2.warpPerspective takes a 3x3 transformation matrix as input.\n",
29 | "\n",
30 | "#### Scaling\n",
31 | "Scaling is just resizing of the image. OpenCV comes with a function cv2.resize() for this purpose. The size of the image can be specified manually, or you can specify the scaling factor. Different interpolation methods are used. Preferable interpolation methods are cv2.INTER_AREA for shrinking and cv2.INTER_CUBIC (slow) & cv2.INTER_LINEAR for zooming. By default, interpolation method used is cv2.INTER_LINEAR for all resizing purposes. You can resize an input image either of following methods:"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "import cv2\n",
43 | "import numpy as np\n",
44 | "\n",
45 | "img = cv2.imread('images/opencv_test1.jpg')\n",
46 | "\n",
47 | "res = cv2.resize(img,None,fx=2, fy=2, interpolation = cv2.INTER_CUBIC)\n",
48 | "\n",
49 | "#OR\n",
50 | "\n",
51 | "height, width = img.shape[:2]\n",
52 | "res = cv2.resize(img,(2*width, 2*height), interpolation = cv2.INTER_CUBIC)"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "#### Translation \n",
60 | "Translation is the shifting of object’s location. If you know the shift in (x,y) direction, let it be (t_x,t_y), you can create the transformation matrix \\textbf{M} as follows:\n",
61 | "\n",
62 | "M = \\begin{bmatrix} 1 & 0 & t_x \\\\ 0 & 1 & t_y \\end{bmatrix}\n",
63 | "\n",
64 |     "You can make it into a Numpy array of type np.float32 and pass it into cv2.warpAffine() function. See below example for a shift of (100,50):"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": true
72 | },
73 | "outputs": [],
74 | "source": [
75 | "import cv2\n",
76 | "import numpy as np\n",
77 | "\n",
78 | "img = cv2.imread('images/opencv_test1.jpg',0)\n",
79 | "rows,cols = img.shape\n",
80 | "\n",
81 | "M = np.float32([[1,0,100],[0,1,50]])\n",
82 | "dst = cv2.warpAffine(img,M,(cols,rows))\n",
83 | "\n",
84 | "cv2.imshow('img',dst)\n",
85 | "cv2.waitKey(0)\n",
86 | "cv2.destroyAllWindows()"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "Warning\n",
94 | "\n",
95 | "Third argument of the cv2.warpAffine() function is the size of the output image, which should be in the form of (width, height). Remember width = number of columns, and height = number of rows."
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "See the result below:"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | ""
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "source": [
118 | "#### Rotation\n",
119 | "Rotation of an image for an angle \\theta is achieved by the transformation matrix of the form\n",
120 | "\n",
121 | "M = \\begin{bmatrix} cos\\theta & -sin\\theta \\\\ sin\\theta & cos\\theta \\end{bmatrix}\n",
122 | "\n",
123 | "But OpenCV provides scaled rotation with adjustable center of rotation so that you can rotate at any location you prefer. Modified transformation matrix is given by\n",
124 | "\n",
125 | "\\begin{bmatrix} \\alpha & \\beta & (1- \\alpha ) \\cdot center.x - \\beta \\cdot center.y \\\\ - \\beta & \\alpha & \\beta \\cdot center.x + (1- \\alpha ) \\cdot center.y \\end{bmatrix}\n",
126 | "\n",
127 | "where:\n",
128 | "\n",
129 | "\\begin{array}{l} \\alpha = scale \\cdot \\cos \\theta , \\\\ \\beta = scale \\cdot \\sin \\theta \\end{array}\n",
130 | "\n",
131 | "To find this transformation matrix, OpenCV provides a function, cv2.getRotationMatrix2D. Check below example which rotates the image by 90 degree with respect to center without any scaling."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "img = cv2.imread('images/opencv_test1.jpg',0)\n",
143 | "rows,cols = img.shape\n",
144 | "\n",
145 | "M = cv2.getRotationMatrix2D((cols/2,rows/2),90,1)\n",
146 | "dst = cv2.warpAffine(img,M,(cols,rows))"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | ""
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "#### Affine Transformation\n",
161 | "In affine transformation, all parallel lines in the original image will still be parallel in the output image. To find the transformation matrix, we need three points from input image and their corresponding locations in output image. Then cv2.getAffineTransform will create a 2x3 matrix which is to be passed to cv2.warpAffine.\n",
162 | "\n",
163 | "Check below example, and also look at the points I selected (which are marked in Green color):"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {
170 | "collapsed": false
171 | },
172 | "outputs": [],
173 | "source": [
174 | "img = cv2.imread('images/opencv_test1.jpg')\n",
175 | "rows,cols,ch = img.shape\n",
176 | "\n",
177 | "pts1 = np.float32([[50,50],[200,50],[50,200]])\n",
178 | "pts2 = np.float32([[10,100],[200,50],[100,250]])\n",
179 | "\n",
180 | "M = cv2.getAffineTransform(pts1,pts2)\n",
181 | "\n",
182 | "dst = cv2.warpAffine(img,M,(cols,rows))\n",
183 | "\n",
184 | "plt.subplot(121),plt.imshow(img),plt.title('Input')\n",
185 | "plt.subplot(122),plt.imshow(dst),plt.title('Output')\n",
186 | "plt.show()"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "See the result:\n",
194 | ""
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "#### Perspective Transformation\n",
202 | "For perspective transformation, you need a 3x3 transformation matrix. Straight lines will remain straight even after the transformation. To find this transformation matrix, you need 4 points on the input image and corresponding points on the output image. Among these 4 points, 3 of them should not be collinear. Then transformation matrix can be found by the function cv2.getPerspectiveTransform. Then apply cv2.warpPerspective with this 3x3 transformation matrix.\n",
203 | "\n",
204 | "See the code below:"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {
211 | "collapsed": false
212 | },
213 | "outputs": [],
214 | "source": [
215 | "img = cv2.imread('sudokusmall.png')\n",
216 | "rows,cols,ch = img.shape\n",
217 | "\n",
218 | "pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])\n",
219 | "pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])\n",
220 | "\n",
221 | "M = cv2.getPerspectiveTransform(pts1,pts2)\n",
222 | "\n",
223 | "dst = cv2.warpPerspective(img,M,(300,300))\n",
224 | "\n",
225 | "plt.subplot(121),plt.imshow(img),plt.title('Input')\n",
226 | "plt.subplot(122),plt.imshow(dst),plt.title('Output')\n",
227 | "plt.show()\n"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "Result:\n",
235 | ""
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {
242 | "collapsed": true
243 | },
244 | "outputs": [],
245 | "source": []
246 | }
247 | ],
248 | "metadata": {
249 | "kernelspec": {
250 | "display_name": "Python 2",
251 | "language": "python",
252 | "name": "python2"
253 | },
254 | "language_info": {
255 | "codemirror_mode": {
256 | "name": "ipython",
257 | "version": 2
258 | },
259 | "file_extension": ".py",
260 | "mimetype": "text/x-python",
261 | "name": "python",
262 | "nbconvert_exporter": "python",
263 | "pygments_lexer": "ipython2",
264 | "version": "2.7.3"
265 | }
266 | },
267 | "nbformat": 4,
268 | "nbformat_minor": 0
269 | }
270 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Histograms in OpenCV - 2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Histograms - 2: Histogram Equalization"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Goal\n",
15 | "In this section,\n",
16 | "\n",
17 | "- We will learn the concepts of histogram equalization and use it to improve the contrast of our images.\n",
18 | "\n",
19 | "### Theory\n",
20 | "So what is histogram ? You can consider histogram as a graph or plot, which gives you an overall idea about the intensity distribution of an image. It is a plot with pixel values (ranging from 0 to 255, not always) in X-axis and corresponding number of pixels in the image on Y-axis.\n",
21 | "\n",
22 |     "It is just another way of understanding the image. By looking at the histogram of an image, you get intuition about contrast, brightness, intensity distribution etc of that image. Almost all image processing tools today provide features on histograms. Below is an image from Cambridge in Color website, and I recommend you to visit the site for more details."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "source": [
31 | ""
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "I would recommend you to read the wikipedia page on Histogram Equalization for more details about it. It has a very good explanation with worked out examples, so that you would understand almost everything after reading that. Instead, here we will see its Numpy implementation. After that, we will see OpenCV function."
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {
45 | "collapsed": true
46 | },
47 | "outputs": [],
48 | "source": [
49 | "import cv2\n",
50 | "import numpy as np\n",
51 | "from matplotlib import pyplot as plt\n",
52 | "\n",
53 | "img = cv2.imread('images/test.jpg',0)\n",
54 | "\n",
55 | "hist,bins = np.histogram(img.flatten(),256,[0,256])\n",
56 | "\n",
57 | "cdf = hist.cumsum()\n",
58 | "cdf_normalized = cdf * hist.max()/ cdf.max()\n",
59 | "\n",
60 | "plt.plot(cdf_normalized, color = 'b')\n",
61 | "plt.hist(img.flatten(),256,[0,256], color = 'r')\n",
62 | "plt.xlim([0,256])\n",
63 | "plt.legend(('cdf','histogram'), loc = 'upper left')\n",
64 | "plt.show()"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | ""
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {
77 | "collapsed": true
78 | },
79 | "source": [
80 | "You can see histogram lies in brighter region. We need the full spectrum. For that, we need a transformation function which maps the input pixels in brighter region to output pixels in full region. That is what histogram equalization does.\n",
81 | "\n",
82 | "Now we find the minimum histogram value (excluding 0) and apply the histogram equalization equation as given in wiki page. But I have used here, the masked array concept array from Numpy. For masked array, all operations are performed on non-masked elements. You can read more about it from Numpy docs on masked arrays."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 2,
88 | "metadata": {
89 | "collapsed": true
90 | },
91 | "outputs": [],
92 | "source": [
93 | "cdf_m = np.ma.masked_equal(cdf,0)\n",
94 | "cdf_m = (cdf_m - cdf_m.min())*255/(cdf_m.max()-cdf_m.min())\n",
95 | "cdf = np.ma.filled(cdf_m,0).astype('uint8')"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {
101 | "collapsed": true
102 | },
103 | "source": [
104 | "Now we have the look-up table that gives us the information on what is the output pixel value for every input pixel value. So we just apply the transform."
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "metadata": {
111 | "collapsed": true
112 | },
113 | "outputs": [],
114 | "source": [
115 | "img2 = cdf[img]"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {
121 | "collapsed": true
122 | },
123 | "source": [
124 | "Now we calculate its histogram and cdf as before ( you do it) and result looks like below :"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "source": [
133 | ""
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {
139 | "collapsed": true
140 | },
141 | "source": [
142 | "Another important feature is that, even if the image was a darker image (instead of a brighter one we used), after equalization we will get almost the same image as we got. As a result, this is used as a “reference tool” to make all images with same lighting conditions. This is useful in many cases. For example, in face recognition, before training the face data, the images of faces are histogram equalized to make them all with same lighting conditions.\n",
143 | "\n",
144 | "### Histograms Equalization in OpenCV\n",
145 | "OpenCV has a function to do this, cv2.equalizeHist(). Its input is just grayscale image and output is our histogram equalized image.\n",
146 | "\n",
147 | "Below is a simple code snippet showing its usage for same image we used :"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 4,
153 | "metadata": {
154 | "collapsed": false
155 | },
156 | "outputs": [
157 | {
158 | "data": {
159 | "text/plain": [
160 | "True"
161 | ]
162 | },
163 | "execution_count": 4,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "img = cv2.imread('images/test.jpg',0)\n",
170 | "equ = cv2.equalizeHist(img)\n",
171 | "res = np.hstack((img,equ)) #stacking images side-by-side\n",
172 | "cv2.imwrite('images/res.png',res)"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "So now you can take different images with different light conditions, equalize it and check the results.\n",
180 | "\n",
181 | "Histogram equalization is good when histogram of the image is confined to a particular region. It won’t work good in places where there is large intensity variations where histogram covers a large region, ie both bright and dark pixels are present. Please check the SOF links in Additional Resources.\n",
182 | "\n",
183 | "### CLAHE (Contrast Limited Adaptive Histogram Equalization)\n",
184 | "The first histogram equalization we just saw, considers the global contrast of the image. In many cases, it is not a good idea. For example, below image shows an input image and its result after global histogram equalization."
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "collapsed": true
191 | },
192 | "source": [
193 | ""
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "It is true that the background contrast has improved after histogram equalization. But compare the face of statue in both images. We lost most of the information there due to over-brightness. It is because its histogram is not confined to a particular region as we saw in previous cases (Try to plot histogram of input image, you will get more intuition).\n",
201 | "\n",
202 | "So to solve this problem, adaptive histogram equalization is used. In this, image is divided into small blocks called “tiles” (tileSize is 8x8 by default in OpenCV). Then each of these blocks are histogram equalized as usual. So in a small area, histogram would confine to a small region (unless there is noise). If noise is there, it will be amplified. To avoid this, contrast limiting is applied. If any histogram bin is above the specified contrast limit (by default 40 in OpenCV), those pixels are clipped and distributed uniformly to other bins before applying histogram equalization. After equalization, to remove artifacts in tile borders, bilinear interpolation is applied.\n",
203 | "\n",
204 | "Below code snippet shows how to apply CLAHE in OpenCV:"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 5,
210 | "metadata": {
211 | "collapsed": false
212 | },
213 | "outputs": [
214 | {
215 | "data": {
216 | "text/plain": [
217 | "True"
218 | ]
219 | },
220 | "execution_count": 5,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "import numpy as np\n",
227 | "import cv2\n",
228 | "\n",
229 | "img = cv2.imread('images/test.png',0)\n",
230 | "\n",
231 | "# create a CLAHE object (Arguments are optional).\n",
232 | "clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))\n",
233 | "cl1 = clahe.apply(img)\n",
234 | "\n",
235 | "cv2.imwrite('images/clahe_2.jpg',cl1)"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | ""
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "metadata": {
249 | "collapsed": true
250 | },
251 | "outputs": [],
252 | "source": []
253 | }
254 | ],
255 | "metadata": {
256 | "kernelspec": {
257 | "display_name": "Python 2",
258 | "language": "python",
259 | "name": "python2"
260 | },
261 | "language_info": {
262 | "codemirror_mode": {
263 | "name": "ipython",
264 | "version": 2
265 | },
266 | "file_extension": ".py",
267 | "mimetype": "text/x-python",
268 | "name": "python",
269 | "nbconvert_exporter": "python",
270 | "pygments_lexer": "ipython2",
271 | "version": "2.7.3"
272 | }
273 | },
274 | "nbformat": 4,
275 | "nbformat_minor": 0
276 | }
277 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Image Gradients.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Image Gradients"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "\n",
18 | "In this chapter, we will learn to:\n",
19 | "\n",
20 | "- Find Image gradients, edges etc\n",
21 | "\n",
22 | "- We will see following functions : cv2.Sobel(), cv2.Scharr(), cv2.Laplacian() etc\n",
23 | "\n",
24 | "#### Theory\n",
25 | "\n",
26 | "OpenCV provides three types of gradient filters or High-pass filters, Sobel, Scharr and Laplacian. We will see each one of them."
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "collapsed": false
33 | },
34 | "source": [
35 | "#### 1. Sobel and Scharr Derivatives\n",
36 | "Sobel operators is a joint Gausssian smoothing plus differentiation operation, so it is more resistant to noise. You can specify the direction of derivatives to be taken, vertical or horizontal (by the arguments, yorder and xorder respectively). You can also specify the size of kernel by the argument ksize. If ksize = -1, a 3x3 Scharr filter is used which gives better results than 3x3 Sobel filter. Please see the docs for kernels used.\n",
37 | "\n",
38 | "#### 2. Laplacian Derivatives\n",
39 | "It calculates the Laplacian of the image given by the relation, \\Delta src = \\frac{\\partial ^2{src}}{\\partial x^2} + \\frac{\\partial ^2{src}}{\\partial y^2} where each derivative is found using Sobel derivatives. If ksize = 1, then following kernel is used for filtering:\n",
40 | "\n",
41 | "kernel = \\begin{bmatrix} 0 & 1 & 0 \\\\ 1 & -4 & 1 \\\\ 0 & 1 & 0 \\end{bmatrix}\n",
42 | "\n",
43 | "#### Code\n",
44 | "Below code shows all operators in a single diagram. All kernels are of 5x5 size. Depth of output image is passed -1 to get the result in np.uint8 type."
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 1,
50 | "metadata": {
51 | "collapsed": true
52 | },
53 | "outputs": [],
54 | "source": [
55 | "import cv2\n",
56 | "import numpy as np\n",
57 | "from matplotlib import pyplot as plt\n",
58 | "\n",
59 | "img = cv2.imread('images/test.jpg',0)\n",
60 | "\n",
61 | "laplacian = cv2.Laplacian(img,cv2.CV_64F)\n",
62 | "sobelx = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)\n",
63 | "sobely = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5)\n",
64 | "\n",
65 | "plt.subplot(2,2,1),plt.imshow(img,cmap = 'gray')\n",
66 | "plt.title('Original'), plt.xticks([]), plt.yticks([])\n",
67 | "plt.subplot(2,2,2),plt.imshow(laplacian,cmap = 'gray')\n",
68 | "plt.title('Laplacian'), plt.xticks([]), plt.yticks([])\n",
69 | "plt.subplot(2,2,3),plt.imshow(sobelx,cmap = 'gray')\n",
70 | "plt.title('Sobel X'), plt.xticks([]), plt.yticks([])\n",
71 | "plt.subplot(2,2,4),plt.imshow(sobely,cmap = 'gray')\n",
72 | "plt.title('Sobel Y'), plt.xticks([]), plt.yticks([])\n",
73 | "\n",
74 | "plt.show()"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "source": [
83 | "Result:\n",
84 | ""
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "#### One Important Matter!\n",
92 | "In our last example, output datatype is cv2.CV_8U or np.uint8. But there is a slight problem with that. Black-to-White transition is taken as Positive slope (it has a positive value) while White-to-Black transition is taken as a Negative slope (It has negative value). So when you convert data to np.uint8, all negative slopes are made zero. In simple words, you miss that edge.\n",
93 | "\n",
94 | "If you want to detect both edges, better option is to keep the output datatype to some higher forms, like cv2.CV_16S, cv2.CV_64F etc, take its absolute value and then convert back to cv2.CV_8U. Below code demonstrates this procedure for a horizontal Sobel filter and difference in results."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 2,
100 | "metadata": {
101 | "collapsed": true
102 | },
103 | "outputs": [],
104 | "source": [
105 | "import cv2\n",
106 | "import numpy as np\n",
107 | "from matplotlib import pyplot as plt\n",
108 | "\n",
109 | "img = cv2.imread('images/test.png',0)\n",
110 | "\n",
111 | "# Output dtype = cv2.CV_8U\n",
112 | "sobelx8u = cv2.Sobel(img,cv2.CV_8U,1,0,ksize=5)\n",
113 | "\n",
114 | "# Output dtype = cv2.CV_64F. Then take its absolute and convert to cv2.CV_8U\n",
115 | "sobelx64f = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5)\n",
116 | "abs_sobel64f = np.absolute(sobelx64f)\n",
117 | "sobel_8u = np.uint8(abs_sobel64f)\n",
118 | "\n",
119 | "plt.subplot(1,3,1),plt.imshow(img,cmap = 'gray')\n",
120 | "plt.title('Original'), plt.xticks([]), plt.yticks([])\n",
121 | "plt.subplot(1,3,2),plt.imshow(sobelx8u,cmap = 'gray')\n",
122 | "plt.title('Sobel CV_8U'), plt.xticks([]), plt.yticks([])\n",
123 | "plt.subplot(1,3,3),plt.imshow(sobel_8u,cmap = 'gray')\n",
124 | "plt.title('Sobel abs(CV_64F)'), plt.xticks([]), plt.yticks([])\n",
125 | "\n",
126 | "plt.show()"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "Check the result below:\n",
134 | ""
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": true
142 | },
143 | "outputs": [],
144 | "source": []
145 | }
146 | ],
147 | "metadata": {
148 | "kernelspec": {
149 | "display_name": "Python 2",
150 | "language": "python",
151 | "name": "python2"
152 | },
153 | "language_info": {
154 | "codemirror_mode": {
155 | "name": "ipython",
156 | "version": 2
157 | },
158 | "file_extension": ".py",
159 | "mimetype": "text/x-python",
160 | "name": "python",
161 | "nbconvert_exporter": "python",
162 | "pygments_lexer": "ipython2",
163 | "version": "2.7.3"
164 | }
165 | },
166 | "nbformat": 4,
167 | "nbformat_minor": 0
168 | }
169 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Image Pyramids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Image Pyramids"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "In this chapter,\n",
18 | "\n",
19 | "- We will learn about Image Pyramids\n",
20 | "\n",
21 | "- We will use Image pyramids to create a new fruit, “Orapple”\n",
22 | "\n",
23 | "- We will see these functions: cv2.pyrUp(), cv2.pyrDown()\n",
24 | "\n",
25 | "\n",
26 | "#### Theory\n",
27 | "Normally, we used to work with an image of constant size. But in some occassions, we need to work with images of different resolution of the same image. For example, while searching for something in an image, like face, we are not sure at what size the object will be present in the image. In that case, we will need to create a set of images with different resolution and search for object in all the images. These set of images with different resolution are called Image Pyramids (because when they are kept in a stack with biggest image at bottom and smallest image at top look like a pyramid).\n",
28 | "\n",
29 | "There are two kinds of Image Pyramids. 1) Gaussian Pyramid and 2) Laplacian Pyramids\n",
30 | "\n",
31 | "Higher level (Low resolution) in a Gaussian Pyramid is formed by removing consecutive rows and columns in Lower level (higher resolution) image. Then each pixel in higher level is formed by the contribution from 5 pixels in underlying level with gaussian weights. By doing so, a M \\times N image becomes M/2 \\times N/2 image. So area reduces to one-fourth of original area. It is called an Octave. The same pattern continues as we go upper in pyramid (ie, resolution decreases). Similarly while expanding, area becomes 4 times in each level. We can find Gaussian pyramids using cv2.pyrDown() and cv2.pyrUp() functions."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 3,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "import cv2\n",
43 | "img = cv2.imread('images/test.jpg')\n",
44 | "lower_reso = cv2.pyrDown(img)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Below is the 4 levels in an image pyramid."
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "source": [
60 | ""
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "source": [
69 | "Now you can go down the image pyramid with cv2.pyrUp() function."
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {
76 | "collapsed": true
77 | },
78 | "outputs": [],
79 | "source": [
80 | "higher_reso2 = cv2.pyrUp(img)"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "Remember, higher_reso2 is not equal to higher_reso, because once you decrease the resolution, you loose the information. Below image is 3 level down the pyramid created from smallest image in previous case. Compare it with original image:"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {
93 | "collapsed": true
94 | },
95 | "source": [
96 | ""
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "Laplacian Pyramids are formed from the Gaussian Pyramids. There is no exclusive function for that. Laplacian pyramid images are like edge images only. Most of its elements are zeros. They are used in image compression. A level in Laplacian Pyramid is formed by the difference between that level in Gaussian Pyramid and expanded version of its upper level in Gaussian Pyramid. The three levels of a Laplacian level will look like below (contrast is adjusted to enhance the contents):"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | ""
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {
116 | "collapsed": true
117 | },
118 | "source": [
119 | "#### Image Blending using Pyramids\n",
120 | "One application of Pyramids is Image Blending. For example, in image stitching, you will need to stack two images together, but it may not look good due to discontinuities between images. In that case, image blending with Pyramids gives you seamless blending without leaving much data in the images. One classical example of this is the blending of two fruits, Orange and Apple. See the result now itself to understand what I am saying:"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | ""
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "collapsed": true
134 | },
135 | "source": [
136 | "Please check first reference in additional resources, it has full diagramatic details on image blending, Laplacian Pyramids etc. Simply it is done as follows:\n",
137 | "\n",
138 | "1. Load the two images of apple and orange\n",
139 | "\n",
140 | "2. Find the Gaussian Pyramids for apple and orange (in this particular example, number of levels is 6)\n",
141 | "\n",
142 | "3. From Gaussian Pyramids, find their Laplacian Pyramids\n",
143 | "\n",
144 | "4. Now join the left half of apple and right half of orange in each levels of Laplacian Pyramids\n",
145 | "\n",
146 | "5. Finally from this joint image pyramids, reconstruct the original image.\n",
147 | "\n",
148 | "\n",
149 | "Below is the full code. (For sake of simplicity, each step is done separately which may take more memory. You can optimize it if you want so)."
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": [
160 | "import cv2\n",
161 | "import numpy as np,sys\n",
162 | "\n",
163 | "A = cv2.imread('apple.jpg')\n",
164 | "B = cv2.imread('orange.jpg')\n",
165 | "\n",
166 | "# generate Gaussian pyramid for A\n",
167 | "G = A.copy()\n",
168 | "gpA = [G]\n",
169 | "for i in xrange(6):\n",
170 | " G = cv2.pyrDown(G)\n",
171 | " gpA.append(G)\n",
172 | "\n",
173 | "# generate Gaussian pyramid for B\n",
174 | "G = B.copy()\n",
175 | "gpB = [G]\n",
176 | "for i in xrange(6):\n",
177 | " G = cv2.pyrDown(G)\n",
178 | " gpB.append(G)\n",
179 | "\n",
180 | "# generate Laplacian Pyramid for A\n",
181 | "lpA = [gpA[5]]\n",
182 | "for i in xrange(5,0,-1):\n",
183 | " GE = cv2.pyrUp(gpA[i])\n",
184 | " L = cv2.subtract(gpA[i-1],GE)\n",
185 | " lpA.append(L)\n",
186 | "\n",
187 | "# generate Laplacian Pyramid for B\n",
188 | "lpB = [gpB[5]]\n",
189 | "for i in xrange(5,0,-1):\n",
190 | " GE = cv2.pyrUp(gpB[i])\n",
191 | " L = cv2.subtract(gpB[i-1],GE)\n",
192 | " lpB.append(L)\n",
193 | "\n",
194 | "# Now add left and right halves of images in each level\n",
195 | "LS = []\n",
196 | "for la,lb in zip(lpA,lpB):\n",
197 | " rows,cols,dpt = la.shape\n",
198 | " ls = np.hstack((la[:,0:cols/2], lb[:,cols/2:]))\n",
199 | " LS.append(ls)\n",
200 | "\n",
201 | "# now reconstruct\n",
202 | "ls_ = LS[0]\n",
203 | "for i in xrange(1,6):\n",
204 | " ls_ = cv2.pyrUp(ls_)\n",
205 | " ls_ = cv2.add(ls_, LS[i])\n",
206 | "\n",
207 | "# image with direct connecting each half\n",
208 | "real = np.hstack((A[:,:cols/2],B[:,cols/2:]))\n",
209 | "\n",
210 | "cv2.imwrite('Pyramid_blending2.jpg',ls_)\n",
211 | "cv2.imwrite('Direct_blending.jpg',real)"
212 | ]
213 | }
214 | ],
215 | "metadata": {
216 | "kernelspec": {
217 | "display_name": "Python 2",
218 | "language": "python",
219 | "name": "python2"
220 | },
221 | "language_info": {
222 | "codemirror_mode": {
223 | "name": "ipython",
224 | "version": 2
225 | },
226 | "file_extension": ".py",
227 | "mimetype": "text/x-python",
228 | "name": "python",
229 | "nbconvert_exporter": "python",
230 | "pygments_lexer": "ipython2",
231 | "version": "2.7.3"
232 | }
233 | },
234 | "nbformat": 4,
235 | "nbformat_minor": 0
236 | }
237 |
--------------------------------------------------------------------------------
/16-OpenCV/OpenCV - Morphological Transformations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Morphological Transformations"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Goal\n",
17 | "\n",
18 | "In this chapter,\n",
19 | "- We will learn different morphological operations like Erosion, Dilation, Opening, Closing etc.\n",
20 | "\n",
21 | "- We will see different functions like : cv2.erode(), cv2.dilate(), cv2.morphologyEx() etc.\n",
22 | "\n",
23 | "#### Theory\n",
24 | "Morphological transformations are some simple operations based on the image shape. It is normally performed on binary images. It needs two inputs, one is our original image, second one is called structuring element or kernel which decides the nature of operation. Two basic morphological operators are Erosion and Dilation. Then its variant forms like Opening, Closing, Gradient etc also comes into play. We will see them one-by-one with help of following image:"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "collapsed": false
31 | },
32 | "source": [
33 | "Result:\n",
34 | ""
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "source": [
43 | "#### 1. Erosion\n",
44 | "The basic idea of erosion is just like soil erosion only, it erodes away the boundaries of foreground object (Always try to keep foreground in white). So what does it do? The kernel slides through the image (as in 2D convolution). A pixel in the original image (either 1 or 0) will be considered 1 only if all the pixels under the kernel is 1, otherwise it is eroded (made to zero).\n",
45 | "\n",
46 | "So what happends is that, all the pixels near boundary will be discarded depending upon the size of kernel. So the thickness or size of the foreground object decreases or simply white region decreases in the image. It is useful for removing small white noises (as we have seen in colorspace chapter), detach two connected objects etc.\n",
47 | "\n",
48 | "Here, as an example, I would use a 5x5 kernel with full of ones. Let’s see it how it works:"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": true
56 | },
57 | "outputs": [],
58 | "source": [
59 | "import cv2\n",
60 | "import numpy as np\n",
61 | "\n",
62 | "img = cv2.imread('j.png',0)\n",
63 | "kernel = np.ones((5,5),np.uint8)\n",
64 | "erosion = cv2.erode(img,kernel,iterations = 1)"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {
70 | "collapsed": true
71 | },
72 | "source": [
73 | "Result:\n",
74 | ""
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "#### 2. Dilation\n",
82 | "It is just opposite of erosion. Here, a pixel element is ‘1’ if atleast one pixel under the kernel is ‘1’. So it increases the white region in the image or size of foreground object increases. Normally, in cases like noise removal, erosion is followed by dilation. Because, erosion removes white noises, but it also shrinks our object. So we dilate it. Since noise is gone, they won’t come back, but our object area increases. It is also useful in joining broken parts of an object."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "collapsed": true
90 | },
91 | "outputs": [],
92 | "source": [
93 | "dilation = cv2.dilate(img,kernel,iterations = 1)"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {},
99 | "source": [
100 | "Result :\n",
101 | ""
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {
107 | "collapsed": false
108 | },
109 | "source": [
110 | "#### 3. Opening\n",
111 | "Opening is just another name of erosion followed by dilation. It is useful in removing noise, as we explained above. Here we use the function, cv2.morphologyEx()"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [],
121 | "source": [
122 | "opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "Result :\n",
130 | ""
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "#### 4. Closing\n",
138 | "Closing is reverse of Opening, Dilation followed by Erosion. It is useful in closing small holes inside the foreground objects, or small black points on the object."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {
145 | "collapsed": false
146 | },
147 | "outputs": [],
148 | "source": [
149 | "closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {
155 | "collapsed": true
156 | },
157 | "source": [
158 | "Result:\n",
159 | "\n",
160 | ""
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "#### 5. Morphological Gradient\n",
168 | "It is the difference between dilation and erosion of an image.\n",
169 | "\n",
170 | "The result will look like the outline of the object."
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "collapsed": true
178 | },
179 | "outputs": [],
180 | "source": [
181 | "gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {
187 | "collapsed": true
188 | },
189 | "source": [
190 | "Result:\n",
191 | "\n",
192 | ""
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "#### 6. Top Hat\n",
200 | "It is the difference between input image and Opening of the image. Below example is done for a 9x9 kernel."
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {
207 | "collapsed": true
208 | },
209 | "outputs": [],
210 | "source": [
211 | "tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "Result:\n",
219 | "\n",
220 | ""
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "#### 7. Black Hat\n",
228 | "It is the difference between the closing of the input image and input image."
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "collapsed": true
236 | },
237 | "outputs": [],
238 | "source": [
239 | "blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "Result:\n",
247 | "\n",
248 | ""
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "#### Structuring Element\n",
256 | "We manually created a structuring elements in the previous examples with help of Numpy. It is rectangular shape. But in some cases, you may need elliptical/circular shaped kernels. So for this purpose, OpenCV has a function, cv2.getStructuringElement(). You just pass the shape and size of the kernel, you get the desired kernel."
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": null,
262 | "metadata": {
263 | "collapsed": true
264 | },
265 | "outputs": [],
266 | "source": [
267 | "# Rectangular Kernel\n",
268 | ">>> cv2.getStructuringElement(cv2.MORPH_RECT,(5,5))\n",
269 | "array([[1, 1, 1, 1, 1],\n",
270 | " [1, 1, 1, 1, 1],\n",
271 | " [1, 1, 1, 1, 1],\n",
272 | " [1, 1, 1, 1, 1],\n",
273 | " [1, 1, 1, 1, 1]], dtype=uint8)\n",
274 | "\n",
275 | "# Elliptical Kernel\n",
276 | ">>> cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))\n",
277 | "array([[0, 0, 1, 0, 0],\n",
278 | " [1, 1, 1, 1, 1],\n",
279 | " [1, 1, 1, 1, 1],\n",
280 | " [1, 1, 1, 1, 1],\n",
281 | " [0, 0, 1, 0, 0]], dtype=uint8)\n",
282 | "\n",
283 | "# Cross-shaped Kernel\n",
284 | ">>> cv2.getStructuringElement(cv2.MORPH_CROSS,(5,5))\n",
285 | "array([[0, 0, 1, 0, 0],\n",
286 | " [0, 0, 1, 0, 0],\n",
287 | " [1, 1, 1, 1, 1],\n",
288 | " [0, 0, 1, 0, 0],\n",
289 | " [0, 0, 1, 0, 0]], dtype=uint8)"
290 | ]
291 | }
292 | ],
293 | "metadata": {
294 | "kernelspec": {
295 | "display_name": "Python 2",
296 | "language": "python",
297 | "name": "python2"
298 | },
299 | "language_info": {
300 | "codemirror_mode": {
301 | "name": "ipython",
302 | "version": 2
303 | },
304 | "file_extension": ".py",
305 | "mimetype": "text/x-python",
306 | "name": "python",
307 | "nbconvert_exporter": "python",
308 | "pygments_lexer": "ipython2",
309 | "version": "2.7.3"
310 | }
311 | },
312 | "nbformat": 4,
313 | "nbformat_minor": 0
314 | }
315 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/airports.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e448270d8affa34d4c9d63df6fa68efa39d96f30c6b42ea59801b48a6c06fcf8
3 | size 937067
4 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/movie_metadata.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ee65e153a601b2fe6ff4f4db87cabf715d304635bb7a662a0f7fd6db21c621bc
3 | size 1494688
4 |
--------------------------------------------------------------------------------
/18-Elastic/datasets/table.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ff00fe10e7cf76cb612803671844233f51f667ba3fcc0446ebe8a3f536a14898
3 | size 91149
4 |
--------------------------------------------------------------------------------
/18-Elastic/images/01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/01.png
--------------------------------------------------------------------------------
/18-Elastic/images/download.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/download.jpg
--------------------------------------------------------------------------------
/18-Elastic/images/elastic_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/elastic_logo.png
--------------------------------------------------------------------------------
/18-Elastic/images/g01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g01.png
--------------------------------------------------------------------------------
/18-Elastic/images/g02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g02.png
--------------------------------------------------------------------------------
/18-Elastic/images/g03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g03.png
--------------------------------------------------------------------------------
/18-Elastic/images/g04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g04.png
--------------------------------------------------------------------------------
/18-Elastic/images/g05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g05.png
--------------------------------------------------------------------------------
/18-Elastic/images/g06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g06.png
--------------------------------------------------------------------------------
/18-Elastic/images/g07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g07.png
--------------------------------------------------------------------------------
/18-Elastic/images/g08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g08.png
--------------------------------------------------------------------------------
/18-Elastic/images/g09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g09.png
--------------------------------------------------------------------------------
/18-Elastic/images/g10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g10.png
--------------------------------------------------------------------------------
/18-Elastic/images/g11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g11.png
--------------------------------------------------------------------------------
/18-Elastic/images/g12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/g12.png
--------------------------------------------------------------------------------
/18-Elastic/images/k01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k01.png
--------------------------------------------------------------------------------
/18-Elastic/images/k02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k02.png
--------------------------------------------------------------------------------
/18-Elastic/images/k03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k03.png
--------------------------------------------------------------------------------
/18-Elastic/images/k04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k04.png
--------------------------------------------------------------------------------
/18-Elastic/images/k05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k05.png
--------------------------------------------------------------------------------
/18-Elastic/images/k06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k06.png
--------------------------------------------------------------------------------
/18-Elastic/images/k07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/k07.png
--------------------------------------------------------------------------------
/18-Elastic/images/l01_ex01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l01_ex01.png
--------------------------------------------------------------------------------
/18-Elastic/images/l01_ex03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l01_ex03.png
--------------------------------------------------------------------------------
/18-Elastic/images/l02_ex01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l02_ex01.png
--------------------------------------------------------------------------------
/18-Elastic/images/l02_ex03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/l02_ex03.png
--------------------------------------------------------------------------------
/18-Elastic/images/solr_vs_elasticsearch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/18-Elastic/images/solr_vs_elasticsearch.jpg
--------------------------------------------------------------------------------
/21-Slack/S1-Slack_GetPermissions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Getting the Access Code for Slack\n",
8 | "\n",
9 | "The code below illustrates how we connect to the Slack API to request an authorization token for our app. Remember that we have to register our app with Slack first, and get the `client_id`.\n",
10 | "\n",
11 | "#### Creating a Slack App\n",
12 | "\n",
13 | "* Go to https://api.slack.com/apps and create your app. You will need the \"Client ID\" and the \"Client Secret\" that is created for you.\n",
14 | "* Select the **\"OAuth & Permissions\"** tab from the left-hand side and add a \"Redirect URL\" for your app. The redirect URL ensures (for security) that the app can only talk to your own web server. Add `http://<your-host>:5000/slack` as your redirect URL.\n",
15 | "* Select the \"Bot Users\" tab from the left-hand side and add a bot username for your app."
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "\n",
23 | "#### Get the Web Server up and running\n",
24 | "\n",
25 | "* See the Python Script `webserver.py` that is in this folder. \n",
26 | "* **IMPORTANT**: Modify the CLIENT_ID, CLIENT_SECRET, and REDIRECT variables in `slack_app.json` to match those of your own Slack app.\n",
27 | "* You now need to start the server, so that it can receive the authentication code for the user. "
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# the command below will run the webserver from the notebook\n",
37 | "# Notice that the server is configured to stop running \n",
38 | "# after receiving the first authorization grant and storing the access token\n",
39 | "%run webserver.py"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "# Alternatively, you can launch the webserver from the Terminal issuing the command\n",
49 | "#\n",
50 | "# python3 webserver.py\n"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "# Alternatively, if you want to run the server in the background, type:\n",
60 | "#\n",
61 | "# nohup ./webserver.py &\n",
62 | "#\n",
63 | "# which will put the server to run in the background\n",
64 | "#\n",
65 | "# If you need to stop the background server, you can issue the following\n",
66 | "# command from the terminal, which will stop any process that contains \n",
67 | "# `WebServer` as part of its name\n",
68 | "#\n",
69 | "# kill $(pgrep WebServer)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "Now, we are ready to execute the authentication flow, which is illustrated in the picture below."
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | ""
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "#### Launch the user authentication process (Steps 1-4 in the picture above)\n",
91 | "\n",
92 | "* Now go to `http://<your-host>:5000/install` and click the \"Install Slack Bot\" URL. You will be asked to login to your Slack account, and grant permissions to the bot to use your account credentials/permissions. This is the Step 1 in the picture above.\n",
93 | "\n",
94 | "* Once you grant permissions, then the Slack server will call the `http://<your-host>:5000/slack` URL and send the authentication code to the redirect URL. This is Step 2 in the picture above.\n",
95 | "\n",
96 | "* At that point, our web server will call back the Slack Service API saying \"hey, I got the code, here is the verification (my `client_secret`) that I am indeed the correct app. Can you send me the access token for the user?\" (See the `WebServer.py` code, under the \"/slack\" route.) This is Step 3 in the picture above.\n",
97 | "\n",
98 | "* The Slack service API will send us back the \"access token\" for the authenticated user. We will save this in the file `slack_secret.json`. This is Step 4 in the picture above.\n",
99 | "\n",
100 | "* The webserver will stop running after a successful authorization, and the cell above will stop showing a `[*]`"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "#### Done with the authentication, the authentication data is now stored locally.\n",
108 | "\n",
109 | "After we authorize the app, our web server has received from Slack the authentication token, and it was stored in the `slack_secret.json` file. Now, we are ready to proceed with Steps 5 and 6, which are the regular API calls to the Slack API."
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "import json\n",
119 | "\n",
120 | "# Read the access token from the file\n",
121 | "OAUTH_FILE = 'slack_secret.json'\n",
122 | "f = open(OAUTH_FILE, 'r') \n",
123 | "content = f.read()\n",
124 | "f.close()\n",
125 | "auth_info = json.loads(content)\n",
126 | "\n",
127 | "auth_info"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 1,
133 | "metadata": {
134 | "collapsed": true
135 | },
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "Requirement already satisfied: slackclient in /usr/local/lib/python3.5/dist-packages\n",
142 | "Requirement already satisfied: websocket-client<1.0a0,>=0.35 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
143 | "Requirement already satisfied: requests<3.0a0,>=2.11 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
144 | "Requirement already satisfied: six<2.0a0,>=1.10 in /usr/local/lib/python3.5/dist-packages (from slackclient)\n",
145 | "Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
146 | "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
147 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n",
148 | "Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.5/dist-packages (from requests<3.0a0,>=2.11->slackclient)\n"
149 | ]
150 | }
151 | ],
152 | "source": [
153 | "# We will just use the library to test that the code that we get back\n",
154 | "# allows us to connect to the Slack API\n",
155 | "!sudo -H python3 -m pip install slackclient"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "from slackclient import SlackClient\n",
165 | "sc = SlackClient(auth_info[\"access_token\"])"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "response = sc.api_call(\"users.info\", user=auth_info[\"user_id\"])\n",
175 | "user = response['user']"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "print(\"The username of the authenticated user is\", user.get('name'))\n",
185 | "print(\"The email of the authenticated user is\", user.get('profile').get('email'))\n",
186 | "print(\"The real name of the authenticated user is\", user.get('profile').get('real_name'))"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {
193 | "collapsed": true
194 | },
195 | "outputs": [],
196 | "source": []
197 | }
198 | ],
199 | "metadata": {
200 | "kernelspec": {
201 | "display_name": "Python 3",
202 | "language": "python",
203 | "name": "python3"
204 | },
205 | "language_info": {
206 | "codemirror_mode": {
207 | "name": "ipython",
208 | "version": 3
209 | },
210 | "file_extension": ".py",
211 | "mimetype": "text/x-python",
212 | "name": "python",
213 | "nbconvert_exporter": "python",
214 | "pygments_lexer": "ipython3",
215 | "version": "3.5.2"
216 | }
217 | },
218 | "nbformat": 4,
219 | "nbformat_minor": 1
220 | }
221 |
--------------------------------------------------------------------------------
/21-Slack/S2-Slack-EventProcessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Building a Chat Bot\n",
8 | "\n",
9 | "In the examples below, we will build a very simple application that creates a simple chat bot. The bot is monitoring the actions that are happening on Slack, and posts some acknowledgement messages every time someone types a message.\n",
10 | "\n",
11 | "\n",
12 | "#### Authentication\n",
13 | "\n",
14 | "We start first by authenticating our application. (See notes in S1). We read the `slack_secret.json` file, and we instantiate then our SlackClient."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# Read the access token from the file\n",
24 | "import json\n",
25 | "\n",
26 | "secrets_file = 'slack_secret.json'\n",
27 | "f = open(secrets_file, 'r') \n",
28 | "content = f.read()\n",
29 | "f.close()\n",
30 | "\n",
31 | "auth_info = json.loads(content)\n",
32 | "token = auth_info[\"access_token\"]"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "# If you happen not to have the slackclient library installed, \n",
42 | "# you can uncomment and run the code below\n",
43 | "!sudo -H python3 -m pip install -U slackclient"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "from slackclient import SlackClient\n",
53 | "sc = SlackClient(token)\n",
54 | "response = sc.api_call(\"users.info\", user=auth_info[\"user_id\"])\n",
55 | "user = response['user']\n",
56 | "print(\"The username of the authenticated user is\", user.get('name'))\n",
57 | "print(\"The email of the authenticated user is\", user.get('profile').get('email'))\n",
58 | "print(\"The real name of the authenticated user is\", user.get('profile').get('real_name'))"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "### Implementing the bot\n",
66 | "\n",
67 | "The code below continuously monitors the various events on Slack. Of course, the bot can be programmed to react to any type of event. In this particular example, the bot monitors for user messages, and posts a \"thank you\" note to each user into the #bots channel."
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "import time\n",
77 | "import re\n",
78 | "\n",
79 | "if sc.rtm_connect():\n",
80 | " # We are going to be polling the Slack API for recent events continuously\n",
81 | " while True:\n",
82 | " # We are going to wait 1 second between monitoring attempts\n",
83 | " time.sleep(1)\n",
84 | " # If there are any new events, we will get a response. If there are no events, the response will be empty\n",
85 | " response = sc.rtm_read()\n",
86 | " for item in response:\n",
87 | " event_type = item.get(\"type\")\n",
88 | " # If the event is a message and the message is written by a user (and not a bot)\n",
89 | " if event_type == 'message' and item.get(\"user\")!=None:\n",
90 | " print(item)\n",
91 | " print(\"=========================\")\n",
92 | " message = \"Thank you user {u} for participating in channel {c}\".format(u=item[\"user\"], c=item[\"channel\"])\n",
93 | " sc.api_call(\"chat.postMessage\", channel=\"#bots\", text=message)"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {
99 | "collapsed": true
100 | },
101 | "source": [
102 | "#### Exercises \n",
103 | "\n",
104 | "* Change the code above to retrieve the actual user name of the user, instead of the user id\n",
105 | "* Change the code above to retrieve the actual name of the channel instead of the channel id\n",
106 | "* Change the code above to post the answer to the channel where the user posted, instead of the '#bots' channel"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": []
122 | }
123 | ],
124 | "metadata": {
125 | "kernelspec": {
126 | "display_name": "Python 3",
127 | "language": "python",
128 | "name": "python3"
129 | },
130 | "language_info": {
131 | "codemirror_mode": {
132 | "name": "ipython",
133 | "version": 3
134 | },
135 | "file_extension": ".py",
136 | "mimetype": "text/x-python",
137 | "name": "python",
138 | "nbconvert_exporter": "python",
139 | "pygments_lexer": "ipython3",
140 | "version": "3.5.2"
141 | }
142 | },
143 | "nbformat": 4,
144 | "nbformat_minor": 1
145 | }
146 |
--------------------------------------------------------------------------------
/21-Slack/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/1.jpg
--------------------------------------------------------------------------------
/21-Slack/images/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/10.jpg
--------------------------------------------------------------------------------
/21-Slack/images/11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/11.jpg
--------------------------------------------------------------------------------
/21-Slack/images/12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/12.jpg
--------------------------------------------------------------------------------
/21-Slack/images/13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/13.jpg
--------------------------------------------------------------------------------
/21-Slack/images/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/14.jpg
--------------------------------------------------------------------------------
/21-Slack/images/15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/15.jpg
--------------------------------------------------------------------------------
/21-Slack/images/16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/16.jpg
--------------------------------------------------------------------------------
/21-Slack/images/17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/17.jpg
--------------------------------------------------------------------------------
/21-Slack/images/18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/18.jpg
--------------------------------------------------------------------------------
/21-Slack/images/19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/19.jpg
--------------------------------------------------------------------------------
/21-Slack/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/2.jpg
--------------------------------------------------------------------------------
/21-Slack/images/20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/20.jpg
--------------------------------------------------------------------------------
/21-Slack/images/21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/21.jpg
--------------------------------------------------------------------------------
/21-Slack/images/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/3.jpg
--------------------------------------------------------------------------------
/21-Slack/images/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/4.jpg
--------------------------------------------------------------------------------
/21-Slack/images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/5.jpg
--------------------------------------------------------------------------------
/21-Slack/images/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/6.jpg
--------------------------------------------------------------------------------
/21-Slack/images/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/7.jpg
--------------------------------------------------------------------------------
/21-Slack/images/8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/8.jpg
--------------------------------------------------------------------------------
/21-Slack/images/9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ipeirotis/dealing_with_data/7eb15bdca8c475fe13afef7418e6a71b46d7cd61/21-Slack/images/9.jpg
--------------------------------------------------------------------------------
/21-Slack/slack_app.json:
--------------------------------------------------------------------------------
1 | {
2 | "CLIENT_ID" : "PUT_YOUR_OWN_CLIENT_ID",
3 | "CLIENT_SECRET" : "PUT_YOUR_OWN_CLIENT_SECRET",
4 | "REDIRECT" : "PUT_YOUR_OWN_REDIRECT_URL",
5 | "PERMISSIONS" : "client"
6 | }
7 |
--------------------------------------------------------------------------------
/21-Slack/slack_secret.json:
--------------------------------------------------------------------------------
1 | {"ok":false,"error":"invalid_code"}
2 |
--------------------------------------------------------------------------------
/21-Slack/templates/install_slack_app.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
12 |
13 |
--------------------------------------------------------------------------------
/21-Slack/webserver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | 
3 | # Flask is a webserver library
4 | from flask import Flask, request, render_template
5 | 
6 | # We will use the requests library to issue a request to Slack
7 | # and the json library to parse it
8 | import requests
9 | import json
10 | 
11 | SLACK_URL = "https://slack.com/oauth/authorize"  # Slack's OAuth authorization endpoint
12 | 
13 | # Edit this file to add your own client details in the slack_app.json file
14 | CONFIG_FILE = 'slack_app.json'
15 | # This is the location where we will store the authentication data from Slack
16 | OAUTH_FILE = 'slack_secret.json'
17 | 
18 | # Initialize the Flask web server
19 | # We create a folder "plots" where we are going to store
20 | # plots to post them (later on) as messages to Slack channels
21 | webserver = Flask("SlackOAuth", static_folder='plots')
22 | 
23 | # This URL will just have a link that the user clicks to install
24 | # the Slack bot (step 1 of the OAuth flow: send the user to Slack's authorize page)
25 | @webserver.route("/install")
26 | def install_bot():
27 | url = (SLACK_URL +  # base authorize URL defined at the top of the file
28 | '?response_type=code' +  # ask Slack for an authorization code
29 | '&client_id='+ CLIENT_ID +  # identifies our registered Slack app
30 | '&scope=' + PERMISSIONS +  # permissions the app requests
31 | '&redirect_uri=' + REDIRECT )  # Slack will send the code back to this URL
32 | 
33 | return render_template("install_slack_app.html", url=url)  # render a page containing the install link
34 | 
35 | # This is the place where the webserver will receive the call from Slack
36 | # The call from Slack will have a parameter "code" (the OAuth authorization code)
37 | @webserver.route("/slack")
38 | def oauth_helper():
39 | code = request.args.get('code')  # authorization code passed by Slack in the query string
40 | 
41 | # Now that we got the code
42 | # we request the access token from Slack. Notice that we
43 | # use the client_secret to prove that the app is the real one
44 | # that was registered with the Slack API
45 | url = "https://slack.com/api/oauth.access"
46 | params = {"grant_type": "authorization_code",
47 | "client_id": CLIENT_ID,
48 | "client_secret": CLIENT_SECRET,
49 | "code": code,
50 | "redirect_uri": REDIRECT}
51 | resp = requests.get(url, params=params)  # exchange the authorization code for an access token
52 | data = json.loads(resp.text)  # NOTE(review): parsed but never used below — presumably kept for debugging
53 | 
54 | # We store the code in a file as the webserver does not interact with the
55 | # rest of the Python code, and we also want to reuse the code in the future
56 | # (Typically, we would store the access_token in a database.)
57 | f = open(OAUTH_FILE, 'w') # Store the raw JSON response (which contains the access token) as a file
58 | f.write(resp.text + '\n')
59 | f.close()
60 | 
61 | # If we start the server just to get the code, it is safe (and convenient)
62 | # to shut down the web server after this request.
63 | # stop_server()
64 | 
65 | # What we return here has no real impact on the functionality of the code
66 | # Normally, we would just redirect the user to a "Thank you" page.
67 | return 'Code: '+code+'
Response:'+resp.text+''  # NOTE(review): HTML tags appear stripped from this literal by the dump — confirm against the original source
68 | 
69 | def stop_server():  # ask the development server to shut down after the current request
70 | shutdown_after_request = request.environ.get('werkzeug.server.shutdown')  # shutdown hook provided by the Werkzeug dev server
71 | shutdown_after_request()  # NOTE(review): this is None (and raises TypeError) outside the Werkzeug dev server — confirm
72 | return
73 | 
74 | # This allows us to serve files (in our case, images)
75 | # that we create on the server.
76 | @webserver.route('/plots/')
77 | def static_proxy(path):  # NOTE(review): route appears garbled in this dump — likely '/plots/<path:path>' originally, otherwise `path` is never supplied
78 | return webserver.send_static_file(path)  # serve the file from the 'plots' static folder configured above
79 | 
80 | 
81 | if __name__ == '__main__':
82 | 
83 | # We open the CONFIG file here and read the details for the app
84 | f = open(CONFIG_FILE, 'r')
85 | content = f.read()
86 | f.close()
87 | config= json.loads(content)
88 | CLIENT_ID = config['CLIENT_ID']  # these become module-level globals read by the route handlers above
89 | CLIENT_SECRET = config['CLIENT_SECRET']
90 | REDIRECT = config['REDIRECT']
91 | PERMISSIONS = config['PERMISSIONS']
92 | webserver.run(host='0.0.0.0', port=5000, debug=True)  # listen on all interfaces; debug mode is for classroom use only
93 | 
94 | 
95 | 
96 | 
97 | 
--------------------------------------------------------------------------------
/COURSES.md:
--------------------------------------------------------------------------------
1 | ## Related Courses
2 | * [Data Analytics for Business](http://inseaddataanalytics.github.io/INSEADAnalytics/), [Projects](http://inseaddataanalytics.github.io/INSEADAnalytics/ProjectsMenu.html)
3 | * [Udacity Intro to Data Science](https://www.udacity.com/course/ud359)
4 | * [Udacity Data Wrangling with MongoDB](https://www.udacity.com/course/ud032)
5 | * [Harvard CS 109](http://cs109.org/schedule.php)
6 | * [CU Boulder Research Computing Fall 2013 meetup](https://github.com/ResearchComputing/Meetup-Fall-2013)
7 | * [Udacity Exploratory Data Analysis](https://www.udacity.com/course/ud651)
8 | * [USC COMM 620 Data Retrieval and Processing Techniques](http://www-bcf.usc.edu/~ljian/courses/large_data_syllabus.html)
9 | * [NYU/Stern Practical Data Science](http://jattenberg.github.io/PDS-Fall-2013/)
10 | * [Columbia Data Science](http://columbiadatascience.com/2012/08/29/syllabus/)
11 | * [UW-IS school Introduction to Data Science](http://www.jblumenstock.com/teaching/course=infx598)
12 | * [MIT’s How to Process, Analyze and Visualize Data](http://dataiap.github.io/dataiap/) [github](https://github.com/dataiap/dataiap)
13 | * [MIT’s ASCII to Answers](http://db.csail.mit.edu/6.885/) [github](https://github.com/mitdbg/asciiclass)
14 |
15 |
--------------------------------------------------------------------------------
/DATA_SOURCES.md:
--------------------------------------------------------------------------------
1 | ## Datasets on BigQuery
2 |
3 | * [NYU datasets](https://console.cloud.google.com/bigquery?invt=AbzacQ&project=nyu-datasets&inv=1)
4 | * Citibike
5 | * DOH restaurant inspection data
6 | * Facebook
7 | * IMDB
8 | * NYPD complaints
9 | * ...(others)
10 | * [Public Datasets on BigQuery](https://console.cloud.google.com/bigquery(cameo:browse)?invt=AbzacQ&project=bigquery-public-data&filter=solution-type:dataset)
11 | * [Wharton Research Data Services (WRDS)](https://wrds-www.wharton.upenn.edu/)
12 |
13 | ## Interesting data sources
14 | * [Public Datasets on Google BigQuery](https://console.cloud.google.com/marketplace/browse;page=1?filter=solution-type:dataset&filter=price:free&hl=en-GB&project=nyu-datasets)
15 | * Yelp APIs: [Yelp Fusion](https://www.yelp.com/developers/documentation/v3) and [Yelp GraphQL](https://www.yelp.com/developers/graphql/guides/intro)
16 | * [US Census API](https://www.census.gov/data/developers/data-sets.html) and [US Census Data](http://www.census.gov/data.html)
17 | * [Twitter API](https://developer.twitter.com/en/docs.html)
18 | * [Spotify API](https://developer.spotify.com/documentation/web-api/)
19 | * [NYC Open Data](https://data.cityofnewyork.us/)
20 | * [U.S. Government’s open data](https://www.data.gov/)
21 | * [The New York Times Data APIs](http://developer.nytimes.com/docs)
22 | * [Quandl: Finance and Economics Data](http://www.quandl.com/)
23 | * [Lending Club](https://www.lendingclub.com/info/download-data.action)
24 | * [World Bank Data](http://data.worldbank.org/)
25 | * [Unicef Data](http://www.unicef.org/sowc09/statistics/tables.php)
26 | * [Yahoo Labs Data](http://webscope.sandbox.yahoo.com/catalog.php)
27 | * [World Health Organization](http://www.who.int/research/en/)
28 | * [Weather Underground Data API](http://www.wunderground.com/weather/api/?MR=1)
29 | * [Google Public Data Explorer](http://www.google.com/publicdata/directory)
30 | * [NASDAQ](https://data.nasdaq.com/)
31 | * [CBOE Futures Exchange](http://cfe.cboe.com/Data/)
32 | * [Enron Email Dataset](http://www.cs.cmu.edu/~enron/)
33 |
34 |
35 | ## Aggregate lists of data sets
36 | * [Kaggle Datasets](https://www.kaggle.com/datasets)
37 | * [Data Collaboratives](http://datacollaboratives.org/explorer.html)
38 | * [30 Places to Find Open Data on the Web, by Romy Misra of Visual.ly](http://blog.visual.ly/data-sources/)
39 | * [Interesting datasets and APIs, by Prof. James Bagrow](http://bagrow.com/dsv/datasets.html)
40 | * [Datasets for Data Mining and Data Science, by KDnugets](http://www.kdnuggets.com/datasets/index.html)
41 | * [Research-quality data sets, by Hilary Mason](http://bitly.com/bundles/hmason/1)
42 | * [Reddit list of interesting data sets](http://www.reddit.com/r/datasets/)
43 | * [Finding Data on the Internet by Revolution Analytics](http://www.inside-r.org/howto/finding-data-internet)
44 |
45 | ## Discussions on data sources
46 | * [What data people are searching for](http://static.googleusercontent.com/media/www.google.com/en/us/googleblogs/pdfs/google_public_data_march2010.pdf)
47 | * [Discussion for accessing finance data](http://stackoverflow.com/questions/10040954/alternative-to-google-finance-api)
48 | * [Documentation for Yahoo! Finance](http://www.gummy-stuff.org/Yahoo-data.htm)
49 | * [Open Data Discussion](http://opendata.stackexchange.com/questions?sort=votes)
50 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Outside NYU, the content is shared under a Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license. For more details see https://creativecommons.org/licenses/by-nc/4.0/
2 |
3 | Inside NYU, any usage of the material by instructors other than the authors is strictly prohibited, and any violators will be prosecuted to the fullest extent of law.
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repository contains notes for various classes and seminars that I teach at NYU. They are focused on teaching programming for data science to non-CS majors. The emphasis is on offering live examples that students can use directly to complete their goals.
2 |
3 | ## Accessing your Data Science Environment
4 |
5 | * [Accessing your Data Science Environment](https://docs.google.com/document/d/1A5Y53eqBRRlrVMV-yLrpA9-3xZ3jQmv9i6qhOU5gn44/edit?usp=sharing)
6 |
7 | We set up and deploy our data science environment (effectively, Jupyter with Python and R support, plus MySQL) using docker. As our default option, we allow students to connect to a JupyterHub server that runs on Kubernetes. We also give the option to students to run the same environment locally on their laptops, or deploy the Docker image on AWS or Google Cloud.
8 |
9 | ## Data Sets
10 |
11 | * [List of interesting data sets](DATA_SOURCES.md)
12 |
13 | ## Related Courses
14 |
15 | * [List of related courses](COURSES.md)
16 |
17 | ## License
18 |
19 | * See [LICENSE](LICENSE)
20 |
--------------------------------------------------------------------------------
/jupyterhub/Makefile:
--------------------------------------------------------------------------------
# Build and publish the Docker images for the JupyterHub deployment.
# Requires DOCKER_REGISTRY to be set in the environment; IMG_VERSION is the
# tag suffix and defaults to "latest".
SHELL = /bin/bash
DOCKER_PATH=docker/
IMG_VERSION?=latest

.PHONY: build-image push-image build-single-user push-single-user check-env

# Fail fast with a clear message when the target registry is not configured.
check-env:
ifndef DOCKER_REGISTRY
	$(error DOCKER_REGISTRY not set)
endif

# Generic build rule; callers set IMAGE (and optionally ARG for extra
# `docker build` flags). The resulting tag is REGISTRY:IMAGE-VERSION.
build-image: check-env
	docker build -f ${DOCKER_PATH}/${IMAGE}/Dockerfile ${ARG} -t $(DOCKER_REGISTRY):$(IMAGE)-$(IMG_VERSION) .

push-image: check-env build-image
	docker push $(DOCKER_REGISTRY):$(IMAGE)-$(IMG_VERSION)

# Convenience wrappers for the "single-user" notebook image.
build-single-user:
	make IMAGE="single-user" build-image

push-single-user: build-single-user
	make IMAGE="single-user" push-image
--------------------------------------------------------------------------------
/jupyterhub/README.md:
--------------------------------------------------------------------------------
1 | This is the Class Tools infrastructure specification and management tools.
2 |
3 | - [Prerequisites](#prerequisites)
4 | - [Build the notebook image](#build-the-notebook-image)
5 | - [Setup JupyterHub on your Kubernetes cluster](#setup-jupyterhub-on-your-kubernetes-cluster)
6 | - [Chart configuration](#chart-configuration)
7 | - [Reference](#reference)
8 |
9 | ### Prerequisites
10 |
11 | Make sure you have installed
12 |
13 | * [Docker](https://www.docker.com/) >= *17.x.x*
14 | * [Google Cloud SDK](https://cloud.google.com/sdk/)
15 | * [kubectl](https://kubernetes.io/docs/user-guide/kubectl/)
16 |
17 | ### Build the notebook image
18 |
19 | Specify the following environment variables:
20 |
21 | * `DOCKER_REGISTRY` : the registry and repository to push the images, e.g. `me/my-docker-hub-repo`,
22 | * `IMG_VERSION` : the suffix to append to each image. Each tag will be in the form `name-IMG_VERSION`. Defaults to *latest*.
23 |
24 | To build and push the single-user Jupyter Notebook image to the docker repo specified above, run
25 |
26 | ```bash
27 | make push-single-user
28 | ```
29 |
30 | in the project's root directory.
31 |
32 | You can also run
33 |
34 | ```bash
35 | make build-single-user
36 | ```
37 |
38 | to just build the docker image locally.
39 |
40 |
41 | ### Setup JupyterHub on your Kubernetes cluster
42 |
43 | First, you need to install Helm. See [these instructions](https://github.com/kubernetes/helm/blob/master/docs/install.md)
44 | for details on how to do this.
45 |
46 | To initialize Helm, execute
47 | ```bash
48 | kubectl --namespace kube-system create sa tiller
49 | kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller
50 | helm init --service-account tiller
51 | ```
52 |
53 | **IMPORTANT**: Only execute the second command if the kubernetes cluster you are deploying to is RBAC-enabled.
54 |
55 | Once the Helm initialization is done, install the JupyterHub helm repository to Helm, by running:
56 | ```bash
57 | helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
58 | helm repo update
59 | ```
60 |
61 | ### Chart configuration
62 |
63 | Run
64 |
65 | ```bash
66 | cp deployment/helm/config.yaml.example deployment/helm/config.yaml
67 | ```
68 |
69 | and replace the placeholders inside the `config.yaml` file with their desirable values.
70 |
71 | * `proxy.secretToken`: Quoting from [[1]](#reference):
72 | > A 64-byte cryptographically secure randomly generated string used to secure communications between the hub and the configurable-http-proxy.
73 | >
74 | > This must be generated with `openssl rand -hex 32`.
75 | >
76 | > Changing this value will cause the proxy and hub pods to restart. It is good security practice to rotate these values over time. If this secret leaks, immediately change it to something else, or user data can be compromised
77 |
78 | * `singleuser.image`: The docker image you built during the first phase of the setup process.
79 | * `singleuser.image.name`: The repository in which the image is hosted.
80 | * `singleuser.image.tag`: The tag of the target notebook image.
81 |
82 | For more configuration options, see [[1]](#reference).
83 |
84 | Once you have setup the `config.yaml` file, run
85 | ```bash
86 | helm install jupyterhub/jupyterhub --version=v0.5 \
87 | --name=RELEASE-NAME --namespace=NAMESPACE-NAME \
88 | -f path/to/config.yaml [--set=rbac.enabled=false]
89 | ```
90 |
91 | where:
92 |
93 | * `--name` is a deployment identifier used by helm
94 | * `--namespace` is the name of the namespace in which JupyterHub will be deployed. If it does not exist, it will
95 | be created for you.
96 |
97 | **NOTE**: If the cluster you are deploying to is not RBAC-enabled, then you need to also use the `--set` flag
98 | in the above command.
99 |
100 | After the above command executes, check the status of the deployment by running
101 | ```bash
102 | kubectl get pods --namespace NAMESPACE-NAME
103 | ```
104 |
105 | When both the proxy and the hub pods have a status of 'Running', you are good to go.
106 |
107 | **NOTE**: You will also have to allow TCP traffic to the hub proxy's port on your cloud provider's firewall. Run
108 | ```bash
109 | kubectl get services --namespace NAMESPACE-NAME
110 | ```
111 |
112 | and look for the `proxy-public` service. Allow TCP traffic to the port which targets port 80 of the proxy. For instance,
113 | say the output of the `get services` command were:
114 | ```
115 | hub ClusterIP . . . 8081/TCP
116 | proxy-api ClusterIP . . . 8001/TCP
117 | proxy-http ClusterIP . . . 8000/TCP
118 | proxy-public LoadBalancer . . . 80:31870/TCP,443:31182/TCP
119 | ```
120 |
121 | We would have to allow traffic to `tcp:31870` on our firewall in order to be able to access the proxy.
122 |
123 | ### Reference
124 |
125 | [1] [Helm Chart Configuration](https://zero-to-jupyterhub.readthedocs.io/en/latest/reference.html#id1)
126 |
--------------------------------------------------------------------------------
/jupyterhub/deployment/helm/config.yaml.example:
--------------------------------------------------------------------------------
# Helm chart values template for the JupyterHub deployment.
# Copy to config.yaml and replace the placeholders (see jupyterhub/README.md).
proxy:
  # 64-byte cryptographically secure random hex string; generate it with
  # `openssl rand -hex 32`.
  secretToken: "YOUR_TOKEN_HERE"
singleuser:
  image:
    # Docker repository and tag of the single-user notebook image built
    # with `make push-single-user`.
    name: HUB_REPOSITORY
    tag: IMAGE_TAG
7 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/Dockerfile:
--------------------------------------------------------------------------------
# Single-user Jupyter notebook image spawned by JupyterHub for each student.
FROM ubuntu:latest

# Force UTF-8 everywhere so Python and Jupyter handle non-ASCII text correctly.
ENV LANGUAGE en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV PYTHONIOENCODING UTF-8
ENV NB_USER ubuntu

RUN useradd -ms /bin/bash ubuntu

# System packages: toolchain and headers needed to build the scientific
# Python stack, plus git/curl/wget and MySQL client support.
# NOTE(review): libcurl4-openssl-dev and libffi-dev appear twice in this
# list (harmless to apt, but one copy could be dropped).
RUN apt-get -y update && \
    apt-get -y dist-upgrade && \
    apt-get -y upgrade && \
    apt-get -y install \
    sudo \
    build-essential \
    python3-dev \
    python3-pip \
    ca-certificates \
    curl \
    git \
    gfortran \
    libblas-dev \
    liblapack-dev \
    libssl-dev \
    libffi-dev \
    libcurl4-openssl-dev \
    libgdal-dev \
    wget \
    jq \
    language-pack-en \
    libcurl4-openssl-dev \
    libffi-dev \
    libzmq3-dev \
    libxml2-dev \
    libxslt-dev \
    python3-lxml \
    zlib1g-dev \
    python3-mysqldb && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# install latest version of pip
RUN pip3 install -U pip

# TODO: Move the Python libraries to a requirements.txt file?

# install basic Python libraries to run Jupyter
RUN pip3 install -U \
    notebook==5.2.* \
    jupyterhub==0.8.* \
    ipython

# add libraries used in intro to python exercise
RUN pip3 install -U jellyfish \
    ngram

# add standard data science libraries
RUN pip3 install -U \
    numpy \
    scipy \
    matplotlib \
    pandas \
    statsmodels \
    scikit-learn

# add libraries for teaching web APIs
RUN pip3 install -U \
    requests \
    requests_oauthlib \
    Flask \
    slackclient

# add libraries for NLP
RUN pip3 install -U \
    spacy \
    nltk \
    gensim

# add libraries for visualization/mapping
RUN pip3 install -U \
    seaborn \
    bokeh \
    folium \
    geopandas \
    geopy

# add libraries for finance
RUN pip3 install -U \
    googlefinance \
    yahoo-finance \
    quandl

# misc libraries
RUN pip3 install -U \
    boto \
    boto3 \
    elasticsearch \
    networkx \
    py2neo \
    pymongo \
    selenium \
    tweepy

# FILE_PATH is supplied by the Makefile's `docker build` invocation and
# points at the directory holding this image's support files.
ARG FILE_PATH
# Add a notebook profile.
COPY $FILE_PATH/jupyter_notebook_config.py /etc/jupyter/
RUN echo "c.NotebookApp.notebook_dir = '/notebooks'" >> /etc/jupyter/jupyter_notebook_config.py
RUN echo "c.NotebookApp.allow_root = True" >> /etc/jupyter/jupyter_notebook_config.py
RUN echo "$NB_USER ALL=NOPASSWD: ALL" >> /etc/sudoers

# Pre-clone the class notes and data sets into the image.
WORKDIR /notebooks
RUN ["git", "clone", "--verbose", "https://github.com/ipeirotis/dealing_with_data.git", "/notebooks"]
# VOLUME /notebooks

WORKDIR /data
RUN ["git", "clone", "--verbose", "https://github.com/ipeirotis/data.git", "/data"]
# VOLUME /data

RUN pip3 install ipython-sql sql_magic mysqlclient

EXPOSE 8888
LABEL org.jupyter.service="jupyter"
# World-writable so the unprivileged notebook user can modify the checkouts.
RUN chmod -R 777 /notebooks
RUN chmod -R 777 /data

CMD ["start-notebook.sh"]

# Add local files as late as possible to avoid cache busting
# NOTE(review): the script must carry the execute bit in the build context
# for the CMD above to work — confirm it is committed as executable.
COPY $FILE_PATH/start-notebook.sh /usr/local/bin/

USER $NB_USER
133 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Notebook server configuration for the single-user Docker image: listen on
# all interfaces on port 8888 and optionally generate a self-signed TLS cert.

from jupyter_core.paths import jupyter_data_dir
import subprocess
import os
import errno
import stat

# `get_config()` is injected into the namespace by Jupyter when it loads this
# file as a configuration file; it is not a regular import.
c = get_config()
c.NotebookApp.ip = '*'  # listen on all interfaces (the container runs behind the hub proxy)
c.NotebookApp.port = 8888
c.NotebookApp.open_browser = False

# Generate a self-signed certificate when the GEN_CERT environment variable
# is set (any value enables it).
if 'GEN_CERT' in os.environ:
    dir_name = jupyter_data_dir()
    pem_file = os.path.join(dir_name, 'notebook.pem')
    try:
        os.makedirs(dir_name)
    except OSError as exc:  # Python >2.5
        # Ignore "directory already exists"; re-raise anything else.
        if exc.errno == errno.EEXIST and os.path.isdir(dir_name):
            pass
        else:
            raise
    # Generate a certificate if one doesn't exist on disk.
    # NOTE(review): `-new -x509` writes a fresh key+cert pair on every start,
    # so an existing notebook.pem is overwritten rather than reused — confirm
    # that is intended.
    subprocess.check_call(['openssl', 'req', '-new',
                           '-newkey', 'rsa:2048',
                           '-days', '365',
                           '-nodes', '-x509',
                           '-subj', '/C=XX/ST=XX/L=XX/O=generated/CN=generated',
                           '-keyout', pem_file,
                           '-out', pem_file])
    # Restrict access to the file: key and cert share one file, so make it
    # readable/writable by the owner only.
    os.chmod(pem_file, stat.S_IRUSR | stat.S_IWUSR)
    c.NotebookApp.certfile = pem_file
37 |
--------------------------------------------------------------------------------
/jupyterhub/docker/single-user/start-notebook.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Container entrypoint: when spawned by JupyterHub (detected via the API
# token it injects), exec the single-user server; otherwise run a plain
# standalone notebook server. Any arguments are forwarded untouched.

set -e

if [[ -n "${JUPYTERHUB_API_TOKEN}" ]]; then
    # launched by JupyterHub, use single-user entrypoint.
    # "$@" (not the unquoted $* of the original) preserves arguments that
    # contain whitespace as single words.
    exec jupyterhub-singleuser "$@"
else
    exec jupyter notebook "$@"
fi
13 |
--------------------------------------------------------------------------------
/start_jupyter.sh:
--------------------------------------------------------------------------------
1 | export PATH=$PATH:/usr/local/bin/geckodriver
2 | jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10 --notebook-dir=/home/ubuntu/jupyter > /tmp/jupyter.out 2>&1 &
3 |
4 |
--------------------------------------------------------------------------------
/stop_jupyter.sh:
--------------------------------------------------------------------------------
1 | kill $(pgrep jupyter)
2 |
--------------------------------------------------------------------------------
/sync_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Refresh the shared /data directory with a fresh clone of the upstream
# GitHub data repository, then re-point ~/sync_data.sh at the copy tracked
# inside the NYU_Notes checkout.

sudo rm -rf /data
sudo mkdir -p /data
sudo git clone https://github.com/ipeirotis/data.git /data

# Replace this (possibly stale) copy of the script with a symlink to the
# repository-tracked version. -f keeps the script from failing if the file
# has already been removed (the original bare `rm` errored in that case).
rm -f /home/ubuntu/sync_data.sh
ln -s /home/ubuntu/jupyter/NYU_Notes/sync_data.sh /home/ubuntu/sync_data.sh
9 |
--------------------------------------------------------------------------------
/sync_notebooks.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Replace the NYU_Notes notebook folder with a fresh clone from GitHub,
# preserving the previous copy under a timestamped name, after asking the
# user for confirmation.

echo "############################################################"
# Fixed: the original banner was missing the word "with", making the
# two-line message ungrammatical.
echo "This will replace the content of NYU_Notes with"
echo "the most recent content from the Github repository"
echo ""
echo "The existing NYU_Notes folder will be renamed"
echo "NYU_Notes_"$(date '+%Y-%b-%d_%H%M')
echo "and preserved until you delete it. "
echo "############################################################"
echo ""
read -p "Are you sure that you want to proceed? (Y/N) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
    cd /home/ubuntu/jupyter
    # NOTE(review): `date` is evaluated again here, so the displayed backup
    # name can differ by a minute from the actual one — confirm acceptable.
    mv NYU_Notes NYU_Notes_$(date '+%Y-%b-%d_%H%M')
    git clone https://github.com/ipeirotis/dealing_with_data.git NYU_Notes
    cd
fi

# Replace this copy of the script with a symlink to the repository-tracked
# version; -f keeps the script from failing if the file is already gone.
rm -f /home/ubuntu/sync_notebooks.sh
ln -s /home/ubuntu/jupyter/NYU_Notes/sync_notebooks.sh /home/ubuntu/sync_notebooks.sh
24 |
--------------------------------------------------------------------------------
/test_notebooks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import subprocess\n",
11 | "import tempfile\n",
12 | "\n",
13 | "import nbformat\n",
14 | "\n",
15 | "def notebook_run(path):\n",
16 | " \"\"\"Execute a notebook via nbconvert and collect output.\n",
17 | " :returns (parsed nb object, execution errors)\n",
18 | " \"\"\"\n",
19 | "\n",
20 | " with tempfile.NamedTemporaryFile(suffix=\".ipynb\") as fout:\n",
21 | " args = [\"jupyter\", \"nbconvert\", \"--to\", \"notebook\", \"--execute\",\n",
22 | " \"--ExecutePreprocessor.timeout=60\",\n",
23 | " \"--output\", fout.name, path]\n",
24 | " subprocess.check_call(args)\n",
25 | "\n",
26 | " fout.seek(0)\n",
27 | " nb = nbformat.read(fout.name, nbformat.current_nbformat)\n",
28 | "\n",
29 | " errors = [output for cell in nb.cells if \"outputs\" in cell\n",
30 | " for output in cell[\"outputs\"]\\\n",
31 | " if output.output_type == \"error\"]\n",
32 | "\n",
33 | " return nb, errors"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 2,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "from os import listdir\n",
43 | "from os.path import isfile, join, isdir\n",
44 | "\n",
45 | "def test_notebooks(path):\n",
46 | " \n",
47 | " notebooks = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith('.ipynb')]\n",
48 | " directories = [f for f in listdir(path) if isdir(join(path, f))]\n",
49 | " \n",
50 | " for notebook in sorted(notebooks):\n",
51 | " print(notebook)\n",
52 | " nb, errors = notebook_run(join(path, notebook))\n",
53 | " assert errors == []\n",
54 | " \n",
55 | " for directory in sorted(directories):\n",
56 | " print(directory)"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 3,
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "name": "stdout",
66 | "output_type": "stream",
67 | "text": [
68 | "A-Introduction_to_iPython_Notebooks.ipynb\n"
69 | ]
70 | },
71 | {
72 | "ename": "CalledProcessError",
73 | "evalue": "Command '['jupyter', 'nbconvert', '--to', 'notebook', '--execute', '--ExecutePreprocessor.timeout=60', '--output', '/tmp/tmpiy8tr2a2.ipynb', '01-Introduction_to_Python/A-Introduction_to_iPython_Notebooks.ipynb']' returned non-zero exit status 1.",
74 | "output_type": "error",
75 | "traceback": [
76 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
77 | "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
78 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_notebooks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'01-Introduction_to_Python'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
79 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtest_notebooks\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnotebook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnotebooks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnotebook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mnb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnotebook_run\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnotebook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
80 | "\u001b[0;32m\u001b[0m in \u001b[0;36mnotebook_run\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\"--ExecutePreprocessor.timeout=60\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \"--output\", fout.name, path]\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0msubprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mfout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
81 | "\u001b[0;32m/usr/lib/python3.6/subprocess.py\u001b[0m in \u001b[0;36mcheck_call\u001b[0;34m(*popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcmd\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[0mcmd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpopenargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mCalledProcessError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mretcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
82 | "\u001b[0;31mCalledProcessError\u001b[0m: Command '['jupyter', 'nbconvert', '--to', 'notebook', '--execute', '--ExecutePreprocessor.timeout=60', '--output', '/tmp/tmpiy8tr2a2.ipynb', '01-Introduction_to_Python/A-Introduction_to_iPython_Notebooks.ipynb']' returned non-zero exit status 1."
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "test_notebooks('01-Introduction_to_Python')"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "name": "stdout",
97 | "output_type": "stream",
98 | "text": [
99 | "D-MySQL_and_Python.ipynb\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "test_notebooks('02-SQL')"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": []
113 | }
114 | ],
115 | "metadata": {
116 | "kernelspec": {
117 | "display_name": "Python 3",
118 | "language": "python",
119 | "name": "python3"
120 | },
121 | "language_info": {
122 | "codemirror_mode": {
123 | "name": "ipython",
124 | "version": 3
125 | },
126 | "file_extension": ".py",
127 | "mimetype": "text/x-python",
128 | "name": "python",
129 | "nbconvert_exporter": "python",
130 | "pygments_lexer": "ipython3",
131 | "version": "3.6.6"
132 | }
133 | },
134 | "nbformat": 4,
135 | "nbformat_minor": 2
136 | }
137 |
--------------------------------------------------------------------------------
/upgrade_linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo apt-get -y update
3 | sudo apt-get -y dist-upgrade
4 | sudo apt-get -y upgrade
5 | sudo apt-get -y autoremove
6 |
--------------------------------------------------------------------------------
/upgrade_python.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo -H pip freeze --local | grep -v '^\-e' | cut -d = -f 1 | tee >(xargs -n1 sudo -H python3 -m pip install -U) | grep -v "Requirement"
3 |
--------------------------------------------------------------------------------