├── .DS_Store
├── LICENSE
├── README.md
├── app
│   ├── Dockerfile
│   ├── code
│   │   ├── .ipynb_checkpoints
│   │   │   └── extract-checkpoint.ipynb
│   │   └── extract.ipynb
│   └── requirements.txt
└── docker-compose.yml
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrisgschon/docker-for-ds/b34fcb9e70e7b94cd809329e7a28d3da26ac908d/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Christopher Schon
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 👨🏼💻 docker-for-ds
2 | A practical introduction to Docker for data scientists, showing how to use and combine Jupyter and MSSQL Server in local isolated Docker containers.
3 |
4 | See the Medium post [here](https://medium.com/applied-data-science/the-full-stack-data-scientist-part-2-a-practical-introduction-to-docker-1ea932c89b57) for the longer tutorial.
5 |
6 |
7 | ## 🐳 Starting with Docker
8 |
9 | - Install Docker. This is a straightforward download from the [docs](https://docs.docker.com/install/).
10 |
11 | - Open up the app and make sure there's a green light telling you it's running.
12 |
13 | - Follow the [hello-world tutorial](https://docs.docker.com/samples/library/hello-world/) to double check the installation works properly.
14 |
15 | ## 🆙 Walkthrough
16 |
17 | We will be running two separate Docker containers:
18 |
19 | 1. An Ubuntu Xenial container to run Python code in Jupyter notebooks
20 |
21 | 2. An MSSQL Server Linux container to host our database
22 |
23 | Open a terminal in the root of the repo and run the following commands:
24 |
25 | ```docker-compose build```
26 |
27 | *Builds the images with docker-compose.yml acting as the configuration*
28 |
29 | ```docker-compose up -d```
30 |
31 | *Spins up the containers*
32 |
33 | ```docker-compose exec app bash```
34 |
35 | *Runs bash from inside the 'app' container*
36 |
37 | You will be inside the container's terminal now, run:
38 |
39 | ```jupyter notebook --ip 0.0.0.0 --no-browser --allow-root```
40 |
41 | Using the token printed in the container's terminal, open this URL in your browser:
42 |
43 | ```http://localhost:8888/?token=URTOKEN```
44 |
45 |
46 | ✅ The token provides a security measure to make sure hackers can't access your code and data!
47 |
48 |
49 | 🎮 Running the notebook will extract Ninja's current Fortnite stats and load them into a fortnite table in your MSSQL instance. View your output table in a database management app. I like to use DBeaver.
50 |
51 | ## How to only build the Jupyter environment
52 |
53 | Open a terminal and navigate to /app, then run
54 |
55 | ```docker build -t ds . ```
56 |
57 | *builds image with tag 'ds'*
58 |
59 | ```docker run --rm -it -p 8888:8888 ds```
60 |
61 | *runs the ds image with notebook ports mapped*
--------------------------------------------------------------------------------
/app/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:xenial-20180417
2 | 
3 | # Base toolchain (Python + pip, htop for container monitoring) plus the
4 | # prerequisites for adding Microsoft's apt repository.
5 | # `apt-get update` is combined with `install` in a single RUN so the package
6 | # index can never go stale across cached layers, and the apt lists are
7 | # removed afterwards to keep the image small.
8 | RUN apt-get update && apt-get install -y --no-install-recommends \
9 |         python3-pip \
10 |         htop \
11 |         apt-transport-https \
12 |         curl \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Microsoft ODBC Driver 17 for SQL Server plus unixODBC dev headers
16 | # (needed to build/run pyodbc).
17 | # curl -fsSL makes the build fail loudly on a bad download instead of
18 | # silently writing an HTML error page into the key or sources list.
19 | RUN curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \
20 |     && curl -fsSL https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list \
21 |     && apt-get update \
22 |     && ACCEPT_EULA=Y apt-get install -y msodbcsql17 unixodbc-dev \
23 |     && rm -rf /var/lib/apt/lists/*
24 | 
25 | # Install Python requirements before copying the code so that editing the
26 | # code does not invalidate the (slow) dependency layer.
27 | # COPY creates /code as needed, so no explicit mkdir is required.
28 | RUN pip3 install --upgrade pip setuptools
29 | COPY ./requirements.txt /code/requirements.txt
30 | RUN pip3 install -r /code/requirements.txt
31 | 
32 | # Copy the code in (docker-compose mounts ./app/code over it in development).
33 | COPY ./ /code/
34 | 
35 | WORKDIR /code
36 | 
--------------------------------------------------------------------------------
/app/code/.ipynb_checkpoints/extract-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import requests\n",
10 | "import pandas as pd\n",
11 | "import pyodbc\n",
12 | "import os\n",
13 | "from sqlalchemy import create_engine"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 15,
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "apiURL = \"https://fortnite-public-api.theapinetwork.com/prod09/users/public/br_stats_v2?user_id=4735ce9132924caf8a5b17789b40f79c\""
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 16,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "resp = requests.get(apiURL)"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 17,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "raw = resp.json()"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 18,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "df = pd.DataFrame(raw['overallData']['defaultModes']).drop('includedPlaylists', axis = 1).drop_duplicates()"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 19,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "df['recordCollectDate'] = pd.to_datetime('today')\n",
59 | "df['accountId'] = raw['accountId']\n",
60 | "df['epicName'] = raw['epicName']"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 20,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "
\n",
72 | "\n",
85 | "
\n",
86 | " \n",
87 | " \n",
88 | " | \n",
89 | " kills | \n",
90 | " matchesplayed | \n",
91 | " placetop1 | \n",
92 | " playersoutlived | \n",
93 | " score | \n",
94 | " recordCollectDate | \n",
95 | " accountId | \n",
96 | " epicName | \n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " \n",
101 | " 0 | \n",
102 | " 101676 | \n",
103 | " 15177 | \n",
104 | " 5379 | \n",
105 | " 108961 | \n",
106 | " 670232 | \n",
107 | " 2019-05-07 16:12:46.254249 | \n",
108 | " 4735ce9132924caf8a5b17789b40f79c | \n",
109 | " Ninja | \n",
110 | "
\n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " kills matchesplayed placetop1 playersoutlived score \\\n",
117 | "0 101676 15177 5379 108961 670232 \n",
118 | "\n",
119 | " recordCollectDate accountId epicName \n",
120 | "0 2019-05-07 16:12:46.254249 4735ce9132924caf8a5b17789b40f79c Ninja "
121 | ]
122 | },
123 | "execution_count": 20,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "df.head()"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 21,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "con_str = 'mssql+pyodbc://' + os.environ['DB_USER'] + ':' + os.environ['DB_PWD'] + '@' + os.environ['DB_SERVER']+':' + os.environ['DB_PORT'] + '/' + os.environ['DB_NAME'] + '?driver=' + '+'.join(os.environ['DB_DRIVER'].split(' '))"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 22,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "'mssql+pyodbc://sa:blogPWD123!@host.docker.internal:1433/master?driver=ODBC+Driver+17+for+SQL+Server'"
150 | ]
151 | },
152 | "execution_count": 22,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "con_str"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 23,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "con = create_engine(con_str)"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 24,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "df.to_sql('fortnite', con, if_exists='append', index = False)"
177 | ]
178 | }
179 | ],
180 | "metadata": {
181 | "kernelspec": {
182 | "display_name": "Python 3",
183 | "language": "python",
184 | "name": "python3"
185 | },
186 | "language_info": {
187 | "codemirror_mode": {
188 | "name": "ipython",
189 | "version": 3
190 | },
191 | "file_extension": ".py",
192 | "mimetype": "text/x-python",
193 | "name": "python",
194 | "nbconvert_exporter": "python",
195 | "pygments_lexer": "ipython3",
196 | "version": "3.5.2"
197 | }
198 | },
199 | "nbformat": 4,
200 | "nbformat_minor": 2
201 | }
202 |
--------------------------------------------------------------------------------
/app/code/extract.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import requests\n",
10 | "import pandas as pd\n",
11 | "import pyodbc\n",
12 | "import os\n",
13 | "from sqlalchemy import create_engine"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "apiURL = \"https://fortnite-public-api.theapinetwork.com/prod09/users/public/br_stats_v2?user_id=4735ce9132924caf8a5b17789b40f79c\""
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 3,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "resp = requests.get(apiURL)"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 4,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "raw = resp.json()"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 5,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "df = pd.DataFrame(raw['overallData']['defaultModes']).drop('includedPlaylists', axis = 1).drop_duplicates()"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 6,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "df['recordCollectDate'] = pd.to_datetime('today')\n",
59 | "df['accountId'] = raw['accountId']\n",
60 | "df['epicName'] = raw['epicName']"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 7,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "\n",
72 | "\n",
85 | "
\n",
86 | " \n",
87 | " \n",
88 | " | \n",
89 | " kills | \n",
90 | " matchesplayed | \n",
91 | " placetop1 | \n",
92 | " playersoutlived | \n",
93 | " score | \n",
94 | " recordCollectDate | \n",
95 | " accountId | \n",
96 | " epicName | \n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " \n",
101 | " 0 | \n",
102 | " 102262 | \n",
103 | " 15255 | \n",
104 | " 5418 | \n",
105 | " 114814 | \n",
106 | " 708835 | \n",
107 | " 2019-05-10 13:50:26.567529 | \n",
108 | " 4735ce9132924caf8a5b17789b40f79c | \n",
109 | " Ninja | \n",
110 | "
\n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " kills matchesplayed placetop1 playersoutlived score \\\n",
117 | "0 102262 15255 5418 114814 708835 \n",
118 | "\n",
119 | " recordCollectDate accountId epicName \n",
120 | "0 2019-05-10 13:50:26.567529 4735ce9132924caf8a5b17789b40f79c Ninja "
121 | ]
122 | },
123 | "execution_count": 7,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "df.head()"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 8,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "con_str = 'mssql+pyodbc://' + os.environ['DB_USER'] + ':' + os.environ['DB_PWD'] + '@' + os.environ['DB_SERVER']+':' + os.environ['DB_PORT'] + '/' + os.environ['DB_NAME'] + '?driver=' + '+'.join(os.environ['DB_DRIVER'].split(' '))"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 9,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "'mssql+pyodbc://sa:blogPWD123!@host.docker.internal:1433/master?driver=ODBC+Driver+17+for+SQL+Server'"
150 | ]
151 | },
152 | "execution_count": 9,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "con_str"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 10,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "con = create_engine(con_str)"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 11,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "df.to_sql('fortnite', con, if_exists='append', index = False)"
177 | ]
178 | }
179 | ],
180 | "metadata": {
181 | "kernelspec": {
182 | "display_name": "Python 3",
183 | "language": "python",
184 | "name": "python3"
185 | },
186 | "language_info": {
187 | "codemirror_mode": {
188 | "name": "ipython",
189 | "version": 3
190 | },
191 | "file_extension": ".py",
192 | "mimetype": "text/x-python",
193 | "name": "python",
194 | "nbconvert_exporter": "python",
195 | "pygments_lexer": "ipython3",
196 | "version": "3.5.2"
197 | }
198 | },
199 | "nbformat": 4,
200 | "nbformat_minor": 2
201 | }
202 |
--------------------------------------------------------------------------------
/app/requirements.txt:
--------------------------------------------------------------------------------
1 | ipykernel==4.9.0
2 | ipython==6.5.0
3 | jupyter==1.0.0
4 | pandas==0.24.2
5 | requests==2.21.0
6 | matplotlib==2.2.3
7 | numpy==1.15.4
8 | scikit-learn==0.20.1
9 | scipy==1.1.0
10 | urllib3==1.24.2
11 | pyodbc==4.0.19
12 | sqlalchemy==1.3.3
13 |
14 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 |   app:
4 |     build:
5 |       context: ./app
6 |       dockerfile: Dockerfile
7 |     volumes:
8 |       - "./app/code/:/code/"
9 |     ports:
10 |       # Quoted so YAML cannot misread the mapping (e.g. sexagesimal numbers).
11 |       - "8888:8888"
12 |     depends_on:
13 |       - db
14 |     tty: true
15 |     environment:
16 |       DB_ENGINE: sql_server.pyodbc
17 |       DB_NAME: master
18 |       DB_SERVER: host.docker.internal
19 |       # Quoted: Compose v3 requires environment values to be strings, and
20 |       # an unquoted 1433 would be typed as an integer by the YAML parser.
21 |       DB_PORT: "1433"
22 |       DB_DRIVER: "ODBC Driver 17 for SQL Server"
23 |       DB_USER: sa
24 |       DB_PWD: "blogPWD123!"
25 | 
26 |     command: /bin/bash
27 | 
28 |   db:
29 |     image: microsoft/mssql-server-linux:2017-CU9
30 |     ports:
31 |       - "1433:1433"
32 |     environment:
33 |       SA_PASSWORD: "blogPWD123!"
34 |       # Quoted: unquoted Y is YAML 1.1 boolean true, which SQL Server's
35 |       # entrypoint would receive as "True" and reject as EULA acceptance.
36 |       ACCEPT_EULA: "Y"
37 | 
--------------------------------------------------------------------------------