├── data_go_th_api_key.ipynb
├── read_tis-620 file.ipynb
├── bangkok open data.ipynb
├── data_go_th_api.ipynb
├── README.md
└── pandas_transform_google_form_data2.ipynb
/data_go_th_api_key.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# data.go.th: Data API\n",
8 | "\n",
9 | "web: https://opendata.data.go.th/\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "---\n",
17 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n",
18 | "* YouTube: https://www.youtube.com/prasertcbs\n",
19 | "* github: https://github.com/prasertcbs/\n",
20 | "* kaggle: https://www.kaggle.com/prasertk/\n",
21 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n",
22 | "---\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "
Play YouTube Video
"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "import pandas as pd\n",
39 | "import requests\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "colab": {
47 | "base_uri": "https://localhost:8080/",
48 | "height": 64
49 | },
50 | "colab_type": "code",
51 | "id": "fdr0pYIf7P-_",
52 | "outputId": "c919deae-c99b-44b0-8924-4d2355ca0b63"
53 | },
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "pandas version: 1.4.2\n",
60 | "requests version: 2.28.0\n"
61 | ]
62 | },
63 | {
64 | "data": {
65 | "text/plain": [
66 | "Timestamp('2022-07-12 19:44:50.631955')"
67 | ]
68 | },
69 | "execution_count": 2,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "print(f\"pandas version: {pd.__version__}\", f\"requests version: {requests.__version__}\", sep=\"\\n\")\n",
76 | "\n",
77 | "# Wall-clock time of this run; shown as the cell's result\n",
78 | "pd.Timestamp.now()\n",
79 | "# date-only alternative: pd.Timestamp.now().strftime('%Y-%m-%d')\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "## Data API\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 3,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "from getpass import getpass  # reads the key without echoing it or saving it in the notebook\n",
96 | "api_key = getpass(\"data.go.th API key: \").strip()  # named prompt; drop stray whitespace from a pasted key"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "- ข้อมูลโรงแรมที่เป็นมิตรกับสิ่งแวดล้อม (Green Hotel)\n",
104 | " * https://opendata.data.go.th/dataset/db0103-002\n",
105 | " * https://opend.data.go.th/get-ckan/datastore_search?resource_id=d13e3be5-9020-4a78-9a7e-760b2dcbf6be&limit=5\n",
106 | "- จำนวนคนทำงานในที่พักแรม\n",
107 | " * https://opendata.data.go.th/dataset/os_17_00010"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "data": {
117 | "text/html": [
118 | "
\n",
119 | "\n",
132 | "
\n",
133 | " \n",
134 | " \n",
135 | " | \n",
136 | " _id | \n",
137 | " No | \n",
138 | " Agency | \n",
139 | " Project | \n",
140 | " Budget Year | \n",
141 | " Award | \n",
142 | " Tumbol | \n",
143 | " Amphur | \n",
144 | " Province | \n",
145 | " Postcode | \n",
146 | " Telephone | \n",
147 | " Start_Date | \n",
148 | " End_Date | \n",
149 | "
\n",
150 | " \n",
151 | " \n",
152 | " \n",
153 | " | 0 | \n",
154 | " 1 | \n",
155 | " 1 | \n",
156 | " โรงแรมดุสิตธานี พัทยา ชลบุรี | \n",
157 | " Green Hotel | \n",
158 | " 2556 | \n",
159 | " ระดับดีเยี่ยม (ทอง) | \n",
160 | " นาเกลือ | \n",
161 | " บางละมุง | \n",
162 | " ชลบุรี | \n",
163 | " 20150 | \n",
164 | " 038 425 611 | \n",
165 | " 2556 | \n",
166 | " 2558 | \n",
167 | "
\n",
168 | " \n",
169 | " | 1 | \n",
170 | " 2 | \n",
171 | " 2 | \n",
172 | " โรงแรมสยามเบย์ ชอว์ พัทยา ชลบุรี | \n",
173 | " Green Hotel | \n",
174 | " 2556 | \n",
175 | " ระดับดีเยี่ยม (ทอง) | \n",
176 | " | \n",
177 | " บางละมุง | \n",
178 | " ชลบุรี | \n",
179 | " 20150 | \n",
180 | " 038 428 678 | \n",
181 | " 2556 | \n",
182 | " 2558 | \n",
183 | "
\n",
184 | " \n",
185 | " | 2 | \n",
186 | " 3 | \n",
187 | " 3 | \n",
188 | " โรงแรมไทยการ์เด้นรีสอร์ท พัทยา ชลบุรี | \n",
189 | " Green Hotel | \n",
190 | " 2556 | \n",
191 | " ระดับดีเยี่ยม (ทอง) | \n",
192 | " หนองปรือ | \n",
193 | " บางละมุง | \n",
194 | " ชลบุรี | \n",
195 | " 20150 | \n",
196 | " 038 370 614 | \n",
197 | " 2556 | \n",
198 | " 2558 | \n",
199 | "
\n",
200 | " \n",
201 | " | 3 | \n",
202 | " 4 | \n",
203 | " 4 | \n",
204 | " โรงแรมลองบีช รีสอร์ท พัทยา ชลบุรี | \n",
205 | " Green Hotel | \n",
206 | " 2556 | \n",
207 | " ระดับดีเยี่ยม (ทอง) | \n",
208 | " | \n",
209 | " บางละมุง | \n",
210 | " ชลบุรี | \n",
211 | " 20150 | \n",
212 | " 038-414-616-26 | \n",
213 | " 2556 | \n",
214 | " 2558 | \n",
215 | "
\n",
216 | " \n",
217 | " | 4 | \n",
218 | " 5 | \n",
219 | " 5 | \n",
220 | " โรงแรมบ้านอัมพวารีสอร์ทแอนด์สปา สมุทรสงคราม | \n",
221 | " Green Hotel | \n",
222 | " 2556 | \n",
223 | " ระดับดีเยี่ยม (ทอง) | \n",
224 | " อัมพวา | \n",
225 | " อัมพวา | \n",
226 | " สมุทรสงคราม | \n",
227 | " 75110 | \n",
228 | " 034 752 222 | \n",
229 | " 2556 | \n",
230 | " 2558 | \n",
231 | "
\n",
232 | " \n",
233 | " | 5 | \n",
234 | " 6 | \n",
235 | " 6 | \n",
236 | " โรงแรมบ้านทะเลดาว รีสอร์ท ประจวบคีรีขันธ์ | \n",
237 | " Green Hotel | \n",
238 | " 2556 | \n",
239 | " ระดับดีเยี่ยม (ทอง) | \n",
240 | " | \n",
241 | " หัวหิน | \n",
242 | " ประจวบคีรีขันธ์ | \n",
243 | " 77110 | \n",
244 | " 083 253 6024 | \n",
245 | " 2556 | \n",
246 | " 2558 | \n",
247 | "
\n",
248 | " \n",
249 | " | 6 | \n",
250 | " 7 | \n",
251 | " 7 | \n",
252 | " โรงแรมบ้านบาหยัน รีสอร์ท หัวหิน ประจวบคีรีขันธ์ | \n",
253 | " Green Hotel | \n",
254 | " 2556 | \n",
255 | " ระดับดีเยี่ยม (ทอง) | \n",
256 | " | \n",
257 | " หัวหิน | \n",
258 | " ประจวบคีรีขันธ์ | \n",
259 | " 77110 | \n",
260 | " 032 533 544 | \n",
261 | " 2556 | \n",
262 | " 2558 | \n",
263 | "
\n",
264 | " \n",
265 | " | 7 | \n",
266 | " 8 | \n",
267 | " 8 | \n",
268 | " โรงแรมสยามเคมเปนสกี้ กรุงเทพฯ | \n",
269 | " Green Hotel | \n",
270 | " 2556 | \n",
271 | " ระดับดีเยี่ยม (ทอง) | \n",
272 | " ปทุมวัน | \n",
273 | " ปทุมวัน | \n",
274 | " กรุงเทพมหานคร | \n",
275 | " 10330 | \n",
276 | " 02 162 9000 | \n",
277 | " 2556 | \n",
278 | " 2558 | \n",
279 | "
\n",
280 | " \n",
281 | " | 8 | \n",
282 | " 9 | \n",
283 | " 9 | \n",
284 | " โรงแรมอมารีดอนเมือง แอร์พอร์ต กรุงเทพฯ | \n",
285 | " Green Hotel | \n",
286 | " 2556 | \n",
287 | " ระดับดีเยี่ยม (ทอง) | \n",
288 | " สีกัน | \n",
289 | " ดอนเมือง | \n",
290 | " กรุงเทพมหานคร | \n",
291 | " 10210 | \n",
292 | " 02 566 1941 | \n",
293 | " 2556 | \n",
294 | " 2558 | \n",
295 | "
\n",
296 | " \n",
297 | " | 9 | \n",
298 | " 10 | \n",
299 | " 10 | \n",
300 | " โรงแรมสยามเบย์วิว พัทยา ชลบุรี | \n",
301 | " Green Hotel | \n",
302 | " 2556 | \n",
303 | " ระดับดีมาก (เงิน) | \n",
304 | " หนองปรือ | \n",
305 | " บางละมุง | \n",
306 | " ชลบุรี | \n",
307 | " 20150 | \n",
308 | " 038 423 871 | \n",
309 | " 2556 | \n",
310 | " 2558 | \n",
311 | "
\n",
312 | " \n",
313 | "
\n",
314 | "
"
315 | ],
316 | "text/plain": [
317 | " _id No Agency Project \\\n",
318 | "0 1 1 โรงแรมดุสิตธานี พัทยา ชลบุรี Green Hotel \n",
319 | "1 2 2 โรงแรมสยามเบย์ ชอว์ พัทยา ชลบุรี Green Hotel \n",
320 | "2 3 3 โรงแรมไทยการ์เด้นรีสอร์ท พัทยา ชลบุรี Green Hotel \n",
321 | "3 4 4 โรงแรมลองบีช รีสอร์ท พัทยา ชลบุรี Green Hotel \n",
322 | "4 5 5 โรงแรมบ้านอัมพวารีสอร์ทแอนด์สปา สมุทรสงคราม Green Hotel \n",
323 | "5 6 6 โรงแรมบ้านทะเลดาว รีสอร์ท ประจวบคีรีขันธ์ Green Hotel \n",
324 | "6 7 7 โรงแรมบ้านบาหยัน รีสอร์ท หัวหิน ประจวบคีรีขันธ์ Green Hotel \n",
325 | "7 8 8 โรงแรมสยามเคมเปนสกี้ กรุงเทพฯ Green Hotel \n",
326 | "8 9 9 โรงแรมอมารีดอนเมือง แอร์พอร์ต กรุงเทพฯ Green Hotel \n",
327 | "9 10 10 โรงแรมสยามเบย์วิว พัทยา ชลบุรี Green Hotel \n",
328 | "\n",
329 | " Budget Year Award Tumbol Amphur Province \\\n",
330 | "0 2556 ระดับดีเยี่ยม (ทอง) นาเกลือ บางละมุง ชลบุรี \n",
331 | "1 2556 ระดับดีเยี่ยม (ทอง) บางละมุง ชลบุรี \n",
332 | "2 2556 ระดับดีเยี่ยม (ทอง) หนองปรือ บางละมุง ชลบุรี \n",
333 | "3 2556 ระดับดีเยี่ยม (ทอง) บางละมุง ชลบุรี \n",
334 | "4 2556 ระดับดีเยี่ยม (ทอง) อัมพวา อัมพวา สมุทรสงคราม \n",
335 | "5 2556 ระดับดีเยี่ยม (ทอง) หัวหิน ประจวบคีรีขันธ์ \n",
336 | "6 2556 ระดับดีเยี่ยม (ทอง) หัวหิน ประจวบคีรีขันธ์ \n",
337 | "7 2556 ระดับดีเยี่ยม (ทอง) ปทุมวัน ปทุมวัน กรุงเทพมหานคร \n",
338 | "8 2556 ระดับดีเยี่ยม (ทอง) สีกัน ดอนเมือง กรุงเทพมหานคร \n",
339 | "9 2556 ระดับดีมาก (เงิน) หนองปรือ บางละมุง ชลบุรี \n",
340 | "\n",
341 | " Postcode Telephone Start_Date End_Date \n",
342 | "0 20150 038 425 611 2556 2558 \n",
343 | "1 20150 038 428 678 2556 2558 \n",
344 | "2 20150 038 370 614 2556 2558 \n",
345 | "3 20150 038-414-616-26 2556 2558 \n",
346 | "4 75110 034 752 222 2556 2558 \n",
347 | "5 77110 083 253 6024 2556 2558 \n",
348 | "6 77110 032 533 544 2556 2558 \n",
349 | "7 10330 02 162 9000 2556 2558 \n",
350 | "8 10210 02 566 1941 2556 2558 \n",
351 | "9 20150 038 423 871 2556 2558 "
352 | ]
353 | },
354 | "execution_count": 4,
355 | "metadata": {},
356 | "output_type": "execute_result"
357 | }
358 | ],
359 | "source": [
360 | "# Request headers: the API key is sent in the `api-key` header\n",
361 | "headers = {\n",
362 | "    \"api-key\": api_key,\n",
363 | "}\n",
364 | "\n",
365 | "params = {\"resource_id\": \"d13e3be5-9020-4a78-9a7e-760b2dcbf6be\", \"limit\": 10}\n",
366 | "r = requests.get(\n",
367 | "    \"https://opend.data.go.th/get-ckan/datastore_search\", params=params, headers=headers, timeout=30\n",
368 | ")\n",
369 | "r.raise_for_status()  # stop on an HTTP error; otherwise `records` below would be undefined or stale\n",
370 | "j = r.json()\n",
371 | "records = j[\"result\"][\"records\"]\n",
372 | "df = pd.DataFrame(records)\n",
373 | "df\n"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": null,
379 | "metadata": {},
380 | "outputs": [],
381 | "source": [
382 | "r.json()"
383 | ]
384 | }
385 | ],
386 | "metadata": {
387 | "kernelspec": {
388 | "display_name": "Python 3.9.12 ('base')",
389 | "language": "python",
390 | "name": "python3"
391 | },
392 | "language_info": {
393 | "codemirror_mode": {
394 | "name": "ipython",
395 | "version": 3
396 | },
397 | "file_extension": ".py",
398 | "mimetype": "text/x-python",
399 | "name": "python",
400 | "nbconvert_exporter": "python",
401 | "pygments_lexer": "ipython3",
402 | "version": "3.9.12"
403 | },
404 | "vscode": {
405 | "interpreter": {
406 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce"
407 | }
408 | },
409 | "widgets": {
410 | "application/vnd.jupyter.widget-state+json": {
411 | "state": {},
412 | "version_major": 2,
413 | "version_minor": 0
414 | }
415 | }
416 | },
417 | "nbformat": 4,
418 | "nbformat_minor": 4
419 | }
420 |
--------------------------------------------------------------------------------
/read_tis-620 file.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "86e1940f",
6 | "metadata": {
7 | "toc-hr-collapsed": false
8 | },
9 | "source": [
10 | "# read/write TIS-620 file\n",
11 | "* standard encoding: https://docs.python.org/3.7/library/codecs.html#standard-encodings\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "---\n",
19 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n",
20 | "* YouTube: https://www.youtube.com/prasertcbs\n",
21 | "* github: https://github.com/prasertcbs/\n",
22 | "* kaggle: https://www.kaggle.com/prasertk/\n",
23 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n",
24 | "---\n"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "import pandas as pd\n"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "id": "86e1940f",
39 | "metadata": {
40 | "toc-hr-collapsed": false
41 | },
42 | "source": [
43 | "# pandas read TIS-620 file from URL\n"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/html": [
54 | "\n",
55 | "\n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | " | \n",
72 | " abbr | \n",
73 | " province_th | \n",
74 | " province_en | \n",
75 | "
\n",
76 | " \n",
77 | " \n",
78 | " \n",
79 | " | 0 | \n",
80 | " กจ | \n",
81 | " กาญจนบุรี | \n",
82 | " Kanchanaburi | \n",
83 | "
\n",
84 | " \n",
85 | " | 1 | \n",
86 | " กทม | \n",
87 | " กรุงเทพมหานคร | \n",
88 | " Bangkok | \n",
89 | "
\n",
90 | " \n",
91 | " | 2 | \n",
92 | " จบ | \n",
93 | " จันทบุรี | \n",
94 | " Chanthaburi | \n",
95 | "
\n",
96 | " \n",
97 | " | 3 | \n",
98 | " ฉช | \n",
99 | " ฉะเชิงเทรา | \n",
100 | " Chachoengsao | \n",
101 | "
\n",
102 | " \n",
103 | " | 4 | \n",
104 | " ชน | \n",
105 | " ชัยนาท | \n",
106 | " Chainat | \n",
107 | "
\n",
108 | " \n",
109 | " | ... | \n",
110 | " ... | \n",
111 | " ... | \n",
112 | " ... | \n",
113 | "
\n",
114 | " \n",
115 | " | 72 | \n",
116 | " ยล | \n",
117 | " ยะลา | \n",
118 | " Yala | \n",
119 | "
\n",
120 | " \n",
121 | " | 73 | \n",
122 | " รน | \n",
123 | " ระนอง | \n",
124 | " Ranong | \n",
125 | "
\n",
126 | " \n",
127 | " | 74 | \n",
128 | " สข | \n",
129 | " สงขลา | \n",
130 | " Songkhla | \n",
131 | "
\n",
132 | " \n",
133 | " | 75 | \n",
134 | " สฎ | \n",
135 | " สุราษฎร์ธานี | \n",
136 | " Surat Thani | \n",
137 | "
\n",
138 | " \n",
139 | " | 76 | \n",
140 | " สต | \n",
141 | " สตูล | \n",
142 | " Satun | \n",
143 | "
\n",
144 | " \n",
145 | "
\n",
146 | "
77 rows × 3 columns
\n",
147 | "
"
148 | ],
149 | "text/plain": [
150 | " abbr province_th province_en\n",
151 | "0 กจ กาญจนบุรี Kanchanaburi\n",
152 | "1 กทม กรุงเทพมหานคร Bangkok\n",
153 | "2 จบ จันทบุรี Chanthaburi\n",
154 | "3 ฉช ฉะเชิงเทรา Chachoengsao\n",
155 | "4 ชน ชัยนาท Chainat\n",
156 | ".. ... ... ...\n",
157 | "72 ยล ยะลา Yala\n",
158 | "73 รน ระนอง Ranong\n",
159 | "74 สข สงขลา Songkhla\n",
160 | "75 สฎ สุราษฎร์ธานี Surat Thani\n",
161 | "76 สต สตูล Satun\n",
162 | "\n",
163 | "[77 rows x 3 columns]"
164 | ]
165 | },
166 | "metadata": {},
167 | "output_type": "display_data"
168 | }
169 | ],
170 | "source": [
171 | "url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province_tis.csv\"\n",
172 | "# url = \"https://data.go.th/dataset/c24ec42f-db0d-4a33-9e13-a98cd22de74d/resource/3d222c43-dea2-43f5-ad0d-7f0e58d4ad54/download/flightrule_11_2021.csv\"\n",
173 | "# df = pd.read_csv(url, encoding=\"utf-8\")\n",
174 | "df = pd.read_csv(url, encoding=\"iso8859_11\")\n",
175 | "# df = pd.read_csv(url, encoding=\"tis-620\")\n",
176 | "# df = pd.read_csv(url, encoding=\"thai\")\n",
177 | "# df = pd.read_csv(url, encoding=\"cp874\")\n",
178 | "df\n"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 9,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "df.to_csv(\"province_utf-8.csv\", encoding=\"utf-8\", index=False)  # UTF-8 is the pandas default; stated explicitly\n",
188 | "\n"
189 | ]
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 | "# Save as TIS-620\n"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 5,
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "df.to_csv(\"province_tis-620.csv\", encoding=\"iso8859_11\", index=False)  # write with the ISO 8859-11 codec, no index column\n",
205 | "\n"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "# Read TIS-620 file from local\n"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 6,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "abbr,province_th,province_en\n",
225 | "กจ,กาญจนบุรี,Kanchanaburi\n",
226 | "กทม,กรุงเทพมหานคร,Bangkok\n",
227 | "จบ,จันทบุรี,Chanthaburi\n",
228 | "ฉช,ฉะเชิงเทรา,Chachoengsao\n",
229 | "ชน,ชัยนาท,Chainat\n",
230 | "ชบ,ชลบุรี,Chonburi\n",
231 | "ตร,ตราด,Trat\n",
232 | "นฐ,นครปฐม,Nakhon Pathom\n",
233 | "นบ,นนทบุรี,Nonthaburi\n",
234 | "นย,นครนายก,Nakhon Nayok\n",
235 | "ปข,ประจวบคีรีขันธ์,Prachuap Khiri Khan\n",
236 | "ปจ,ปราจีนบุรี,Prachinburi\n",
237 | "ปท,ปทุมธานี,Pathum Thani\n",
238 | "พบ,เพชรบุรี,Phetchaburi\n",
239 | "รบ,ราชบุรี,Ratchaburi\n",
240 | "รย,ระยอง,Rayong\n",
241 | "ลบ,ลพบุรี,Lopburi\n",
242 | "สก,สระแก้ว,Sa Kaeo\n",
243 | "สค,สมุทรสาคร,Samut Sakhon\n",
244 | "สบ,สระบุรี,Saraburi\n",
245 | "สป,สมุทรปราการ,Samut Prakan\n",
246 | "สพ,สุพรรณบุรี,Suphan Buri\n",
247 | "สส,สมุทรสงคราม,Samut Songkhram\n",
248 | "สห,สิงห์บุรี,Sing Buri\n",
249 | "อท,อ่างทอง,Ang Thong\n",
250 | "อย,พระนครศรีอยุธยา,Phra Nakhon Si Ayutthaya\n",
251 | "กพ,กำแพงเพชร,Kamphaeng Phet\n",
252 | "ชม,เชียงใหม่,Chiang Mai\n",
253 | "ชร,เชียงราย,Chiang Rai\n",
254 | "ตก,ตาก,Tak\n",
255 | "นน,น่าน,Nan\n",
256 | "นว,นครสวรรค์,Nakhon Sawan\n",
257 | "พจ,พิจิตร,Phichit\n",
258 | "พช,เพชรบูรณ์,Phetchabun\n",
259 | "พย,พะเยา,Phayao\n",
260 | "พร,แพร่,Phrae\n",
261 | "พล,พิษณุโลก,Phitsanulok\n",
262 | "มส,แม่ฮ่องสอน,Mae Hong Son\n",
263 | "ลป,ลำปาง,Lampang\n",
264 | "ลพ,ลำพูน,Lamphun\n",
265 | "สท,สุโขทัย,Sukhothai\n",
266 | "อต,อุตรดิตถ์,Uttaradit\n",
267 | "อน,อุทัยธานี,Uthai Thani\n",
268 | "กส,กาฬสินธุ์,Kalasin\n",
269 | "ขก,ขอนแก่น,Khon Kaen\n",
270 | "ชย,ชัยภูมิ,Chaiyaphum\n",
271 | "นค,หนองคาย,Nong Khai\n",
272 | "นพ,นครพนม,Nakhon Phanom\n",
273 | "นภ,หนองบัวลำภู,Nong Bua Lamphu\n",
274 | "นม,นครราชสีมา,Nakhon Ratchasima\n",
275 | "บก,บึงกาฬ,Bueng Kan\n",
276 | "บร,บุรีรัมย์,Buriram\n",
277 | "มค,มหาสารคาม,Maha Sarakham\n",
278 | "มห,มุกดาหาร,Mukdahan\n",
279 | "ยส,ยโสธร,Yasothon\n",
280 | "รอ,ร้อยเอ็ด,Roi Et\n",
281 | "ลย,เลย,Loei\n",
282 | "ศก,ศรีสะเกษ,Sisaket\n",
283 | "สน,สกลนคร,Sakon Nakhon\n",
284 | "สร,สุรินทร์,Surin\n",
285 | "อจ,อำนาจเจริญ,Amnat Charoen\n",
286 | "อด,อุดรธานี,Udon Thani\n",
287 | "อบ,อุบลราชธานี,Ubon Ratchathani\n",
288 | "กบ,กระบี่,Krabi\n",
289 | "ชพ,ชุมพร,Chumphon\n",
290 | "ตง,ตรัง,Trang\n",
291 | "นธ,นราธิวาส,Narathiwat\n",
292 | "นศ,นครศรีธรรมราช,Nakhon Si Thammarat\n",
293 | "ปน,ปัตตานี,Pattani\n",
294 | "พง,พังงา,Phang Nga\n",
295 | "พท,พัทลุง,Phattalung\n",
296 | "ภก,ภูเก็ต,Phuket\n",
297 | "ยล,ยะลา,Yala\n",
298 | "รน,ระนอง,Ranong\n",
299 | "สข,สงขลา,Songkhla\n",
300 | "สฎ,สุราษฎร์ธานี,Surat Thani\n",
301 | "สต,สตูล,Satun\n",
302 | "\n"
303 | ]
304 | }
305 | ],
306 | "source": [
307 | "with open(\"province_tis-620.csv\", encoding=\"iso8859_11\") as src:  # read-text mode is the default\n",
308 | "    print(src.read())\n",
309 | "\n"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {},
315 | "source": [
316 | "# read text file from URL\n"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {},
323 | "outputs": [
324 | {
325 | "name": "stdout",
326 | "output_type": "stream",
327 | "text": [
328 | "p_id,name,name_en,region,area_km2\n",
329 | "กจ,กาญจนบุรี,Kanchanaburi,C,19483.148\n",
330 | "กทม,กรุงเทพมหานคร,Bangkok,C,1568.737\n",
331 | "จบ,จันทบุรี,Chanthaburi,C,6338\n",
332 | "ฉช,ฉะเชิงเทรา,Chachoengsao,C,5351\n",
333 | "ชน,ชัยนาท,Chainat,C,2469.746\n",
334 | "ชบ,ชลบุรี,Chonburi,C,4611.829\n",
335 | "ตร,ตราด,Trat,C,2819\n",
336 | "นฐ,นครปฐม,Nakhon Pathom,C,2168.327\n",
337 | "นบ,นนทบุรี,Nonthaburi,C,622.303\n",
338 | "นย,นครนายก,Nakhon Nayok,C,2122\n",
339 | "ปข,ประจวบคีรีขันธ์,Prachuap Khiri Khan,C,6367.62\n",
340 | "ปจ,ปราจีนบุรี,Prachinburi,C,4762.362\n",
341 | "ปท,ปทุมธานี,Pathum Thani,C,1525.856\n",
342 | "พบ,เพชรบุรี,Phetchaburi,C,6225.138\n",
343 | "รบ,ราชบุรี,Ratchaburi,C,5196.462\n",
344 | "รย,ระยอง,Rayong,C,3552\n",
345 | "ลบ,ลพบุรี,Lopburi,C,6199.753\n",
346 | "สก,สระแก้ว,Sa Kaeo,C,7195.436\n",
347 | "สค,สมุทรสาคร,Samut Sakhon,C,872.347\n",
348 | "สบ,สระบุรี,Saraburi,C,3576.486\n",
349 | "สป,สมุทรปราการ,Samut Prakan,C,1004.092\n",
350 | "สพ,สุพรรณบุรี,Suphan Buri,C,5358.008\n",
351 | "สส,สมุทรสงคราม,Samut Songkhram,C,416.707\n",
352 | "สห,สิงห์บุรี,Sing Buri,C,822.478\n",
353 | "อท,อ่างทอง,Ang Thong,C,968.372\n",
354 | "อย,พระนครศรีอยุธยา,Phra Nakhon Si Ayutthaya,C,2556.64\n",
355 | "กพ,กำแพงเพชร,Kamphaeng Phet,N,8607.49\n",
356 | "ชม,เชียงใหม่,Chiang Mai,N,20107.057\n",
357 | "ชร,เชียงราย,Chiang Rai,N,11678.369\n",
358 | "ตก,ตาก,Tak,N,16406.65\n",
359 | "นน,น่าน,Nan,N,11472.072\n",
360 | "นว,นครสวรรค์,Nakhon Sawan,N,9597.677\n",
361 | "พจ,พิจิตร,Phichit,N,4531.013\n",
362 | "พช,เพชรบูรณ์,Phetchabun,N,12668.416\n",
363 | "พย,พะเยา,Phayao,N,6335.06\n",
364 | "พร,แพร่,Phrae,N,6538.598\n",
365 | "พล,พิษณุโลก,Phitsanulok,N,10815.854\n",
366 | "มส,แม่ฮ่องสอน,Mae Hong Son,N,12681.259\n",
367 | "ลป,ลำปาง,Lampang,N,12533.961\n",
368 | "ลพ,ลำพูน,Lamphun,N,4505.882\n",
369 | "สท,สุโขทัย,Sukhothai,N,6596.092\n",
370 | "อต,อุตรดิตถ์,Uttaradit,N,7838.592\n",
371 | "อน,อุทัยธานี,Uthai Thani,N,6730.246\n",
372 | "กส,กาฬสินธุ์,Kalasin,NE,6946.746\n",
373 | "ขก,ขอนแก่น,Khon Kaen,NE,10885.991\n",
374 | "ชย,ชัยภูมิ,Chaiyaphum,NE,12778.287\n",
375 | "นค,หนองคาย,Nong Khai,NE,3027.28\n",
376 | "นพ,นครพนม,Nakhon Phanom,NE,5512.668\n",
377 | "นภ,หนองบัวลำภู,Nong Bua Lamphu,NE,3859.086\n",
378 | "นม,นครราชสีมา,Nakhon Ratchasima,NE,20493.964\n",
379 | "บก,บึงกาฬ,Bueng Kan,NE,4305\n",
380 | "บร,บุรีรัมย์,Buriram,NE,10322.885\n",
381 | "มค,มหาสารคาม,Maha Sarakham,NE,5291.683\n",
382 | "มห,มุกดาหาร,Mukdahan,NE,4339.83\n",
383 | "ยส,ยโสธร,Yasothon,NE,4161.664\n",
384 | "รอ,ร้อยเอ็ด,Roi Et,NE,8299.449\n",
385 | "ลย,เลย,Loei,NE,11424.612\n",
386 | "ศก,ศรีสะเกษ,Sisaket,NE,8839.976\n",
387 | "สน,สกลนคร,Sakon Nakhon,NE,9605.764\n",
388 | "สร,สุรินทร์,Surin,NE,8124.056\n",
389 | "อจ,อำนาจเจริญ,Amnat Charoen,NE,3161.248\n",
390 | "อด,อุดรธานี,Udon Thani,NE,11730.302\n",
391 | "อบ,อุบลราชธานี,Ubon Ratchathani,NE,16112.65\n",
392 | "กบ,กระบี่,Krabi,S,4708.512\n",
393 | "ชพ,ชุมพร,Chumphon,S,6010.849\n",
394 | "ตง,ตรัง,Trang,S,4917.519\n",
395 | "นธ,นราธิวาส,Narathiwat,S,4475.43\n",
396 | "นศ,นครศรีธรรมราช,Nakhon Si Thammarat,S,9942.502\n",
397 | "ปน,ปัตตานี,Pattani,S,1940.356\n",
398 | "พง,พังงา,Phang Nga,S,4170.895\n",
399 | "พท,พัทลุง,Phattalung,S,3424.473\n",
400 | "ภก,ภูเก็ต,Phuket,S,543.034\n",
401 | "ยล,ยะลา,Yala,S,4521.078\n",
402 | "รน,ระนอง,Ranong,S,3298.045\n",
403 | "สข,สงขลา,Songkhla,S,7393.889\n",
404 | "สฎ,สุราษฎร์ธานี,Surat Thani,S,12891.469\n",
405 | "สต,สตูล,Satun,S,2478.977\n",
406 | "\n"
407 | ]
408 | }
409 | ],
410 | "source": [
411 | "import requests\n",
412 | "\n",
413 | "url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province.csv\"\n",
414 | "# url=\"https://data.go.th/dataset/78e7405b-947e-4752-a10c-67c4b819ec74/resource/1cdb37ad-f52b-4a94-a829-c1876421ea83/download/usage_64_sep.csv\"\n",
415 | "r = requests.get(url, timeout=30)  # r -> response; the timeout keeps the cell from hanging forever\n",
416 | "r.raise_for_status()  # surface an HTTP error instead of silently printing nothing\n",
417 | "s = r.text\n",
418 | "print(s)\n",
419 | "# print(s.splitlines())\n"
420 | ]
421 | },
422 | {
423 | "cell_type": "markdown",
424 | "metadata": {},
425 | "source": [
426 | "# read TIS-620 from URL\n",
427 | "\n",
428 | "- standard encoding: https://docs.python.org/3.7/library/codecs.html#standard-encodings\n"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": null,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": [
437 | "import requests\n",
438 | "\n",
439 | "# url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province_tis.csv\"\n",
440 | "url = \"https://data.go.th/dataset/78e7405b-947e-4752-a10c-67c4b819ec74/resource/1cdb37ad-f52b-4a94-a829-c1876421ea83/download/usage_64_sep.csv\"\n",
441 | "\n",
442 | "r = requests.get(url, timeout=30)  # the timeout keeps the cell from hanging forever\n",
443 | "r.raise_for_status()  # surface an HTTP error instead of silently printing nothing\n",
444 | "s = r.content.decode(\"iso8859_11\")  # decode the raw bytes explicitly rather than relying on r.text\n",
445 | "# s = r.content.decode('cp874') # code page 874\n",
446 | "# s = r.content.decode(\"thai\")\n",
447 | "# s = r.content.decode('tis-620')\n",
448 | "print(s)\n",
449 | "# print(s.splitlines())\n"
450 | ]
451 | }
452 | ],
453 | "metadata": {
454 | "interpreter": {
455 | "hash": "eefe6ca76f6e878a3e3929bbec9156982baa217b1f16ff8dc984bf661b4791cc"
456 | },
457 | "kernelspec": {
458 | "display_name": "Python 3.8.12 ('base')",
459 | "language": "python",
460 | "name": "python3"
461 | },
462 | "language_info": {
463 | "codemirror_mode": {
464 | "name": "ipython",
465 | "version": 3
466 | },
467 | "file_extension": ".py",
468 | "mimetype": "text/x-python",
469 | "name": "python",
470 | "nbconvert_exporter": "python",
471 | "pygments_lexer": "ipython3",
472 | "version": "3.8.13"
473 | },
474 | "orig_nbformat": 4
475 | },
476 | "nbformat": 4,
477 | "nbformat_minor": 2
478 | }
479 |
--------------------------------------------------------------------------------
/bangkok open data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# [Bangkok open data](https://data.bangkok.go.th/dataset/)\n",
8 | "\n",
9 | "---\n",
10 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n",
11 | "* YouTube: https://www.youtube.com/prasertcbs\n",
12 | "* github: https://github.com/prasertcbs/\n",
13 | "* kaggle: https://www.kaggle.com/prasertk/\n",
14 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n",
15 | "---\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "import requests\n",
25 | "import pandas as pd\n"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 10,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
36 | "\n",
37 | "\n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " | \n",
54 | " id | \n",
55 | " station_area | \n",
56 | " pm10_min | \n",
57 | " pm10_max | \n",
58 | " pm10_overstd | \n",
59 | " pm10_count | \n",
60 | " no2_min | \n",
61 | " no2_max | \n",
62 | " no2_overstd | \n",
63 | " no2_count | \n",
64 | " ... | \n",
65 | " o38h_min | \n",
66 | " o38h_max | \n",
67 | " o38h_overstd | \n",
68 | " o38h_count | \n",
69 | " pm2_5_min | \n",
70 | " pm2_5_max | \n",
71 | " pm2_5_overstd | \n",
72 | " pm2_5_count | \n",
73 | " adddate | \n",
74 | " updatedate | \n",
75 | "
\n",
76 | " \n",
77 | " \n",
78 | " \n",
79 | " | 0 | \n",
80 | " 6 | \n",
81 | " ค่ามาตรฐาน | \n",
82 | " - | \n",
83 | " - | \n",
84 | " 120 | \n",
85 | " - | \n",
86 | " - | \n",
87 | " - | \n",
88 | " 170 | \n",
89 | " - | \n",
90 | " ... | \n",
91 | " - | \n",
92 | " - | \n",
93 | " 70 | \n",
94 | " - | \n",
95 | " - | \n",
96 | " - | \n",
97 | " 50 | \n",
98 | " - | \n",
99 | " 2020-05-23 | \n",
100 | " 2020-09-15 | \n",
101 | "
\n",
102 | " \n",
103 | " | 1 | \n",
104 | " 5 | \n",
105 | " สรุป | \n",
106 | " 13 | \n",
107 | " 185 | \n",
108 | " 59 | \n",
109 | " 1434 | \n",
110 | " 0 | \n",
111 | " 158.4 | \n",
112 | " 0 | \n",
113 | " 33142 | \n",
114 | " ... | \n",
115 | " 0 | \n",
116 | " 79.4 | \n",
117 | " 9 | \n",
118 | " 8665 | \n",
119 | " 9 | \n",
120 | " 76 | \n",
121 | " 16 | \n",
122 | " 354 | \n",
123 | " 2020-05-23 | \n",
124 | " 2020-09-15 | \n",
125 | "
\n",
126 | " \n",
127 | " | 2 | \n",
128 | " 4 | \n",
129 | " ราชเทวี | \n",
130 | " 18 | \n",
131 | " 125 | \n",
132 | " 1 | \n",
133 | " 354 | \n",
134 | " 2 | \n",
135 | " 100 | \n",
136 | " 0 | \n",
137 | " 8372 | \n",
138 | " ... | \n",
139 | " 0 | \n",
140 | " 79.4 | \n",
141 | " 9 | \n",
142 | " 8665 | \n",
143 | " 9 | \n",
144 | " 76 | \n",
145 | " 16 | \n",
146 | " 354 | \n",
147 | " 2020-05-23 | \n",
148 | " 2020-09-15 | \n",
149 | "
\n",
150 | " \n",
151 | " | 3 | \n",
152 | " 3 | \n",
153 | " ราษฎร์บูรณะ | \n",
154 | " 36 | \n",
155 | " 165 | \n",
156 | " 16 | \n",
157 | " 361 | \n",
158 | " 0 | \n",
159 | " 116.2 | \n",
160 | " 0 | \n",
161 | " 8355 | \n",
162 | " ... | \n",
163 | " - | \n",
164 | " - | \n",
165 | " - | \n",
166 | " - | \n",
167 | " - | \n",
168 | " - | \n",
169 | " - | \n",
170 | " - | \n",
171 | " 2020-05-23 | \n",
172 | " 2020-09-15 | \n",
173 | "
\n",
174 | " \n",
175 | " | 4 | \n",
176 | " 2 | \n",
177 | " พระโขนง | \n",
178 | " 13 | \n",
179 | " 155 | \n",
180 | " 14 | \n",
181 | " 362 | \n",
182 | " 0 | \n",
183 | " 158.4 | \n",
184 | " 0 | \n",
185 | " 8286 | \n",
186 | " ... | \n",
187 | " - | \n",
188 | " - | \n",
189 | " - | \n",
190 | " - | \n",
191 | " - | \n",
192 | " - | \n",
193 | " - | \n",
194 | " - | \n",
195 | " 2020-05-23 | \n",
196 | " 2020-09-15 | \n",
197 | "
\n",
198 | " \n",
199 | " | 5 | \n",
200 | " 1 | \n",
201 | " ดินแดง | \n",
202 | " 29 | \n",
203 | " 185 | \n",
204 | " 28 | \n",
205 | " 357 | \n",
206 | " 0 | \n",
207 | " 136.6 | \n",
208 | " 0 | \n",
209 | " 8129 | \n",
210 | " ... | \n",
211 | " - | \n",
212 | " - | \n",
213 | " - | \n",
214 | " - | \n",
215 | " - | \n",
216 | " - | \n",
217 | " - | \n",
218 | " - | \n",
219 | " 2020-05-23 | \n",
220 | " 2020-10-15 | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
6 rows × 32 columns
\n",
225 | "
"
226 | ],
227 | "text/plain": [
228 | " id station_area pm10_min pm10_max pm10_overstd pm10_count no2_min no2_max \\\n",
229 | "0 6 ค่ามาตรฐาน - - 120 - - - \n",
230 | "1 5 สรุป 13 185 59 1434 0 158.4 \n",
231 | "2 4 ราชเทวี 18 125 1 354 2 100 \n",
232 | "3 3 ราษฎร์บูรณะ 36 165 16 361 0 116.2 \n",
233 | "4 2 พระโขนง 13 155 14 362 0 158.4 \n",
234 | "5 1 ดินแดง 29 185 28 357 0 136.6 \n",
235 | "\n",
236 | " no2_overstd no2_count ... o38h_min o38h_max o38h_overstd o38h_count \\\n",
237 | "0 170 - ... - - 70 - \n",
238 | "1 0 33142 ... 0 79.4 9 8665 \n",
239 | "2 0 8372 ... 0 79.4 9 8665 \n",
240 | "3 0 8355 ... - - - - \n",
241 | "4 0 8286 ... - - - - \n",
242 | "5 0 8129 ... - - - - \n",
243 | "\n",
244 | " pm2_5_min pm2_5_max pm2_5_overstd pm2_5_count adddate updatedate \n",
245 | "0 - - 50 - 2020-05-23 2020-09-15 \n",
246 | "1 9 76 16 354 2020-05-23 2020-09-15 \n",
247 | "2 9 76 16 354 2020-05-23 2020-09-15 \n",
248 | "3 - - - - 2020-05-23 2020-09-15 \n",
249 | "4 - - - - 2020-05-23 2020-09-15 \n",
250 | "5 - - - - 2020-05-23 2020-10-15 \n",
251 | "\n",
252 | "[6 rows x 32 columns]"
253 | ]
254 | },
255 | "execution_count": 10,
256 | "metadata": {},
257 | "output_type": "execute_result"
258 | }
259 | ],
260 | "source": [
261 | "url = \"https://data.bangkok.go.th/dataset/52a5da69-c086-425a-bcb3-fccfadd824f5/resource/1aecc616-c570-4efb-9398-7dd4e39356b2/download/env_5ec8dad38033f.csv\"\n",
262 | "df = pd.read_csv(filepath_or_buffer=url)  # load the CSV straight from the URL\n",
263 | "df"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 5,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "data": {
273 | "text/html": [
274 | "\n",
275 | "\n",
288 | "
\n",
289 | " \n",
290 | " \n",
291 | " | \n",
292 | " year | \n",
293 | " No. | \n",
294 | " Province | \n",
295 | " แหล่งน้ำ | \n",
296 | " WQI score | \n",
297 | " Type | \n",
298 | "
\n",
299 | " \n",
300 | " \n",
301 | " \n",
302 | " | 0 | \n",
303 | " 2561 | \n",
304 | " 64 | \n",
305 | " กรุงเทพมหานคร | \n",
306 | " แม่น้ำเจ้าพระยา | \n",
307 | " 35 | \n",
308 | " 4 | \n",
309 | "
\n",
310 | " \n",
311 | " | 1 | \n",
312 | " 2562 | \n",
313 | " 64 | \n",
314 | " กรุงเทพมหานคร | \n",
315 | " แม่น้ำเจ้าพระยา | \n",
316 | " 36 | \n",
317 | " 4 | \n",
318 | "
\n",
319 | " \n",
320 | " | 2 | \n",
321 | " 2563 | \n",
322 | " 64 | \n",
323 | " กรุงเทพมหานคร | \n",
324 | " แม่น้ำเจ้าพระยา | \n",
325 | " 38 | \n",
326 | " 4 | \n",
327 | "
\n",
328 | " \n",
329 | "
\n",
330 | "
"
331 | ],
332 | "text/plain": [
333 | " year No. Province แหล่งน้ำ WQI score Type\n",
334 | "0 2561 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 35 4\n",
335 | "1 2562 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 36 4\n",
336 | "2 2563 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 38 4"
337 | ]
338 | },
339 | "execution_count": 5,
340 | "metadata": {},
341 | "output_type": "execute_result"
342 | }
343 | ],
344 | "source": [
345 | "url = \"https://data.bangkok.go.th/dataset/d7139135-3988-40fa-8703-d546eb480960/resource/70c4f872-6e4b-42db-b8c4-474ef29e0245/download/wq-4-wqi-..xlsx\"\n",
346 | "df = pd.read_excel(io=url)  # load the workbook straight from the URL\n",
347 | "df"
348 | ]
349 | },
350 | {
351 | "cell_type": "code",
352 | "execution_count": 17,
353 | "metadata": {},
354 | "outputs": [
355 | {
356 | "data": {
357 | "text/html": [
358 | "\n",
359 | "\n",
372 | "
\n",
373 | " \n",
374 | " \n",
375 | " | \n",
376 | " _id | \n",
377 | " drag_g_id | \n",
378 | " dcode | \n",
379 | " scode | \n",
380 | " community_name | \n",
381 | " num_volunteers | \n",
382 | " population | \n",
383 | " amount_waste_kg | \n",
384 | " general_waste_kg | \n",
385 | " organic_waste_kg | \n",
386 | " toxic_waste_kg | \n",
387 | " infectious_waste_kg | \n",
388 | " recycle_waste_kg | \n",
389 | " sum_month | \n",
390 | " sum_year | \n",
391 | " remark | \n",
392 | "
\n",
393 | " \n",
394 | " \n",
395 | " \n",
396 | " | 0 | \n",
397 | " 1 | \n",
398 | " 1025001 | \n",
399 | " 1025 | \n",
400 | " 102504 | \n",
401 | " ชุมชนวัดดาวดึงษาราม | \n",
402 | " 2 | \n",
403 | " 2095 | \n",
404 | " 36266 | \n",
405 | " 17408 | \n",
406 | " 10880 | \n",
407 | " 725 | \n",
408 | " 0 | \n",
409 | " 7253 | \n",
410 | " 7 | \n",
411 | " 2564 | \n",
412 | " 0 | \n",
413 | "
\n",
414 | " \n",
415 | " | 1 | \n",
416 | " 2 | \n",
417 | " 1025002 | \n",
418 | " 1025 | \n",
419 | " 102501 | \n",
420 | " ชุมชนวัดเทพนารี | \n",
421 | " 1 | \n",
422 | " 990 | \n",
423 | " 20425 | \n",
424 | " 9804 | \n",
425 | " 6128 | \n",
426 | " 409 | \n",
427 | " 0 | \n",
428 | " 4085 | \n",
429 | " 7 | \n",
430 | " 2564 | \n",
431 | " 0 | \n",
432 | "
\n",
433 | " \n",
434 | " | 2 | \n",
435 | " 3 | \n",
436 | " 1025003 | \n",
437 | " 1025 | \n",
438 | " 102502 | \n",
439 | " ชุมชนเติมสุข | \n",
440 | " 1 | \n",
441 | " 440 | \n",
442 | " 20115 | \n",
443 | " 9655 | \n",
444 | " 6035 | \n",
445 | " 402 | \n",
446 | " 0 | \n",
447 | " 4023 | \n",
448 | " 7 | \n",
449 | " 2564 | \n",
450 | " 0 | \n",
451 | "
\n",
452 | " \n",
453 | " | 3 | \n",
454 | " 4 | \n",
455 | " 1025004 | \n",
456 | " 1025 | \n",
457 | " 102502 | \n",
458 | " ชุมชนริมคลองบางพลัด | \n",
459 | " 1 | \n",
460 | " 534 | \n",
461 | " 25548 | \n",
462 | " 12263 | \n",
463 | " 7664 | \n",
464 | " 511 | \n",
465 | " 0 | \n",
466 | " 5110 | \n",
467 | " 7 | \n",
468 | " 2564 | \n",
469 | " 0 | \n",
470 | "
\n",
471 | " \n",
472 | " | 4 | \n",
473 | " 5 | \n",
474 | " 1025005 | \n",
475 | " 1025 | \n",
476 | " 102502 | \n",
477 | " ชุมชนวัดฉัตรแก้วจงกลนี | \n",
478 | " 1 | \n",
479 | " 1498 | \n",
480 | " 19215 | \n",
481 | " 9223 | \n",
482 | " 5765 | \n",
483 | " 384 | \n",
484 | " 0 | \n",
485 | " 3843 | \n",
486 | " 7 | \n",
487 | " 2564 | \n",
488 | " 0 | \n",
489 | "
\n",
490 | " \n",
491 | " | 5 | \n",
492 | " 6 | \n",
493 | " 1025006 | \n",
494 | " 1025 | \n",
495 | " 102502 | \n",
496 | " ชุมชนแสงทอง | \n",
497 | " 1 | \n",
498 | " 1922 | \n",
499 | " 37914 | \n",
500 | " 18199 | \n",
501 | " 11374 | \n",
502 | " 758 | \n",
503 | " 0 | \n",
504 | " 7583 | \n",
505 | " 7 | \n",
506 | " 2564 | \n",
507 | " 0 | \n",
508 | "
\n",
509 | " \n",
510 | " | 6 | \n",
511 | " 7 | \n",
512 | " 1025007 | \n",
513 | " 1025 | \n",
514 | " 102501 | \n",
515 | " ชุมชนสวนปรก | \n",
516 | " 1 | \n",
517 | " 276 | \n",
518 | " 18900 | \n",
519 | " 9072 | \n",
520 | " 5670 | \n",
521 | " 378 | \n",
522 | " 0 | \n",
523 | " 3780 | \n",
524 | " 7 | \n",
525 | " 2564 | \n",
526 | " 0 | \n",
527 | "
\n",
528 | " \n",
529 | "
\n",
530 | "
"
531 | ],
532 | "text/plain": [
533 | " _id drag_g_id dcode scode community_name num_volunteers \\\n",
534 | "0 1 1025001 1025 102504 ชุมชนวัดดาวดึงษาราม 2 \n",
535 | "1 2 1025002 1025 102501 ชุมชนวัดเทพนารี 1 \n",
536 | "2 3 1025003 1025 102502 ชุมชนเติมสุข 1 \n",
537 | "3 4 1025004 1025 102502 ชุมชนริมคลองบางพลัด 1 \n",
538 | "4 5 1025005 1025 102502 ชุมชนวัดฉัตรแก้วจงกลนี 1 \n",
539 | "5 6 1025006 1025 102502 ชุมชนแสงทอง 1 \n",
540 | "6 7 1025007 1025 102501 ชุมชนสวนปรก 1 \n",
541 | "\n",
542 | " population amount_waste_kg general_waste_kg organic_waste_kg \\\n",
543 | "0 2095 36266 17408 10880 \n",
544 | "1 990 20425 9804 6128 \n",
545 | "2 440 20115 9655 6035 \n",
546 | "3 534 25548 12263 7664 \n",
547 | "4 1498 19215 9223 5765 \n",
548 | "5 1922 37914 18199 11374 \n",
549 | "6 276 18900 9072 5670 \n",
550 | "\n",
551 | " toxic_waste_kg infectious_waste_kg recycle_waste_kg sum_month sum_year \\\n",
552 | "0 725 0 7253 7 2564 \n",
553 | "1 409 0 4085 7 2564 \n",
554 | "2 402 0 4023 7 2564 \n",
555 | "3 511 0 5110 7 2564 \n",
556 | "4 384 0 3843 7 2564 \n",
557 | "5 758 0 7583 7 2564 \n",
558 | "6 378 0 3780 7 2564 \n",
559 | "\n",
560 | " remark \n",
561 | "0 0 \n",
562 | "1 0 \n",
563 | "2 0 \n",
564 | "3 0 \n",
565 | "4 0 \n",
566 | "5 0 \n",
567 | "6 0 "
568 | ]
569 | },
570 | "execution_count": 17,
571 | "metadata": {},
572 | "output_type": "execute_result"
573 | }
574 | ],
575 | "source": [
576 | "def bkkapi(resource_id):\n",
577 | "\t\"\"\"Return the records that data.bangkok.go.th's datastore_search sends back for resource_id as a DataFrame; raises requests.HTTPError on an HTTP error status.\"\"\"\n",
578 | "\turl=\"https://data.bangkok.go.th/api/3/action/datastore_search\"\n",
579 | "\tr=requests.get(url, params={\"resource_id\": resource_id}, timeout=30)  # params= URL-encodes the id; the timeout keeps a stalled server from hanging the cell\n",
580 | "\tr.raise_for_status()  # without this an error response only shows up later as a JSON decoding error or KeyError\n",
581 | "\tj=r.json()\n",
582 | "\treturn pd.DataFrame(j['result']['records'])\n",
583 | "\n",
584 | "# https://data.bangkok.go.th/dataset/65eb2ee6-efc6-4045-8e3e-ef451b844953/resource/95bd1379-177e-4dd8-9374-c0ce307bede9/download/-..-64.csv\n",
585 | "dt=bkkapi(\"95bd1379-177e-4dd8-9374-c0ce307bede9\")\n",
586 | "dt"
587 | ]
588 | },
589 | {
590 | "cell_type": "markdown",
591 | "metadata": {},
592 | "source": [
593 | "## read CSV\n",
594 | "- ข้อมูลสถิติการจัดเก็บภาษีเปรียบเทียบยอดประมาณการ\n",
595 | " - https://data.bangkok.go.th/dataset/statvat0864/resource/5cd749fe-11a8-457d-b88f-2151ac33fe27\n"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": null,
601 | "metadata": {},
602 | "outputs": [],
603 | "source": [
604 | "url = \"https://data.bangkok.go.th/dataset/c051d4a7-8418-49c1-86b0-5a5ea8511c33/resource/5cd749fe-11a8-457d-b88f-2151ac33fe27/download/fin_600ea1e3c81a7.csv\"\n",
605 | "df = pd.read_csv(url)\n",
606 | "df\n"
607 | ]
608 | },
609 | {
610 | "cell_type": "markdown",
611 | "metadata": {},
612 | "source": [
613 | "## read Excel\n",
614 | "\n",
615 | "- รายงานปริมาณน้ำที่ผ่านการบำบัดแล้วกลับมาใช้ประโยชน์ (พ.ศ.2561-2563)\n",
616 | " - https://data.bangkok.go.th/dataset/kpisbangkok_1100/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e\n"
617 | ]
618 | },
619 | {
620 | "cell_type": "code",
621 | "execution_count": null,
622 | "metadata": {},
623 | "outputs": [],
624 | "source": [
625 | "url = \"https://data.bangkok.go.th/dataset/e53ef05f-32b2-4b06-b800-cad2702f426e/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e/download/wq-2-reused_water-2561-2563.xlsx\"\n",
626 | "print(url)\n",
627 | "df = pd.read_excel(url)\n",
628 | "df[:5]\n"
629 | ]
630 | },
631 | {
632 | "cell_type": "code",
633 | "execution_count": null,
634 | "metadata": {},
635 | "outputs": [],
636 | "source": [
637 | "def download(url):\n",
638 | "\t\"\"\"Load url into a DataFrame: pd.read_excel when its path ends in .xls/.xlsx (case-insensitive, ignoring any ?query or #fragment), pd.read_csv otherwise.\"\"\"\n",
639 | "\tpath = url.split(\"#\", 1)[0].split(\"?\", 1)[0].lower()  # judge the extension on the path alone, not on a trailing query string\n",
640 | "\tif path.endswith((\".xls\", \".xlsx\")):\n",
641 | "\t\treturn pd.read_excel(url)\n",
642 | "\treturn pd.read_csv(url)\n",
643 | "\n",
644 | "# url = \"https://data.bangkok.go.th/dataset/c051d4a7-8418-49c1-86b0-5a5ea8511c33/resource/5cd749fe-11a8-457d-b88f-2151ac33fe27/download/fin_600ea1e3c81a7.csv\"\n",
645 | "url = \"https://data.bangkok.go.th/dataset/e53ef05f-32b2-4b06-b800-cad2702f426e/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e/download/wq-2-reused_water-2561-2563.xlsx\"\n",
646 | "df=download(url)\n",
647 | "df[:5]"
648 | ]
649 | },
650 | {
651 | "cell_type": "markdown",
652 | "metadata": {},
653 | "source": [
654 | "## CKAN API\n",
655 | "- รายงานปริมาณน้ำที่ผ่านการบำบัดแล้วกลับมาใช้ประโยชน์ (พ.ศ.2561-2563)\n",
656 | " - https://data.bangkok.go.th/dataset/kpisbangkok_1100/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e\n"
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "execution_count": null,
662 | "metadata": {},
663 | "outputs": [],
664 | "source": [
665 | "# resource_id=\"5cd749fe-11a8-457d-b88f-2151ac33fe27\"\n",
666 | "resource_id = \"272b8fbc-6146-40d6-be7c-335ea30c2f7e\"\n",
667 | "url = f\"https://data.bangkok.go.th/api/3/action/datastore_search?resource_id={resource_id}\"\n",
668 | "print(url)\n",
669 | "r = requests.get(url)\n",
670 | "r.raise_for_status()  # stop on an HTTP error status before trying to parse the body as JSON\n",
671 | "j = r.json()\n",
672 | "# j\n",
673 | "j[\"result\"][\"records\"]\n",
674 | "\n"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "execution_count": null,
680 | "metadata": {},
681 | "outputs": [],
682 | "source": [
683 | "df = pd.DataFrame(j[\"result\"][\"records\"])\n",
684 | "df\n"
685 | ]
686 | },
687 | {
688 | "cell_type": "code",
689 | "execution_count": null,
690 | "metadata": {},
691 | "outputs": [],
692 | "source": [
693 | "df.loc[:, 'Year':]"
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": null,
699 | "metadata": {},
700 | "outputs": [],
701 | "source": []
702 | }
703 | ],
704 | "metadata": {
705 | "kernelspec": {
706 | "display_name": "Python 3.9.12 ('base')",
707 | "language": "python",
708 | "name": "python3"
709 | },
710 | "language_info": {
711 | "codemirror_mode": {
712 | "name": "ipython",
713 | "version": 3
714 | },
715 | "file_extension": ".py",
716 | "mimetype": "text/x-python",
717 | "name": "python",
718 | "nbconvert_exporter": "python",
719 | "pygments_lexer": "ipython3",
720 | "version": "3.9.12"
721 | },
722 | "orig_nbformat": 4,
723 | "vscode": {
724 | "interpreter": {
725 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce"
726 | }
727 | }
728 | },
729 | "nbformat": 4,
730 | "nbformat_minor": 2
731 | }
732 |
--------------------------------------------------------------------------------
/data_go_th_api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# data.go.th\n",
8 | "\n",
9 | "web: https://data.go.th/\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "---\n",
17 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n",
18 | "* YouTube: https://www.youtube.com/prasertcbs\n",
19 | "* github: https://github.com/prasertcbs/\n",
20 | "* kaggle: https://www.kaggle.com/prasertk/\n",
21 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n",
22 | "---\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "
Play YouTube Video
"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "import pandas as pd\n",
39 | "import requests\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {
46 | "colab": {
47 | "base_uri": "https://localhost:8080/",
48 | "height": 64
49 | },
50 | "colab_type": "code",
51 | "id": "fdr0pYIf7P-_",
52 | "outputId": "c919deae-c99b-44b0-8924-4d2355ca0b63"
53 | },
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "pandas version: 1.4.2\n",
60 | "requests version: 2.28.0\n"
61 | ]
62 | },
63 | {
64 | "data": {
65 | "text/plain": [
66 | "Timestamp('2022-07-13 05:48:50.328872')"
67 | ]
68 | },
69 | "execution_count": 2,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "print(f\"pandas version: {pd.__version__}\")\n",
76 | "print(f\"requests version: {requests.__version__}\")\n",
77 | "\n",
78 | "pd.Timestamp.now()\n",
79 | "# pd.Timestamp.now().strftime('%Y-%m-%d')\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "# read CSV\n",
87 | "* https://data.go.th/dataset/mrta-crmk\n"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 3,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "data": {
97 | "text/html": [
98 | "\n",
99 | "\n",
112 | "
\n",
113 | " \n",
114 | " \n",
115 | " | \n",
116 | " เดือน | \n",
117 | " ปี | \n",
118 | " โครงการ | \n",
119 | " จำนวนผู้โดยสารรวม | \n",
120 | " จำนวนผู้โดยสารเฉลี่ยรายวัน | \n",
121 | " จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา | \n",
122 | " จำนวนผู้โดยสารเฉลี่ยรายวันหยุด | \n",
123 | "
\n",
124 | " \n",
125 | " \n",
126 | " \n",
127 | " | 0 | \n",
128 | " มกราคม | \n",
129 | " 2557 | \n",
130 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
131 | " 8,613,608 | \n",
132 | " 277,858 | \n",
133 | " 316,029 | \n",
134 | " 184,553 | \n",
135 | "
\n",
136 | " \n",
137 | " | 1 | \n",
138 | " กุมภาพันธ์ | \n",
139 | " 2557 | \n",
140 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
141 | " 8,212,613 | \n",
142 | " 293,308 | \n",
143 | " 329,204 | \n",
144 | " 217,526 | \n",
145 | "
\n",
146 | " \n",
147 | " | 2 | \n",
148 | " มีนาคม | \n",
149 | " 2557 | \n",
150 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
151 | " 8,123,428 | \n",
152 | " 262,046 | \n",
153 | " 287,668 | \n",
154 | " 208,239 | \n",
155 | "
\n",
156 | " \n",
157 | " | 3 | \n",
158 | " เมษายน | \n",
159 | " 2557 | \n",
160 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
161 | " 6,990,396 | \n",
162 | " 233,013 | \n",
163 | " 285,863 | \n",
164 | " 153,738 | \n",
165 | "
\n",
166 | " \n",
167 | " | 4 | \n",
168 | " พฤษภาคม | \n",
169 | " 2557 | \n",
170 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
171 | " 7,265,594 | \n",
172 | " 234,374 | \n",
173 | " 276,809 | \n",
174 | " 175,618 | \n",
175 | "
\n",
176 | " \n",
177 | " | ... | \n",
178 | " ... | \n",
179 | " ... | \n",
180 | " ... | \n",
181 | " ... | \n",
182 | " ... | \n",
183 | " ... | \n",
184 | " ... | \n",
185 | "
\n",
186 | " \n",
187 | " | 157 | \n",
188 | " มกราคม | \n",
189 | " 2565 | \n",
190 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
191 | " 949885 | \n",
192 | " 30641 | \n",
193 | " 36093 | \n",
194 | " 20729 | \n",
195 | "
\n",
196 | " \n",
197 | " | 158 | \n",
198 | " กุมภาพันธ์ | \n",
199 | " 2565 | \n",
200 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
201 | " 980551 | \n",
202 | " 35020 | \n",
203 | " 40477 | \n",
204 | " 23499 | \n",
205 | "
\n",
206 | " \n",
207 | " | 159 | \n",
208 | " มีนาคม | \n",
209 | " 2565 | \n",
210 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
211 | " 949885 | \n",
212 | " 32684 | \n",
213 | " 36292 | \n",
214 | " 22313 | \n",
215 | "
\n",
216 | " \n",
217 | " | 160 | \n",
218 | " เมษายน | \n",
219 | " 2565 | \n",
220 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
221 | " 903,899 | \n",
222 | " 30,130 | \n",
223 | " 37,815 | \n",
224 | " 20,080 | \n",
225 | "
\n",
226 | " \n",
227 | " | 161 | \n",
228 | " พฤษภาคม | \n",
229 | " 2565 | \n",
230 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
231 | " 1198303 | \n",
232 | " 38655 | \n",
233 | " 47597 | \n",
234 | " 26274 | \n",
235 | "
\n",
236 | " \n",
237 | "
\n",
238 | "
162 rows × 7 columns
\n",
239 | "
"
240 | ],
241 | "text/plain": [
242 | " เดือน ปี โครงการ จำนวนผู้โดยสารรวม \\\n",
243 | "0 มกราคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,613,608 \n",
244 | "1 กุมภาพันธ์ 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,212,613 \n",
245 | "2 มีนาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,123,428 \n",
246 | "3 เมษายน 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 6,990,396 \n",
247 | "4 พฤษภาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 7,265,594 \n",
248 | ".. ... ... ... ... \n",
249 | "157 มกราคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 949885 \n",
250 | "158 กุมภาพันธ์ 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 980551 \n",
251 | "159 มีนาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 949885 \n",
252 | "160 เมษายน 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 903,899 \n",
253 | "161 พฤษภาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 1198303 \n",
254 | "\n",
255 | " จำนวนผู้โดยสารเฉลี่ยรายวัน จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา \\\n",
256 | "0 277,858 316,029 \n",
257 | "1 293,308 329,204 \n",
258 | "2 262,046 287,668 \n",
259 | "3 233,013 285,863 \n",
260 | "4 234,374 276,809 \n",
261 | ".. ... ... \n",
262 | "157 30641 36093 \n",
263 | "158 35020 40477 \n",
264 | "159 32684 36292 \n",
265 | "160 30,130 37,815 \n",
266 | "161 38655 47597 \n",
267 | "\n",
268 | " จำนวนผู้โดยสารเฉลี่ยรายวันหยุด \n",
269 | "0 184,553 \n",
270 | "1 217,526 \n",
271 | "2 208,239 \n",
272 | "3 153,738 \n",
273 | "4 175,618 \n",
274 | ".. ... \n",
275 | "157 20729 \n",
276 | "158 23499 \n",
277 | "159 22313 \n",
278 | "160 20,080 \n",
279 | "161 26274 \n",
280 | "\n",
281 | "[162 rows x 7 columns]"
282 | ]
283 | },
284 | "execution_count": 3,
285 | "metadata": {},
286 | "output_type": "execute_result"
287 | }
288 | ],
289 | "source": [
290 | "url = \"https://data.go.th/dataset/fd781923-6c64-4cbc-90b8-83ad77c96ecd/resource/a542d7d4-bc27-4c03-81ef-bef0a5213210/download/-..-65.csv\"\n",
291 | "df = pd.read_csv(url)\n",
292 | "df\n"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "# read CSV (TIS-620, cp874, Windows-874)\n",
300 | "* https://data.go.th/dataset/bangkok-fir-may-2565"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 14,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "data": {
310 | "text/html": [
311 | "\n",
312 | "\n",
325 | "
\n",
326 | " \n",
327 | " \n",
328 | " | \n",
329 | " flight_rule | \n",
330 | " เที่ยวบินรายเดือน | \n",
331 | " เที่ยวบินสะสม | \n",
332 | " เฉลี่ยต่อวัน | \n",
333 | " การเปลี่ยนแปลง | \n",
334 | "
\n",
335 | " \n",
336 | " \n",
337 | " \n",
338 | " | 0 | \n",
339 | " VFR | \n",
340 | " 5643 | \n",
341 | " 47737 | \n",
342 | " 225.17 | \n",
343 | " 8.31 | \n",
344 | "
\n",
345 | " \n",
346 | " | 1 | \n",
347 | " IFR | \n",
348 | " 34309 | \n",
349 | " 224719 | \n",
350 | " 1.00 | \n",
351 | " 18.34 | \n",
352 | "
\n",
353 | " \n",
354 | " | 2 | \n",
355 | " TOTAL | \n",
356 | " 39952 | \n",
357 | " 272456 | \n",
358 | " 226.17 | \n",
359 | " 26.65 | \n",
360 | "
\n",
361 | " \n",
362 | "
\n",
363 | "
"
364 | ],
365 | "text/plain": [
366 | " flight_rule เที่ยวบินรายเดือน เที่ยวบินสะสม เฉลี่ยต่อวัน การเปลี่ยนแปลง\n",
367 | "0 VFR 5643 47737 225.17 8.31\n",
368 | "1 IFR 34309 224719 1.00 18.34\n",
369 | "2 TOTAL 39952 272456 226.17 26.65"
370 | ]
371 | },
372 | "execution_count": 14,
373 | "metadata": {},
374 | "output_type": "execute_result"
375 | }
376 | ],
377 | "source": [
378 | "url = \"https://data.go.th/dataset/31170cfe-eaf5-4583-83aa-000e4de39064/resource/0899889b-ae8e-44d7-a2ba-da81bcdb24a9/download/flightrule_05_2022.csv\"\n",
379 | "\n",
380 | "# df = pd.read_csv(url)\n",
381 | "\n",
382 | "# df = pd.read_csv(url, encoding=\"iso8859-11\")\n",
383 | "df = pd.read_csv(url, encoding=\"tis-620\")\n",
384 | "# df = pd.read_csv(url, encoding=\"cp874\")\n",
385 | "# df = pd.read_csv(url, encoding=\"thai\")\n",
386 | "\n",
387 | "df\n"
388 | ]
389 | },
390 | {
391 | "cell_type": "markdown",
392 | "metadata": {},
393 | "source": [
394 | "# read excel from URL\n",
395 | "* https://opendata.data.go.th/dataset/mrta-crmk"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 15,
401 | "metadata": {},
402 | "outputs": [
403 | {
404 | "data": {
405 | "text/html": [
406 | "\n",
407 | "\n",
420 | "
\n",
421 | " \n",
422 | " \n",
423 | " | \n",
424 | " เดือน | \n",
425 | " ปี | \n",
426 | " โครงการ | \n",
427 | " จำนวนผู้โดยสารรวม | \n",
428 | " จำนวนผู้โดยสารเฉลี่ยรายวัน | \n",
429 | " จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา | \n",
430 | " จำนวนผู้โดยสารเฉลี่ยรายวันหยุด | \n",
431 | "
\n",
432 | " \n",
433 | " \n",
434 | " \n",
435 | " | 0 | \n",
436 | " มกราคม | \n",
437 | " 2557 | \n",
438 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
439 | " 8613608 | \n",
440 | " 277858 | \n",
441 | " 316029 | \n",
442 | " 184553 | \n",
443 | "
\n",
444 | " \n",
445 | " | 1 | \n",
446 | " กุมภาพันธ์ | \n",
447 | " 2557 | \n",
448 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
449 | " 8212613 | \n",
450 | " 293308 | \n",
451 | " 329204 | \n",
452 | " 217526 | \n",
453 | "
\n",
454 | " \n",
455 | " | 2 | \n",
456 | " มีนาคม | \n",
457 | " 2557 | \n",
458 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
459 | " 8123428 | \n",
460 | " 262046 | \n",
461 | " 287668 | \n",
462 | " 208239 | \n",
463 | "
\n",
464 | " \n",
465 | " | 3 | \n",
466 | " เมษายน | \n",
467 | " 2557 | \n",
468 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
469 | " 6990396 | \n",
470 | " 233013 | \n",
471 | " 285863 | \n",
472 | " 153738 | \n",
473 | "
\n",
474 | " \n",
475 | " | 4 | \n",
476 | " พฤษภาคม | \n",
477 | " 2557 | \n",
478 | " โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล | \n",
479 | " 7265594 | \n",
480 | " 234374 | \n",
481 | " 276809 | \n",
482 | " 175618 | \n",
483 | "
\n",
484 | " \n",
485 | " | ... | \n",
486 | " ... | \n",
487 | " ... | \n",
488 | " ... | \n",
489 | " ... | \n",
490 | " ... | \n",
491 | " ... | \n",
492 | " ... | \n",
493 | "
\n",
494 | " \n",
495 | " | 166 | \n",
496 | " มกราคม | \n",
497 | " 2565 | \n",
498 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
499 | " 949885 | \n",
500 | " 30641 | \n",
501 | " 36093 | \n",
502 | " 20729 | \n",
503 | "
\n",
504 | " \n",
505 | " | 167 | \n",
506 | " กุมภาพันธ์ | \n",
507 | " 2565 | \n",
508 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
509 | " 980551 | \n",
510 | " 35020 | \n",
511 | " 40477 | \n",
512 | " 23499 | \n",
513 | "
\n",
514 | " \n",
515 | " | 168 | \n",
516 | " มีนาคม | \n",
517 | " 2565 | \n",
518 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
519 | " 1013287 | \n",
520 | " 32684 | \n",
521 | " 36292 | \n",
522 | " 22313 | \n",
523 | "
\n",
524 | " \n",
525 | " | 169 | \n",
526 | " เมษายน | \n",
527 | " 2565 | \n",
528 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
529 | " 903899 | \n",
530 | " 30130 | \n",
531 | " 37815 | \n",
532 | " 20080 | \n",
533 | "
\n",
534 | " \n",
535 | " | 170 | \n",
536 | " พฤษภาคม | \n",
537 | " 2565 | \n",
538 | " โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม | \n",
539 | " 1198303 | \n",
540 | " 38655 | \n",
541 | " 47597 | \n",
542 | " 26274 | \n",
543 | "
\n",
544 | " \n",
545 | "
\n",
546 | "
171 rows × 7 columns
\n",
547 | "
"
548 | ],
549 | "text/plain": [
550 | " เดือน ปี โครงการ \\\n",
551 | "0 มกราคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n",
552 | "1 กุมภาพันธ์ 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n",
553 | "2 มีนาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n",
554 | "3 เมษายน 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n",
555 | "4 พฤษภาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n",
556 | ".. ... ... ... \n",
557 | "166 มกราคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n",
558 | "167 กุมภาพันธ์ 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n",
559 | "168 มีนาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n",
560 | "169 เมษายน 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n",
561 | "170 พฤษภาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n",
562 | "\n",
563 | " จำนวนผู้โดยสารรวม จำนวนผู้โดยสารเฉลี่ยรายวัน \\\n",
564 | "0 8613608 277858 \n",
565 | "1 8212613 293308 \n",
566 | "2 8123428 262046 \n",
567 | "3 6990396 233013 \n",
568 | "4 7265594 234374 \n",
569 | ".. ... ... \n",
570 | "166 949885 30641 \n",
571 | "167 980551 35020 \n",
572 | "168 1013287 32684 \n",
573 | "169 903899 30130 \n",
574 | "170 1198303 38655 \n",
575 | "\n",
576 | " จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา จำนวนผู้โดยสารเฉลี่ยรายวันหยุด \n",
577 | "0 316029 184553 \n",
578 | "1 329204 217526 \n",
579 | "2 287668 208239 \n",
580 | "3 285863 153738 \n",
581 | "4 276809 175618 \n",
582 | ".. ... ... \n",
583 | "166 36093 20729 \n",
584 | "167 40477 23499 \n",
585 | "168 36292 22313 \n",
586 | "169 37815 20080 \n",
587 | "170 47597 26274 \n",
588 | "\n",
589 | "[171 rows x 7 columns]"
590 | ]
591 | },
592 | "execution_count": 15,
593 | "metadata": {},
594 | "output_type": "execute_result"
595 | }
596 | ],
597 | "source": [
598 | "url = \"https://data.go.th/dataset/fd781923-6c64-4cbc-90b8-83ad77c96ecd/resource/8364f303-8f2f-4693-aa0c-2c96dc7cb075/download/-..-65.xlsx\"\n",
599 | "df = pd.read_excel(url)\n",
600 | "df\n"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 16,
606 | "metadata": {},
607 | "outputs": [],
608 | "source": [
609 | "df.to_csv('mrt.csv', index=False)\n",
610 | "df.to_excel('mrt.xlsx', index=False)  # leave the row index out of the sheet too, so both files hold the same columns\n"
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {},
616 | "source": [
617 | "## Data API\n"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "execution_count": null,
623 | "metadata": {},
624 | "outputs": [],
625 | "source": [
626 | "from getpass import getpass\n",
627 | "api_key = getpass()"
628 | ]
629 | },
630 | {
631 | "cell_type": "markdown",
632 | "metadata": {},
633 | "source": [
634 | "- ข้อมูลโรงแรมที่เป็นมิตรกับสิ่งแวดล้อม (Green Hotel)\n",
635 | " * https://opendata.data.go.th/dataset/db0103-002\n",
636 | "- จำนวนคนทำงานในที่พักแรม\n",
637 | " * https://opendata.data.go.th/dataset/os_17_00010"
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "execution_count": null,
643 | "metadata": {},
644 | "outputs": [],
645 | "source": [
646 | "# Request headers\n",
647 | "headers = {\n",
648 | "    \"api-key\": api_key,\n",
649 | "}\n",
650 | "\n",
651 | "params = {\"resource_id\": \"6c865fd0-93e4-4642-8e57-7063b3162896\", \"limit\": 10}\n",
652 | "r = requests.get(\n",
653 | "    \"https://opend.data.go.th/get-ckan/datastore_search\", params, headers=headers\n",
654 | ")\n",
655 | "r.raise_for_status()  # raise on an HTTP error so records/df below are never built from a failed (or an earlier) response\n",
656 | "j = r.json()\n",
657 | "records = j[\"result\"][\"records\"]\n",
658 | "df = pd.DataFrame(records)\n",
659 | "df\n"
660 | ]
661 | },
662 | {
663 | "cell_type": "markdown",
664 | "metadata": {},
665 | "source": [
666 | "# functions\n"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": null,
672 | "metadata": {},
673 | "outputs": [],
674 | "source": [
675 | "import requests\n",
676 | "import pandas as pd\n",
677 | "\n",
678 | "def gov_open_data(api_key, resource_id: str, nrows=10000):\n",
679 | "    \"\"\"Query the opend.data.go.th datastore_search endpoint and return its records as a DataFrame.\n",
680 | "\n",
681 | "    api_key is sent as the \"api-key\" request header; resource_id and nrows are sent as the\n",
682 | "    \"resource_id\" and \"limit\" query parameters. Raises requests.HTTPError on an HTTP error status.\n",
683 | "\n",
684 | "    >>> gov_open_data(\"your api-key\", \"ce7f4a78-71db-4754-9084-edca971903bd\", nrows=3)\n",
685 | "    \"\"\"\n",
686 | "    headers = {\"api-key\": api_key}\n",
687 | "    params = {\"resource_id\": resource_id, \"limit\": nrows}\n",
688 | "    r = requests.get(\n",
689 | "        \"https://opend.data.go.th/get-ckan/datastore_search\", params, headers=headers\n",
690 | "    )\n",
691 | "    r.raise_for_status()  # report a failed request as an HTTP error instead of a JSON/KeyError further down\n",
692 | "    j = r.json()\n",
693 | "    records = j[\"result\"][\"records\"]\n",
694 | "    return pd.DataFrame(records)\n",
695 | "\n"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": null,
701 | "metadata": {},
702 | "outputs": [],
703 | "source": [
704 | "# ข้อมูลพิกัด LAT/LONG ที่ตั้งตำบล\n",
705 | "# https://data.go.th/dataset/item_c6d42e1b-3219-47e1-b6b7-dfe914f27910\n",
706 | "gov_open_data(api_key, \"48039a2a-2f01-448c-b2a2-bb0d541dedcd\", nrows=3)\n"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": null,
712 | "metadata": {},
713 | "outputs": [],
714 | "source": []
715 | }
716 | ],
717 | "metadata": {
718 | "kernelspec": {
719 | "display_name": "Python 3.9.12 ('base')",
720 | "language": "python",
721 | "name": "python3"
722 | },
723 | "language_info": {
724 | "codemirror_mode": {
725 | "name": "ipython",
726 | "version": 3
727 | },
728 | "file_extension": ".py",
729 | "mimetype": "text/x-python",
730 | "name": "python",
731 | "nbconvert_exporter": "python",
732 | "pygments_lexer": "ipython3",
733 | "version": "3.9.12"
734 | },
735 | "vscode": {
736 | "interpreter": {
737 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce"
738 | }
739 | },
740 | "widgets": {
741 | "application/vnd.jupyter.widget-state+json": {
742 | "state": {},
743 | "version_major": 2,
744 | "version_minor": 0
745 | }
746 | }
747 | },
748 | "nbformat": 4,
749 | "nbformat_minor": 4
750 | }
751 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pandas for Data Science
2 | | YouTube | Title |
3 | |:------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
4 | |
| สอน Python สำหรับ Data science: การติดตั้ง Anaconda Python และ Jupyter Notebook บน Windows |
5 | |
| สอน Jupyter notebook: รู้จัก Jupyter notebook |
6 | |
| สอน Jupyter notebook: แนะนำหลักการและชุดคำสั่งพื้นฐาน |
7 | |
| สอน Jupyter notebook: การใช้คำสั่งเบื้องต้น ตอนที่ 1 |
8 | |
| สอน Jupyter notebook: การใช้คำสั่งเบื้องต้น ตอนที่ 2 |
9 | |
| สอน Jupyter notebook: การใช้ markdown ตอนที่ 1 |
10 | |
| สอน Jupyter notebook: การใช้ markdown ตอนที่ 2 |
11 | |
| สอน Jupyter notebook: วิธีการเขียน Markdown เพื่อสร้างสารบัญให้กับ Jupyter Notebook |
12 | |
| เทคนิคการใช้ IPython.display เพื่อเปิดไฟล์เสียง แสดงรูป JSON และ YouTube ใน Jupyter Notebook |
13 | |
| สอน Jupyter notebook: การแสดงหน้าเว็บ รูปภาพและ YouTube ใน notebook |
14 | |
| สอน Jupyter notebook: เทคนิคการใช้ autocomplete และ help เพื่อเขียนโค้ด |
15 | |
| สอน Jupyter notebook: เทคนิคการ start Jupyter notebook บน Windows |
16 | |
| สอน Jupyter notebook: การ run Jupyter notebook ใน browser ที่ต้องการ |
17 | |
| สอน Jupyter Notebook: การ run notebook cell ที่มี Python Prompt (เครื่องหมายมากกว่า 3 ตัว) |
18 | |
| สอน Jupyter notebook: การ copy cells ข้าม notebooks |
19 | |
| สอน Jupyter notebook: การเก็บและแชร์ notebooks บน github |
20 | |
| สอน Jupyter Notebook: ลองใช้ Jupyter notebook บน Cloud ด้วย Google Colaboratory (Colab) |
21 | |
| การใช้ pip เพื่อจัดการ Python packages ใน Google Colaboratory |
22 | |
| เทคนิคการใช้ Jupyter notebook บน Visual Studio Code |
23 | |
| สอน Python สำหรับ Data science: รู้จักกับ pandas |
24 | |
| สอน Python สำหรับ Data science: pandas.Series |
25 | |
| สอน Python สำหรับ Data science: รู้จักกับ pandas DataFrame |
26 | |
| สอน Python สำหรับ Data science: การสำรวจข้อมูลเบื้องต้นด้วย pandas |
27 | |
| สอน Python สำหรับ Data science: การแสดงแถวข้อมูลใน pandas โดยใช้ head, tail และ sample |
28 | |
| สอน Python สำหรับ Data science: ประเภทข้อมูล (data types) ใน pandas |
29 | |
| การกำหนด max_rows, max_columns, float_format เพื่อแสดงข้อมูลใน pandas DataFrame |
30 | |
| การสร้างกราฟด้วย pandas: การสร้างฮีสโตแกรม (histogram) และ density plot |
31 | |
| การสร้างกราฟด้วย pandas: การสร้าง boxplot |
32 | |
| การสร้างกราฟด้วย pandas: การสร้าง boxplot หลายรูปพร้อมกัน |
33 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟ XY หรือ Scatter Plot |
34 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟแท่ง (bar graph) |
35 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟ stacked bar |
36 | |
| การสร้างกราฟด้วย pandas: กราฟเส้น (line graph) |
37 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟเส้นแสดงการเปลี่ยนแปลงเป็น % เปรียบเทียบ |
38 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟเส้นหลายเส้นและหลายรูปพร้อม ๆ กัน |
39 | |
| การสร้างกราฟด้วย pandas: การสร้างกราฟที่มีตัวอักษรไทยด้วย pandas |
40 | |
| การสร้างกราฟด้วย pandas: การกำหนด style ให้กับกราฟ |
41 | |
| สอน pandas: การสร้าง correlation matrix พร้อมแสดงผลแบบ heatmap |
42 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ CSV, TSV, FWF, Excel, Zip มาสร้างเป็น pandas DataFrame |
43 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ข้อมูลจาก local, web, dropbox, github ด้วย pandas |
44 | |
| สอน pandas: การอ่านไฟล์ที่แชร์จาก OneDrive มาสร้างเป็น DataFrame |
45 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ที่ไม่มี header row |
46 | |
| สอน Python สำหรับ Data science: การสร้าง pandas DataFrame จาก Clipboard |
47 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ที่มีข้อมูลวันเวลา (date/time data) ด้วย pandas |
48 | |
| สอน Python สำหรับ Data science: การอ่านและรวมไฟล์ CSV หลาย ๆ ไฟล์ |
49 | |
| สอน Python สำหรับ Data science: การ save pandas DataFrame เป็นไฟล์ CSV |
50 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ Excel ด้วย pandas |
51 | |
| สอน Python สำหรับ Data science: การอ่านชีทหลาย ๆ ชีทจาก Excel มาสร้างเป็น DataFrame (ตอนที่ 1) |
52 | |
| สอน Python สำหรับ Data science: การอ่านชีทหลาย ๆ ชีทจาก Excel มาสร้างเป็น DataFrame (ตอนที่ 2) |
53 | |
| สอน Python สำหรับ Data science: การ save หลาย ๆ pandas DataFrame ให้เป็นไฟล์ Excel |
54 | |
| สอน Python สำหรับ Data science: การอ่านข้อมูลจาก google sheets มาสร้างเป็น pandas DataFrame |
55 | |
| สอน pandas: การอ่านไฟล์ csv, tsv, json, excel ที่แชร์บน Google Drive |
56 | |
| สอน pandas: การอ่านและแปลงข้อมูลจาก Google Form เช่น แปลงข้อความ "พอใจมาก" ให้เป็นเลข 4 |
57 | |
| การดึงข้อมูลตัวชี้วัดจาก World Bank ด้วย pandas |
58 | |
| สอน Python สำหรับ Data science: การอ่านข้อมูลจาก web ที่อยู่ในรูปตาราง html ด้วย read_html |
59 | |
| สอน Python สำหรับ Data science: การอ่านไฟล์ JSON มาสร้างเป็น pandas dataframe |
60 | |
| การใช้ GET requests ในการดึงข้อมูล text file (CSV, JSON) และรูปภาพจาก web URL |
61 | |
| สอน Python สำหรับ data science: การใช้ Pandas ดึงข้อมูลจาก Bank Of Thailand API (REST API) |
62 | |
| การอ่าน R datasets มาเป็น pandas DataFrame |
63 | |
| สอน pandas: การอ่านและเขียนไฟล์ SPSS (read and write SPSS sav and zsav file) |
64 | |
| สอน Python สำหรับ Data science: การอ่านข้อมูลจาก sqlite3 มาเป็น pandas DataFrame |
65 | |
| สอน Python สำหรับ Data science: การแปลง pandas DataFrame ให้เป็นตารางใน sqlite3 |
66 | |
| สอน Python สำหรับ data science: การดึงข้อมูลจาก Microsoft SQL Server มาสร้างเป็น pandas DataFrame |
67 | |
| สอน pandas: save DataFrame ไปเป็นตารางใน MS SQL Server |
68 | |
| สอน pandas: การเชื่อมต่อกับฐานข้อมูล PostgreSQL |
69 | |
| สอน Python สำหรับ data science: การดึงข้อมูลจาก MySQL มาสร้างเป็น pandas DataFrame |
70 | |
| สอน pandas: save DataFrame ไปเป็นตารางใน MySQL, PostgreSQL และ SQLite |
71 | |
| สอน Python สำหรับ Data science: การใช้งาน pandas index เบื้องต้น |
72 | |
| สอน Python สำหรับ Data science: การเลือกคอลัมน์ใน pandas DataFrame ตอนที่ 1 |
73 | |
| สอน Python สำหรับ Data science: การเลือกคอลัมน์ใน pandas DataFrame ตอนที่ 2 |
74 | |
| สอน Python สำหรับ Data science: การเลือกแถวข้อมูลตามเงื่อนไข |
75 | |
| สอน Python สำหรับ Data science: การลบแถวและคอลัมน์ใน DataFrame |
76 | |
| สอน Python สำหรับ Data science: การเปลี่ยนชื่อคอลัมน์ใน pandas DataFrame |
77 | |
| สอน Python สำหรับ Data science: การเรียงลำดับข้อมูลใน pandas ด้วย sort_values และ sort_index |
78 | |
| สอน Python สำหรับ Data science: การใช้ loc เพื่อเลือกแถวและคอลัมน์ |
79 | |
| สอน Python สำหรับ Data science: การใช้ loc ร่วมกับ regex เพื่อเลือกชื่อคอลัมน์ |
80 | |
| สอน Python สำหรับ Data science: การใช้ iloc เพื่อเลือกแถวและคอลัมน์ใน pandas DataFrame |
81 | |
| สอน Python สำหรับ Data science: การใช้ at และ iat ในการเข้าถึงและกำหนดค่าแบบ scalar ใน DataFrame |
82 | |
| สอน Python สำหรับ Data science: การสร้าง two-level index ให้ pandas DataFrame |
83 | |
| สอน Python สำหรับ Data science: การสร้าง three-level index ให้ pandas DataFrame |
84 | |
| สอน Python สำหรับ Data science: การใช้ pandas IndexSlice เพื่อดึงข้อมูล multi-level index ตอนที่ 1 |
85 | |
| สอน Python สำหรับ Data science: การใช้ pandas IndexSlice เพื่อดึงข้อมูล multi-level index ตอนที่ 2 |
86 | |
| การดึงราคาหุ้นในตลาดหุ้นไทยและต่างประเทศด้วย pandas (get daily stock price) |
87 | |
| สอน pandas: เข้าใจการทำงานของ multilevel column names เช่น ข้อมูลราคาหุ้นรายวัน |
88 | |
| สอน Python สำหรับ Data science: การทำงานกับข้อมูลวันและเวลาใน pandas |
89 | |
| สอน Python สำหรับ Data science: การจัดการข้อมูลแบบ TimeSeries (DatetimeIndex) |
90 | |
| สอน Python สำหรับ Data science: การใช้ groupby เพื่อจัดกลุ่มข้อมูลใน pandas DataFrame |
91 | |
| สอน Python สำหรับ Data science: การใช้ Series.str.replace เพื่อ clean ข้อมูล |
92 | |
| สอน Python สำหรับ Data science: การปรับแต่งข้อมูลในคอลัมน์ เช่น ลบส่วนที่ไม่ใช่เลข 0-9 ออกจากข้อมูล |
93 | |
| สอน pandas: การแปลงตัวเลขที่มีสัญลักษณ์สกุลเงินและหน่วย (clean currency text) เช่น $70K เป็น 70000 |
94 | |
| สอน Python สำหรับ Data science: การใช้ pandas drop_duplicates เพื่อลบแถวซ้ำ |
95 | |
| สอน Python สำหรับ Data science: การจัดการกับ missing values ตอนที่ 1 |
96 | |
| สอน Python สำหรับ Data science: การจัดการกับ missing values ตอนที่ 2 |
97 | |
| สอน Python สำหรับ Data science: การแทนที่ missing values |
98 | |
| สอน Python สำหรับ Data science: ตัวแปรแบบ category เบื้องต้น |
99 | |
| สอน Python สำหรับ Data science: การสร้าง ordered category ใน pandas DataFrame |
100 | |
| สอน Python สำหรับ Data science: การใช้ cut เพื่อแบ่งข้อมูลออกเป็น category |
101 | |
| สอน Python สำหรับ Data science: การรวมหลาย ๆ DataFrame เข้าด้วยกันด้วย pandas.concat |
102 | |
| สอน Python สำหรับ Data science: การใช้ aggregate function ใน Series และ DataFrame |
103 | |
| สอน pandas: เข้าใจการทำงาน axis=0 (by index) และ axis=1 (by columns) |
104 | |
| สอน Python สำหรับ Data science: การสร้างตารางไขว้ (crosstab) ด้วย pandas |
105 | |
| สอน Python สำหรับ Data science: การสร้าง pivot table ด้วย pandas เบื้องต้น |
106 | |
| สอน Python สำหรับ Data science: การ melt/unpivot pandas DataFrame |
107 | |
| สอน Python สำหรับ Data science: การสร้าง pandas pivot table แสดงความถี่ และการแสดง missing values |
108 | |
| สอน Python สำหรับ Data science: การใช้ aggregate function ใน pivot table ของ pandas |
109 | |
| สอน Python สำหรับ Data science: การใช้ diff() และ pct_change() เพื่อหาผลต่างระหว่างสองแถว |
110 | |
| สอน Python สำหรับ Data science: การใช้ diff() และ pct_change() ร่วมกับ groupby ใน pandas DataFrame |
111 | |
| สอน Python สำหรับ Data science: การหาค่าแรกและค่าสุดท้ายของแต่ละ group ใน pandas DataFrame |
112 | |
| สอน pandas: การคำนวณราคาหุ้นเทียบกับวันที่ใช้เป็นฐานด้วย groupby().first() |
113 | |
| สอน pandas การหาแถวที่มีค่าสูงสุดและต่ำสุดด้วย nlargest, nsmallest |
114 | |
| สอน Python สำหรับ Data science: สร้างคอลัมน์แสดงลำดับ (rank) ด้วย pandas |
115 | |
| สอน Python สำหรับ Data science: การ standardize data (z-score) ด้วย pandas |
116 | |
| การสร้าง dummy variable และใช้งานกับ Linear Regression ของ scikit-learn และ statsmodels |
117 | |
| สอน Python สำหรับ Data science: การ merge DataFrames แบบ inner join |
118 | |
| สอน Python สำหรับ Data science: การ merge DataFrames แบบ left join |
119 | |
| สอน Python สำหรับ Data science: รู้จักกับ lambda function |
120 | |
| สอน Python สำหรับ Data science: การใช้ lambda function กับ pandas Series และ DataFrame |
121 | |
| สอน Python สำหรับ Data science: การใช้ map ใน pandas |
122 | |
| สอน Python สำหรับ Data science: การใช้ applymap ใน pandas |
123 | |
| สอน Python สำหรับ Data science: การใช้ apply ใน pandas |
124 | |
| สอน pandas: จัดการข้อมูลแยกตามกลุ่มด้วยหลักการ Split-Apply-Combine |
125 | |
| สอน Python สำหรับ Data science: การใช้ query ในการกรองข้อมูลใน pandas DataFrame |
126 | |
| สอน Python สำหรับ Data science: การใช้ any() และ all() ในการตรวจสอบ True/False |
127 | |
| สอน Python สำหรับ Data science: การสลับแถว (shuffle) ใน pandas DataFrame |
128 | |
| สอน Python สำหรับ Data science: การสุ่มแถวจากข้อมูลที่แบ่งออกเป็นกลุ่ม ๆ |
129 | |
| สอน Python สำหรับ Data science: การสุ่มแถวจากข้อมูลที่แบ่งออกเป็นกลุ่ม ๆ โดยระบุจำนวนที่ต้องการ |
130 | |
| สอน Python สำหรับ Data science: การแบ่ง DataFrame ออกเป็น training และ test datasets (ตอนที่ 1) |
131 | |
| สอน Python สำหรับ Data science: การแบ่ง DataFrame ออกเป็น training และ test datasets (ตอนที่ 2) |
132 | |
| สอน pandas: การเข้าถึงแต่ละแถวใน DataFrame (iterate rows in dataframe) |
133 | |
| สอน Python สำหรับ Data science: กำหนดจำนวนแถวที่จะแสดงผลและรูปแบบการแสดงตัวเลข |
134 | |
| สอน Python สำหรับ Data science: การปรับแต่ง style ในการแสดงข้อมูล DataFrame เบื้องต้น |
135 | |
| สอน Python สำหรับ Data science: การปรับแต่ง style ในการแสดงข้อมูล DataFrame ด้วย custom function |
136 | |
| สอน Python สำหรับ Data science: การแสดงรูปและ hyperlink ใน pandas DataFrame |
137 | |
| สอนไพธอน Python 3: การดาวน์โหลดรูปภาพจาก URL เพื่อแสดงผลบน jupyter notebook |
138 | |
| สอนทำ web scraping ด้วย Python: การ scrape รูปภาพจากหน้าเว็บด้วย BeautifulSoup |
139 | |
| สอนทำ web scraping ด้วย Python: การ scrape url link และ img เพื่อสร้างเป็น DataFrame ตอนที่ 1 |
140 | |
| สอนทำ web scraping ด้วย Python: การ scrape url link และ img เพื่อสร้างเป็น DataFrame ตอนที่ 2 |
141 | |
| สอน web scraping: การ scrape นักเตะทีม Liverpool มาเก็บใน pandas.DataFrame |
142 | |
| สอนการ download รูปภาพจาก url ที่เก็บใน pandas DataFrame |
143 | |
| สอน web scraping ในการดึงข้อความและรูปภาพเพื่อแสดงบน Notebook และบันทึกเป็นไฟล์ |
144 | |
| สอน web scraping ในการดึงข้อความและรูปภาพมาเก็บในฐานข้อมูล MySQL |
145 | |
| สอน pandas: explode (unnest) multivalue column เช่น คอลัมน์ที่เก็บค่าแบบ list |
146 | |
| สอน pandas: ทำความสะอาดข้อมูล JSON |
147 | |
| สอน pandas: ดึงข้อมูล JSON จาก public RESTful APIs เช่น อัตราแลกเปลี่ยน พยากรณ์อากาศ Pokemon |
148 | |
| สอน pandas: รู้จักกับ JSON string format แบบต่าง ๆ ที่ pandas รองรับ |
149 | |
| สอน pandas: การจัดการกับคอลัมน์ใน DataFrame ที่เก็บค่าแบบ dict และ list |
150 | |
| สอน pandas: การดึงข้อมูลดัชนีตลาดหุ้นทั่วโลกแบบรายวัน (get daily stock market index) |
151 | |
| สอน pandas: การอ่านไฟล์ SAS (xport, sas7bdat) |
152 | |
| สอน pandas: การ stack และ unstack DataFrame ที่มี MultiIndex |
153 | |
| สอน pandas: การเชื่อมต่อกับ Azure SQL Database |
154 | |
| สอน pandas: การทำงานกับคอลัมน์ที่มีชื่อเป็นตัวเลข (numeric column name) |
155 | |
| สอน pandas: การจัดรูปแบบการแสดงผลตัวเลข เช่น จำนวนจุดทศนิยม เปอร์เซ็นต์ ใส่ , คั่นทุก 3 หลัก |
156 | |
| สอน pandas: การเลือก row/column label ด้วย filter เช่น เลือกชื่อคอลัมน์ที่มีคำว่า math |
157 | |
| สอน pandas: การรวมชื่อคอลัมน์แบบ MultiIndex เข้าด้วยกัน |
158 | |
| สอน pandas: การดึงอัตราแลกเปลี่ยนเงินตราต่างประเทศจาก RESTful api |
159 | |
| สอน pandas: การหาค่าเฉลี่ยดัชนีตลาดหุ้นแยกตามสัปดาห์ เดือน ไตรมาส และปี |
160 | |
| สอนไพธอน Python: การใช้ tqdm เพื่อสร้าง progress meter |
161 | |
| สอนไพธอน Python: การใช้ io.StringIO เพื่อทำ memory file (อ่าน string ให้เหมือนอ่านไฟล์) |
162 | |
| สอน pandas: การใช้คำสั่ง SQL เพื่อดึงข้อมูลใน DataFrame ด้วย pandasql package |
163 | |
| สอน pandas: การ merge ข้อมูลราคาน้ำมันและทองคำรายเดือน (merge oil and gold prices) |
164 | |
| สอน pandas: การ merge ราคาน้ำมันและดัชนีตลาดหุ้นรายวันเข้าด้วยกัน (merge oil prices and stock index) |
165 | |
| สอน pandas: การอ่านและรวมหลาย ๆ ชีทจาก Google Sheets มาสร้างเป็น DataFrame |
166 | |
| สอน pandas: เทคนิคการใช้ regular expression ในการกรองข้อมูลแบบต่าง ๆ |
167 | |
| สอน data science: การดึงข้อความภาษาไทยด้วย regex (extract Thai characters with regular expression) |
168 | |
| สอน pandas: การทำความสะอาดข้อมูลด้วย regular expression (str.extract และ str.split) |
169 | |
| สอน pandas: การสร้าง dummy/one-hot จาก multivalued column |
170 | |
| การแปลง pandas DataFrame ให้เป็น Sparse เพื่อทำ Market Basket Analysis |
171 | |
| สอน Jupyter notebook: แนะนำ Azure Notebook เพื่อใช้ Jupyter Notebook บน Cloud |
172 | |
| สอน data science: ทำความสะอาดข้อมูลแบบ multilevel ด้วย pandas (clean multilevel table) |
173 | |
| สอน pandas: แปลง DataFrame เป็น HTML, Markdown และ Excel (DataFrame to HTML, Markdown, Excel table) |
174 | |
| สอน pandas: การทำงานกับคอลัมน์ที่เก็บค่าเป็น dictionary |
175 | |
| สอน data science: preprocess ข้อมูลที่ได้จาก Google Form |
176 |
--------------------------------------------------------------------------------
/pandas_transform_google_form_data2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# read and transform Google Form data (checkboxes, multiple choice grid items)\n",
8 | "* Google Form: https://forms.gle/7a35kfYmzyJJb5GD9\n",
9 | "* Google Sheets: https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "---\n",
17 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n",
18 | "* YouTube: https://www.youtube.com/prasertcbs\n",
19 | "* github: https://github.com/prasertcbs/\n",
20 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n",
21 | "---"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "import numpy as np\n",
32 | "import re"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "pandas version: 1.2.1\n",
45 | "numpy version: 1.19.2\n",
46 | "2021-02-20 09:20:15.132975\n"
47 | ]
48 | }
49 | ],
50 | "source": [
51 | "print(f'pandas version: {pd.__version__}')\n",
52 | "print(f'numpy version: {np.__version__}')\n",
53 | "print(pd.Timestamp.now())"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 54,
59 | "metadata": {
60 | "scrolled": true
61 | },
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/html": [
66 | "\n",
67 | "\n",
80 | "
\n",
81 | " \n",
82 | " \n",
83 | " | \n",
84 | " Timestamp | \n",
85 | " gender | \n",
86 | " age (years) | \n",
87 | " skills | \n",
88 | " satisfaction [product] | \n",
89 | " satisfaction [price] | \n",
90 | " satisfaction [service] | \n",
91 | " satisfaction [overall] | \n",
92 | "
\n",
93 | " \n",
94 | " \n",
95 | " \n",
96 | " | 0 | \n",
97 | " 2021-02-19 22:36:35.982 | \n",
98 | " Male | \n",
99 | " 24.0 | \n",
100 | " Word, Excel, PowerPoint, Access | \n",
101 | " very dissatisfied | \n",
102 | " dissatisfied | \n",
103 | " neutral | \n",
104 | " satisfied | \n",
105 | "
\n",
106 | " \n",
107 | " | 1 | \n",
108 | " 2021-02-19 22:38:17.913 | \n",
109 | " Male | \n",
110 | " 22.0 | \n",
111 | " Excel, SQL | \n",
112 | " very satisfied | \n",
113 | " satisfied | \n",
114 | " neutral | \n",
115 | " dissatisfied | \n",
116 | "
\n",
117 | " \n",
118 | " | 2 | \n",
119 | " 2021-02-19 22:49:14.802 | \n",
120 | " Prefer not to say | \n",
121 | " 35.0 | \n",
122 | " Word, Excel, PowerPoint, SPSS | \n",
123 | " very dissatisfied | \n",
124 | " dissatisfied | \n",
125 | " neutral | \n",
126 | " dissatisfied | \n",
127 | "
\n",
128 | " \n",
129 | " | 3 | \n",
130 | " 2021-02-19 23:01:24.057 | \n",
131 | " Female | \n",
132 | " 23.0 | \n",
133 | " Word, Excel, PowerPoint, SQL, Python, R, JavaS... | \n",
134 | " very satisfied | \n",
135 | " satisfied | \n",
136 | " neutral | \n",
137 | " dissatisfied | \n",
138 | "
\n",
139 | " \n",
140 | " | 4 | \n",
141 | " 2021-02-20 08:27:19.584 | \n",
142 | " Female | \n",
143 | " 27.0 | \n",
144 | " Word, Excel, PowerPoint | \n",
145 | " neutral | \n",
146 | " satisfied | \n",
147 | " very satisfied | \n",
148 | " very satisfied | \n",
149 | "
\n",
150 | " \n",
151 | " | 5 | \n",
152 | " 2021-02-20 08:27:36.059 | \n",
153 | " Female | \n",
154 | " 19.0 | \n",
155 | " Excel, Python, R | \n",
156 | " satisfied | \n",
157 | " satisfied | \n",
158 | " satisfied | \n",
159 | " satisfied | \n",
160 | "
\n",
161 | " \n",
162 | " | 6 | \n",
163 | " 2021-02-20 08:28:19.725 | \n",
164 | " Prefer not to say | \n",
165 | " 30.0 | \n",
166 | " SQL, R, SPSS, SAS | \n",
167 | " dissatisfied | \n",
168 | " satisfied | \n",
169 | " neutral | \n",
170 | " very satisfied | \n",
171 | "
\n",
172 | " \n",
173 | " | 7 | \n",
174 | " 2021-02-20 09:16:02.448 | \n",
175 | " Prefer not to say | \n",
176 | " 24.0 | \n",
177 | " PowerPoint | \n",
178 | " satisfied | \n",
179 | " very satisfied | \n",
180 | " satisfied | \n",
181 | " very satisfied | \n",
182 | "
\n",
183 | " \n",
184 | " | 8 | \n",
185 | " 2021-02-20 09:22:23.851 | \n",
186 | " Male | \n",
187 | " NaN | \n",
188 | " Word | \n",
189 | " very dissatisfied | \n",
190 | " dissatisfied | \n",
191 | " neutral | \n",
192 | " None | \n",
193 | "
\n",
194 | " \n",
195 | " | 9 | \n",
196 | " 2021-02-20 09:23:22.259 | \n",
197 | " NaN | \n",
198 | " NaN | \n",
199 | " Excel | \n",
200 | " NaN | \n",
201 | " NaN | \n",
202 | " NaN | \n",
203 | " satisfied | \n",
204 | "
\n",
205 | " \n",
206 | "
\n",
207 | "
"
208 | ],
209 | "text/plain": [
210 | " Timestamp gender age (years) \\\n",
211 | "0 2021-02-19 22:36:35.982 Male 24.0 \n",
212 | "1 2021-02-19 22:38:17.913 Male 22.0 \n",
213 | "2 2021-02-19 22:49:14.802 Prefer not to say 35.0 \n",
214 | "3 2021-02-19 23:01:24.057 Female 23.0 \n",
215 | "4 2021-02-20 08:27:19.584 Female 27.0 \n",
216 | "5 2021-02-20 08:27:36.059 Female 19.0 \n",
217 | "6 2021-02-20 08:28:19.725 Prefer not to say 30.0 \n",
218 | "7 2021-02-20 09:16:02.448 Prefer not to say 24.0 \n",
219 | "8 2021-02-20 09:22:23.851 Male NaN \n",
220 | "9 2021-02-20 09:23:22.259 NaN NaN \n",
221 | "\n",
222 | " skills satisfaction [product] \\\n",
223 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n",
224 | "1 Excel, SQL very satisfied \n",
225 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n",
226 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n",
227 | "4 Word, Excel, PowerPoint neutral \n",
228 | "5 Excel, Python, R satisfied \n",
229 | "6 SQL, R, SPSS, SAS dissatisfied \n",
230 | "7 PowerPoint satisfied \n",
231 | "8 Word very dissatisfied \n",
232 | "9 Excel NaN \n",
233 | "\n",
234 | " satisfaction [price] satisfaction [service] satisfaction [overall] \n",
235 | "0 dissatisfied neutral satisfied \n",
236 | "1 satisfied neutral dissatisfied \n",
237 | "2 dissatisfied neutral dissatisfied \n",
238 | "3 satisfied neutral dissatisfied \n",
239 | "4 satisfied very satisfied very satisfied \n",
240 | "5 satisfied satisfied satisfied \n",
241 | "6 satisfied neutral very satisfied \n",
242 | "7 very satisfied satisfied very satisfied \n",
243 | "8 dissatisfied neutral None \n",
244 | "9 NaN NaN satisfied "
245 | ]
246 | },
247 | "execution_count": 54,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "# google sheets generated by google form\n",
254 | "# https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581\n",
255 | "# note: replace 'edit#' with 'export?format=xlsx&'\n",
256 | "gs_url='https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581'\n",
257 | "url=re.sub('edit#', 'export?format=xlsx&', gs_url)\n",
258 | "# url='https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/export?format=xlsx&gid=1695829581'\n",
259 | "df = pd.read_excel(url)\n",
260 | "df"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": 55,
266 | "metadata": {},
267 | "outputs": [
268 | {
269 | "data": {
270 | "text/plain": [
271 | "Index(['Timestamp', 'gender', 'age (years)', 'skills',\n",
272 | " 'satisfaction [product]', 'satisfaction [price]',\n",
273 | " 'satisfaction [service]', 'satisfaction [overall]'],\n",
274 | " dtype='object')"
275 | ]
276 | },
277 | "execution_count": 55,
278 | "metadata": {},
279 | "output_type": "execute_result"
280 | }
281 | ],
282 | "source": [
283 | "df.columns"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 56,
289 | "metadata": {},
290 | "outputs": [
291 | {
292 | "data": {
293 | "text/plain": [
294 | "Index(['Timestamp', 'gender', 'age (years)', 'skills', 'product', 'price',\n",
295 | " 'service', 'overall'],\n",
296 | " dtype='object')"
297 | ]
298 | },
299 | "execution_count": 56,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "df.columns.str.replace(r'(satisfaction|\\[|\\])', '', regex=True).str.strip()"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 57,
311 | "metadata": {
312 | "scrolled": true
313 | },
314 | "outputs": [
315 | {
316 | "data": {
317 | "text/html": [
318 | "\n",
319 | "\n",
332 | "
\n",
333 | " \n",
334 | " \n",
335 | " | \n",
336 | " timestamp | \n",
337 | " gender | \n",
338 | " age | \n",
339 | " skills | \n",
340 | " product | \n",
341 | " price | \n",
342 | " service | \n",
343 | " overall | \n",
344 | "
\n",
345 | " \n",
346 | " \n",
347 | " \n",
348 | " | 0 | \n",
349 | " 2021-02-19 22:36:35.982 | \n",
350 | " Male | \n",
351 | " 24.0 | \n",
352 | " Word, Excel, PowerPoint, Access | \n",
353 | " very dissatisfied | \n",
354 | " dissatisfied | \n",
355 | " neutral | \n",
356 | " satisfied | \n",
357 | "
\n",
358 | " \n",
359 | " | 1 | \n",
360 | " 2021-02-19 22:38:17.913 | \n",
361 | " Male | \n",
362 | " 22.0 | \n",
363 | " Excel, SQL | \n",
364 | " very satisfied | \n",
365 | " satisfied | \n",
366 | " neutral | \n",
367 | " dissatisfied | \n",
368 | "
\n",
369 | " \n",
370 | " | 2 | \n",
371 | " 2021-02-19 22:49:14.802 | \n",
372 | " Prefer not to say | \n",
373 | " 35.0 | \n",
374 | " Word, Excel, PowerPoint, SPSS | \n",
375 | " very dissatisfied | \n",
376 | " dissatisfied | \n",
377 | " neutral | \n",
378 | " dissatisfied | \n",
379 | "
\n",
380 | " \n",
381 | " | 3 | \n",
382 | " 2021-02-19 23:01:24.057 | \n",
383 | " Female | \n",
384 | " 23.0 | \n",
385 | " Word, Excel, PowerPoint, SQL, Python, R, JavaS... | \n",
386 | " very satisfied | \n",
387 | " satisfied | \n",
388 | " neutral | \n",
389 | " dissatisfied | \n",
390 | "
\n",
391 | " \n",
392 | " | 4 | \n",
393 | " 2021-02-20 08:27:19.584 | \n",
394 | " Female | \n",
395 | " 27.0 | \n",
396 | " Word, Excel, PowerPoint | \n",
397 | " neutral | \n",
398 | " satisfied | \n",
399 | " very satisfied | \n",
400 | " very satisfied | \n",
401 | "
\n",
402 | " \n",
403 | " | 5 | \n",
404 | " 2021-02-20 08:27:36.059 | \n",
405 | " Female | \n",
406 | " 19.0 | \n",
407 | " Excel, Python, R | \n",
408 | " satisfied | \n",
409 | " satisfied | \n",
410 | " satisfied | \n",
411 | " satisfied | \n",
412 | "
\n",
413 | " \n",
414 | " | 6 | \n",
415 | " 2021-02-20 08:28:19.725 | \n",
416 | " Prefer not to say | \n",
417 | " 30.0 | \n",
418 | " SQL, R, SPSS, SAS | \n",
419 | " dissatisfied | \n",
420 | " satisfied | \n",
421 | " neutral | \n",
422 | " very satisfied | \n",
423 | "
\n",
424 | " \n",
425 | " | 7 | \n",
426 | " 2021-02-20 09:16:02.448 | \n",
427 | " Prefer not to say | \n",
428 | " 24.0 | \n",
429 | " PowerPoint | \n",
430 | " satisfied | \n",
431 | " very satisfied | \n",
432 | " satisfied | \n",
433 | " very satisfied | \n",
434 | "
\n",
435 | " \n",
436 | " | 8 | \n",
437 | " 2021-02-20 09:22:23.851 | \n",
438 | " Male | \n",
439 | " NaN | \n",
440 | " Word | \n",
441 | " very dissatisfied | \n",
442 | " dissatisfied | \n",
443 | " neutral | \n",
444 | " None | \n",
445 | "
\n",
446 | " \n",
447 | " | 9 | \n",
448 | " 2021-02-20 09:23:22.259 | \n",
449 | " NaN | \n",
450 | " NaN | \n",
451 | " Excel | \n",
452 | " NaN | \n",
453 | " NaN | \n",
454 | " NaN | \n",
455 | " satisfied | \n",
456 | "
\n",
457 | " \n",
458 | "
\n",
459 | "
"
460 | ],
461 | "text/plain": [
462 | " timestamp gender age \\\n",
463 | "0 2021-02-19 22:36:35.982 Male 24.0 \n",
464 | "1 2021-02-19 22:38:17.913 Male 22.0 \n",
465 | "2 2021-02-19 22:49:14.802 Prefer not to say 35.0 \n",
466 | "3 2021-02-19 23:01:24.057 Female 23.0 \n",
467 | "4 2021-02-20 08:27:19.584 Female 27.0 \n",
468 | "5 2021-02-20 08:27:36.059 Female 19.0 \n",
469 | "6 2021-02-20 08:28:19.725 Prefer not to say 30.0 \n",
470 | "7 2021-02-20 09:16:02.448 Prefer not to say 24.0 \n",
471 | "8 2021-02-20 09:22:23.851 Male NaN \n",
472 | "9 2021-02-20 09:23:22.259 NaN NaN \n",
473 | "\n",
474 | " skills product \\\n",
475 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n",
476 | "1 Excel, SQL very satisfied \n",
477 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n",
478 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n",
479 | "4 Word, Excel, PowerPoint neutral \n",
480 | "5 Excel, Python, R satisfied \n",
481 | "6 SQL, R, SPSS, SAS dissatisfied \n",
482 | "7 PowerPoint satisfied \n",
483 | "8 Word very dissatisfied \n",
484 | "9 Excel NaN \n",
485 | "\n",
486 | " price service overall \n",
487 | "0 dissatisfied neutral satisfied \n",
488 | "1 satisfied neutral dissatisfied \n",
489 | "2 dissatisfied neutral dissatisfied \n",
490 | "3 satisfied neutral dissatisfied \n",
491 | "4 satisfied very satisfied very satisfied \n",
492 | "5 satisfied satisfied satisfied \n",
493 | "6 satisfied neutral very satisfied \n",
494 | "7 very satisfied satisfied very satisfied \n",
495 | "8 dissatisfied neutral None \n",
496 | "9 NaN NaN satisfied "
497 | ]
498 | },
499 | "execution_count": 57,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "df.columns = ['timestamp', 'gender', 'age', 'skills', 'product', 'price', 'service', 'overall']\n",
506 | "df"
507 | ]
508 | },
509 | {
510 | "cell_type": "markdown",
511 | "metadata": {},
512 | "source": [
513 | "## recode gender "
514 | ]
515 | },
516 | {
517 | "cell_type": "code",
518 | "execution_count": 58,
519 | "metadata": {},
520 | "outputs": [
521 | {
522 | "data": {
523 | "text/plain": [
524 | "0 M\n",
525 | "1 M\n",
526 | "2 X\n",
527 | "3 F\n",
528 | "4 F\n",
529 | "5 F\n",
530 | "6 X\n",
531 | "7 X\n",
532 | "8 M\n",
533 | "9 NaN\n",
534 | "Name: gender, dtype: object"
535 | ]
536 | },
537 | "execution_count": 58,
538 | "metadata": {},
539 | "output_type": "execute_result"
540 | }
541 | ],
542 | "source": [
543 | "d = {'Female': 'F',\n",
544 | " 'Male': 'M',\n",
545 | " 'Prefer not to say': 'X'}\n",
546 | "\n",
547 | "df.gender.map(d, na_action='ignore')"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": 59,
553 | "metadata": {},
554 | "outputs": [],
555 | "source": [
556 | "df.gender=df.gender.map(d, na_action='ignore')"
557 | ]
558 | },
559 | {
560 | "cell_type": "code",
561 | "execution_count": 60,
562 | "metadata": {},
563 | "outputs": [
564 | {
565 | "data": {
566 | "text/html": [
567 | "\n",
568 | "\n",
581 | "
\n",
582 | " \n",
583 | " \n",
584 | " | \n",
585 | " timestamp | \n",
586 | " gender | \n",
587 | " age | \n",
588 | " skills | \n",
589 | " product | \n",
590 | " price | \n",
591 | " service | \n",
592 | " overall | \n",
593 | "
\n",
594 | " \n",
595 | " \n",
596 | " \n",
597 | " | 0 | \n",
598 | " 2021-02-19 22:36:35.982 | \n",
599 | " M | \n",
600 | " 24.0 | \n",
601 | " Word, Excel, PowerPoint, Access | \n",
602 | " very dissatisfied | \n",
603 | " dissatisfied | \n",
604 | " neutral | \n",
605 | " satisfied | \n",
606 | "
\n",
607 | " \n",
608 | " | 1 | \n",
609 | " 2021-02-19 22:38:17.913 | \n",
610 | " M | \n",
611 | " 22.0 | \n",
612 | " Excel, SQL | \n",
613 | " very satisfied | \n",
614 | " satisfied | \n",
615 | " neutral | \n",
616 | " dissatisfied | \n",
617 | "
\n",
618 | " \n",
619 | " | 2 | \n",
620 | " 2021-02-19 22:49:14.802 | \n",
621 | " X | \n",
622 | " 35.0 | \n",
623 | " Word, Excel, PowerPoint, SPSS | \n",
624 | " very dissatisfied | \n",
625 | " dissatisfied | \n",
626 | " neutral | \n",
627 | " dissatisfied | \n",
628 | "
\n",
629 | " \n",
630 | " | 3 | \n",
631 | " 2021-02-19 23:01:24.057 | \n",
632 | " F | \n",
633 | " 23.0 | \n",
634 | " Word, Excel, PowerPoint, SQL, Python, R, JavaS... | \n",
635 | " very satisfied | \n",
636 | " satisfied | \n",
637 | " neutral | \n",
638 | " dissatisfied | \n",
639 | "
\n",
640 | " \n",
641 | " | 4 | \n",
642 | " 2021-02-20 08:27:19.584 | \n",
643 | " F | \n",
644 | " 27.0 | \n",
645 | " Word, Excel, PowerPoint | \n",
646 | " neutral | \n",
647 | " satisfied | \n",
648 | " very satisfied | \n",
649 | " very satisfied | \n",
650 | "
\n",
651 | " \n",
652 | " | 5 | \n",
653 | " 2021-02-20 08:27:36.059 | \n",
654 | " F | \n",
655 | " 19.0 | \n",
656 | " Excel, Python, R | \n",
657 | " satisfied | \n",
658 | " satisfied | \n",
659 | " satisfied | \n",
660 | " satisfied | \n",
661 | "
\n",
662 | " \n",
663 | " | 6 | \n",
664 | " 2021-02-20 08:28:19.725 | \n",
665 | " X | \n",
666 | " 30.0 | \n",
667 | " SQL, R, SPSS, SAS | \n",
668 | " dissatisfied | \n",
669 | " satisfied | \n",
670 | " neutral | \n",
671 | " very satisfied | \n",
672 | "
\n",
673 | " \n",
674 | " | 7 | \n",
675 | " 2021-02-20 09:16:02.448 | \n",
676 | " X | \n",
677 | " 24.0 | \n",
678 | " PowerPoint | \n",
679 | " satisfied | \n",
680 | " very satisfied | \n",
681 | " satisfied | \n",
682 | " very satisfied | \n",
683 | "
\n",
684 | " \n",
685 | " | 8 | \n",
686 | " 2021-02-20 09:22:23.851 | \n",
687 | " M | \n",
688 | " NaN | \n",
689 | " Word | \n",
690 | " very dissatisfied | \n",
691 | " dissatisfied | \n",
692 | " neutral | \n",
693 | " None | \n",
694 | "
\n",
695 | " \n",
696 | " | 9 | \n",
697 | " 2021-02-20 09:23:22.259 | \n",
698 | " NaN | \n",
699 | " NaN | \n",
700 | " Excel | \n",
701 | " NaN | \n",
702 | " NaN | \n",
703 | " NaN | \n",
704 | " satisfied | \n",
705 | "
\n",
706 | " \n",
707 | "
\n",
708 | "
"
709 | ],
710 | "text/plain": [
711 | " timestamp gender age \\\n",
712 | "0 2021-02-19 22:36:35.982 M 24.0 \n",
713 | "1 2021-02-19 22:38:17.913 M 22.0 \n",
714 | "2 2021-02-19 22:49:14.802 X 35.0 \n",
715 | "3 2021-02-19 23:01:24.057 F 23.0 \n",
716 | "4 2021-02-20 08:27:19.584 F 27.0 \n",
717 | "5 2021-02-20 08:27:36.059 F 19.0 \n",
718 | "6 2021-02-20 08:28:19.725 X 30.0 \n",
719 | "7 2021-02-20 09:16:02.448 X 24.0 \n",
720 | "8 2021-02-20 09:22:23.851 M NaN \n",
721 | "9 2021-02-20 09:23:22.259 NaN NaN \n",
722 | "\n",
723 | " skills product \\\n",
724 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n",
725 | "1 Excel, SQL very satisfied \n",
726 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n",
727 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n",
728 | "4 Word, Excel, PowerPoint neutral \n",
729 | "5 Excel, Python, R satisfied \n",
730 | "6 SQL, R, SPSS, SAS dissatisfied \n",
731 | "7 PowerPoint satisfied \n",
732 | "8 Word very dissatisfied \n",
733 | "9 Excel NaN \n",
734 | "\n",
735 | " price service overall \n",
736 | "0 dissatisfied neutral satisfied \n",
737 | "1 satisfied neutral dissatisfied \n",
738 | "2 dissatisfied neutral dissatisfied \n",
739 | "3 satisfied neutral dissatisfied \n",
740 | "4 satisfied very satisfied very satisfied \n",
741 | "5 satisfied satisfied satisfied \n",
742 | "6 satisfied neutral very satisfied \n",
743 | "7 very satisfied satisfied very satisfied \n",
744 | "8 dissatisfied neutral None \n",
745 | "9 NaN NaN satisfied "
746 | ]
747 | },
748 | "execution_count": 60,
749 | "metadata": {},
750 | "output_type": "execute_result"
751 | }
752 | ],
753 | "source": [
754 | "df"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": 61,
760 | "metadata": {},
761 | "outputs": [
762 | {
763 | "data": {
764 | "text/plain": [
765 | "0 1.0\n",
766 | "1 5.0\n",
767 | "2 1.0\n",
768 | "3 5.0\n",
769 | "4 3.0\n",
770 | "5 4.0\n",
771 | "6 2.0\n",
772 | "7 4.0\n",
773 | "8 1.0\n",
774 | "9 NaN\n",
775 | "Name: product, dtype: float64"
776 | ]
777 | },
778 | "execution_count": 61,
779 | "metadata": {},
780 | "output_type": "execute_result"
781 | }
782 | ],
783 | "source": [
784 | "df['product'].map({'very dissatisfied':1, 'dissatisfied':2, 'neutral':3, 'satisfied':4, 'very satisfied':5}, na_action='ignore')"
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": 62,
790 | "metadata": {},
791 | "outputs": [
792 | {
793 | "data": {
794 | "text/plain": [
795 | "['very dissatisfied', 'very satisfied', 'very dissatisfied', 'very satisfied', 'neutral', 'satisfied', 'dissatisfied', 'satisfied', 'very dissatisfied', NaN]\n",
796 | "Categories (5, object): ['very dissatisfied' < 'dissatisfied' < 'neutral' < 'satisfied' < 'very satisfied']"
797 | ]
798 | },
799 | "execution_count": 62,
800 | "metadata": {},
801 | "output_type": "execute_result"
802 | }
803 | ],
804 | "source": [
805 | "pd.Categorical(df['product'], \n",
806 | " categories=['very dissatisfied', 'dissatisfied', 'neutral', 'satisfied', 'very satisfied'], ordered=True)"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": 63,
812 | "metadata": {},
813 | "outputs": [
814 | {
815 | "data": {
816 | "text/plain": [
817 | "0 very dissatisfied\n",
818 | "1 very satisfied\n",
819 | "2 very dissatisfied\n",
820 | "3 very satisfied\n",
821 | "4 neutral\n",
822 | "5 satisfied\n",
823 | "6 dissatisfied\n",
824 | "7 satisfied\n",
825 | "8 very dissatisfied\n",
826 | "9 NaN\n",
827 | "Name: product, dtype: object"
828 | ]
829 | },
830 | "execution_count": 63,
831 | "metadata": {},
832 | "output_type": "execute_result"
833 | }
834 | ],
835 | "source": [
836 | "df['product']"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": 64,
842 | "metadata": {},
843 | "outputs": [],
844 | "source": [
845 | "# Encode each Likert item as an ordered categorical plus two 1-5 scores: the _n column (dict lookup) and the _cat column (category codes)\n",
846 | "for c in ['product', 'price', 'service', 'overall']:\n",
847 | "    df[f'{c}_n']=df[c].astype(object).map({'very dissatisfied':1, 'dissatisfied':2, 'neutral':3, 'satisfied':4, 'very satisfied':5})  # astype(object): keeps the result numeric if this cell is re-run after df[c] has become categorical\n",
848 | "    df[c]=pd.Categorical(df[c], categories=['very dissatisfied', 'dissatisfied', 'neutral', 'satisfied', 'very satisfied'], ordered=True)\n",
849 | "    df[f'{c}_cat']=df[c].cat.codes.where(df[c].notna()) + 1  # cat.codes is -1 for a missing answer; mask it so the score stays NaN instead of becoming 0"
850 | ]
851 | },
852 | {
853 | "cell_type": "code",
854 | "execution_count": 65,
855 | "metadata": {},
856 | "outputs": [
857 | {
858 | "name": "stdout",
859 | "output_type": "stream",
860 | "text": [
861 | "\n",
862 | "RangeIndex: 10 entries, 0 to 9\n",
863 | "Data columns (total 16 columns):\n",
864 | " # Column Non-Null Count Dtype \n",
865 | "--- ------ -------------- ----- \n",
866 | " 0 timestamp 10 non-null datetime64[ns]\n",
867 | " 1 gender 9 non-null object \n",
868 | " 2 age 8 non-null float64 \n",
869 | " 3 skills 10 non-null object \n",
870 | " 4 product 9 non-null category \n",
871 | " 5 price 9 non-null category \n",
872 | " 6 service 9 non-null category \n",
873 | " 7 overall 9 non-null category \n",
874 | " 8 product_n 9 non-null float64 \n",
875 | " 9 product_cat 10 non-null int8 \n",
876 | " 10 price_n 9 non-null float64 \n",
877 | " 11 price_cat 10 non-null int8 \n",
878 | " 12 service_n 9 non-null float64 \n",
879 | " 13 service_cat 10 non-null int8 \n",
880 | " 14 overall_n 9 non-null float64 \n",
881 | " 15 overall_cat 10 non-null int8 \n",
882 | "dtypes: category(4), datetime64[ns](1), float64(5), int8(4), object(2)\n",
883 | "memory usage: 1.7+ KB\n"
884 | ]
885 | }
886 | ],
887 | "source": [
888 | "df.info()"
889 | ]
890 | },
891 | {
892 | "cell_type": "code",
893 | "execution_count": 66,
894 | "metadata": {},
895 | "outputs": [
896 | {
897 | "data": {
898 | "text/plain": [
899 | "0 1\n",
900 | "1 5\n",
901 | "2 1\n",
902 | "3 5\n",
903 | "4 3\n",
904 | "5 4\n",
905 | "6 2\n",
906 | "7 4\n",
907 | "8 1\n",
908 | "9 0\n",
909 | "dtype: int8"
910 | ]
911 | },
912 | "execution_count": 66,
913 | "metadata": {},
914 | "output_type": "execute_result"
915 | }
916 | ],
917 | "source": [
918 | "df['product'].cat.codes.add(1)  # 1-5 score from category position; a missing answer has code -1 and therefore shows up as 0"
919 | ]
920 | },
921 | {
922 | "cell_type": "code",
923 | "execution_count": 67,
924 | "metadata": {},
925 | "outputs": [
926 | {
927 | "data": {
928 | "text/plain": [
929 | "0 3\n",
930 | "1 1\n",
931 | "2 1\n",
932 | "3 1\n",
933 | "4 4\n",
934 | "5 3\n",
935 | "6 4\n",
936 | "7 4\n",
937 | "8 -1\n",
938 | "9 3\n",
939 | "dtype: int8"
940 | ]
941 | },
942 | "execution_count": 67,
943 | "metadata": {},
944 | "output_type": "execute_result"
945 | }
946 | ],
947 | "source": [
948 | "df['overall'].cat.codes"
949 | ]
950 | },
951 | {
952 | "cell_type": "code",
953 | "execution_count": 68,
954 | "metadata": {},
955 | "outputs": [
956 | {
957 | "data": {
958 | "text/plain": [
959 | "3.6666666666666665"
960 | ]
961 | },
962 | "execution_count": 68,
963 | "metadata": {},
964 | "output_type": "execute_result"
965 | }
966 | ],
967 | "source": [
968 | "df['overall_n'].mean()"
969 | ]
970 | },
971 | {
972 | "cell_type": "code",
973 | "execution_count": 69,
974 | "metadata": {},
975 | "outputs": [
976 | {
977 | "data": {
978 | "text/plain": [
979 | "3.3"
980 | ]
981 | },
982 | "execution_count": 69,
983 | "metadata": {},
984 | "output_type": "execute_result"
985 | }
986 | ],
987 | "source": [
988 | "df['overall'].cat.codes.add(1).mean()  # caution: the missing answer is counted as 0 here, so this is lower than df['overall_n'].mean()"
989 | ]
990 | },
991 | {
992 | "cell_type": "code",
993 | "execution_count": 70,
994 | "metadata": {},
995 | "outputs": [
996 | {
997 | "data": {
998 | "text/html": [
999 | "\n",
1000 | "\n",
1013 | "
\n",
1014 | " \n",
1015 | " \n",
1016 | " | \n",
1017 | " timestamp | \n",
1018 | " gender | \n",
1019 | " age | \n",
1020 | " skills | \n",
1021 | " product | \n",
1022 | " price | \n",
1023 | " service | \n",
1024 | " overall | \n",
1025 | " product_n | \n",
1026 | " product_cat | \n",
1027 | " price_n | \n",
1028 | " price_cat | \n",
1029 | " service_n | \n",
1030 | " service_cat | \n",
1031 | " overall_n | \n",
1032 | " overall_cat | \n",
1033 | "
\n",
1034 | " \n",
1035 | " \n",
1036 | " \n",
1037 | " | 0 | \n",
1038 | " 2021-02-19 22:36:35.982 | \n",
1039 | " M | \n",
1040 | " 24.0 | \n",
1041 | " Word, Excel, PowerPoint, Access | \n",
1042 | " very dissatisfied | \n",
1043 | " dissatisfied | \n",
1044 | " neutral | \n",
1045 | " satisfied | \n",
1046 | " 1.0 | \n",
1047 | " 1 | \n",
1048 | " 2.0 | \n",
1049 | " 2 | \n",
1050 | " 3.0 | \n",
1051 | " 3 | \n",
1052 | " 4.0 | \n",
1053 | " 4 | \n",
1054 | "
\n",
1055 | " \n",
1056 | " | 4 | \n",
1057 | " 2021-02-20 08:27:19.584 | \n",
1058 | " F | \n",
1059 | " 27.0 | \n",
1060 | " Word, Excel, PowerPoint | \n",
1061 | " neutral | \n",
1062 | " satisfied | \n",
1063 | " very satisfied | \n",
1064 | " very satisfied | \n",
1065 | " 3.0 | \n",
1066 | " 3 | \n",
1067 | " 4.0 | \n",
1068 | " 4 | \n",
1069 | " 5.0 | \n",
1070 | " 5 | \n",
1071 | " 5.0 | \n",
1072 | " 5 | \n",
1073 | "
\n",
1074 | " \n",
1075 | " | 5 | \n",
1076 | " 2021-02-20 08:27:36.059 | \n",
1077 | " F | \n",
1078 | " 19.0 | \n",
1079 | " Excel, Python, R | \n",
1080 | " satisfied | \n",
1081 | " satisfied | \n",
1082 | " satisfied | \n",
1083 | " satisfied | \n",
1084 | " 4.0 | \n",
1085 | " 4 | \n",
1086 | " 4.0 | \n",
1087 | " 4 | \n",
1088 | " 4.0 | \n",
1089 | " 4 | \n",
1090 | " 4.0 | \n",
1091 | " 4 | \n",
1092 | "
\n",
1093 | " \n",
1094 | " | 6 | \n",
1095 | " 2021-02-20 08:28:19.725 | \n",
1096 | " X | \n",
1097 | " 30.0 | \n",
1098 | " SQL, R, SPSS, SAS | \n",
1099 | " dissatisfied | \n",
1100 | " satisfied | \n",
1101 | " neutral | \n",
1102 | " very satisfied | \n",
1103 | " 2.0 | \n",
1104 | " 2 | \n",
1105 | " 4.0 | \n",
1106 | " 4 | \n",
1107 | " 3.0 | \n",
1108 | " 3 | \n",
1109 | " 5.0 | \n",
1110 | " 5 | \n",
1111 | "
\n",
1112 | " \n",
1113 | " | 7 | \n",
1114 | " 2021-02-20 09:16:02.448 | \n",
1115 | " X | \n",
1116 | " 24.0 | \n",
1117 | " PowerPoint | \n",
1118 | " satisfied | \n",
1119 | " very satisfied | \n",
1120 | " satisfied | \n",
1121 | " very satisfied | \n",
1122 | " 4.0 | \n",
1123 | " 4 | \n",
1124 | " 5.0 | \n",
1125 | " 5 | \n",
1126 | " 4.0 | \n",
1127 | " 4 | \n",
1128 | " 5.0 | \n",
1129 | " 5 | \n",
1130 | "
\n",
1131 | " \n",
1132 | " | 9 | \n",
1133 | " 2021-02-20 09:23:22.259 | \n",
1134 | " NaN | \n",
1135 | " NaN | \n",
1136 | " Excel | \n",
1137 | " NaN | \n",
1138 | " NaN | \n",
1139 | " NaN | \n",
1140 | " satisfied | \n",
1141 | " NaN | \n",
1142 | " 0 | \n",
1143 | " NaN | \n",
1144 | " 0 | \n",
1145 | " NaN | \n",
1146 | " 0 | \n",
1147 | " 4.0 | \n",
1148 | " 4 | \n",
1149 | "
\n",
1150 | " \n",
1151 | "
\n",
1152 | "
"
1153 | ],
1154 | "text/plain": [
1155 | " timestamp gender age skills \\\n",
1156 | "0 2021-02-19 22:36:35.982 M 24.0 Word, Excel, PowerPoint, Access \n",
1157 | "4 2021-02-20 08:27:19.584 F 27.0 Word, Excel, PowerPoint \n",
1158 | "5 2021-02-20 08:27:36.059 F 19.0 Excel, Python, R \n",
1159 | "6 2021-02-20 08:28:19.725 X 30.0 SQL, R, SPSS, SAS \n",
1160 | "7 2021-02-20 09:16:02.448 X 24.0 PowerPoint \n",
1161 | "9 2021-02-20 09:23:22.259 NaN NaN Excel \n",
1162 | "\n",
1163 | " product price service overall \\\n",
1164 | "0 very dissatisfied dissatisfied neutral satisfied \n",
1165 | "4 neutral satisfied very satisfied very satisfied \n",
1166 | "5 satisfied satisfied satisfied satisfied \n",
1167 | "6 dissatisfied satisfied neutral very satisfied \n",
1168 | "7 satisfied very satisfied satisfied very satisfied \n",
1169 | "9 NaN NaN NaN satisfied \n",
1170 | "\n",
1171 | " product_n product_cat price_n price_cat service_n service_cat \\\n",
1172 | "0 1.0 1 2.0 2 3.0 3 \n",
1173 | "4 3.0 3 4.0 4 5.0 5 \n",
1174 | "5 4.0 4 4.0 4 4.0 4 \n",
1175 | "6 2.0 2 4.0 4 3.0 3 \n",
1176 | "7 4.0 4 5.0 5 4.0 4 \n",
1177 | "9 NaN 0 NaN 0 NaN 0 \n",
1178 | "\n",
1179 | " overall_n overall_cat \n",
1180 | "0 4.0 4 \n",
1181 | "4 5.0 5 \n",
1182 | "5 4.0 4 \n",
1183 | "6 5.0 5 \n",
1184 | "7 5.0 5 \n",
1185 | "9 4.0 4 "
1186 | ]
1187 | },
1188 | "execution_count": 70,
1189 | "metadata": {},
1190 | "output_type": "execute_result"
1191 | }
1192 | ],
1193 | "source": [
1194 | "df.loc[df['overall'] > 'neutral']  # ordered categorical: keeps rows rated above neutral; the row with a missing rating is excluded"
1195 | ]
1196 | },
1197 | {
1198 | "cell_type": "code",
1199 | "execution_count": 71,
1200 | "metadata": {},
1201 | "outputs": [
1202 | {
1203 | "data": {
1204 | "text/html": [
1205 | "\n",
1206 | "\n",
1219 | "
\n",
1220 | " \n",
1221 | " \n",
1222 | " | \n",
1223 | " timestamp | \n",
1224 | " gender | \n",
1225 | " age | \n",
1226 | " skills | \n",
1227 | " product | \n",
1228 | " price | \n",
1229 | " service | \n",
1230 | " overall | \n",
1231 | " product_n | \n",
1232 | " product_cat | \n",
1233 | " price_n | \n",
1234 | " price_cat | \n",
1235 | " service_n | \n",
1236 | " service_cat | \n",
1237 | " overall_n | \n",
1238 | " overall_cat | \n",
1239 | "
\n",
1240 | " \n",
1241 | " \n",
1242 | " \n",
1243 | " | 0 | \n",
1244 | " 2021-02-19 22:36:35.982 | \n",
1245 | " M | \n",
1246 | " 24.0 | \n",
1247 | " Word, Excel, PowerPoint, Access | \n",
1248 | " very dissatisfied | \n",
1249 | " dissatisfied | \n",
1250 | " neutral | \n",
1251 | " satisfied | \n",
1252 | " 1.0 | \n",
1253 | " 1 | \n",
1254 | " 2.0 | \n",
1255 | " 2 | \n",
1256 | " 3.0 | \n",
1257 | " 3 | \n",
1258 | " 4.0 | \n",
1259 | " 4 | \n",
1260 | "
\n",
1261 | " \n",
1262 | " | 4 | \n",
1263 | " 2021-02-20 08:27:19.584 | \n",
1264 | " F | \n",
1265 | " 27.0 | \n",
1266 | " Word, Excel, PowerPoint | \n",
1267 | " neutral | \n",
1268 | " satisfied | \n",
1269 | " very satisfied | \n",
1270 | " very satisfied | \n",
1271 | " 3.0 | \n",
1272 | " 3 | \n",
1273 | " 4.0 | \n",
1274 | " 4 | \n",
1275 | " 5.0 | \n",
1276 | " 5 | \n",
1277 | " 5.0 | \n",
1278 | " 5 | \n",
1279 | "
\n",
1280 | " \n",
1281 | " | 5 | \n",
1282 | " 2021-02-20 08:27:36.059 | \n",
1283 | " F | \n",
1284 | " 19.0 | \n",
1285 | " Excel, Python, R | \n",
1286 | " satisfied | \n",
1287 | " satisfied | \n",
1288 | " satisfied | \n",
1289 | " satisfied | \n",
1290 | " 4.0 | \n",
1291 | " 4 | \n",
1292 | " 4.0 | \n",
1293 | " 4 | \n",
1294 | " 4.0 | \n",
1295 | " 4 | \n",
1296 | " 4.0 | \n",
1297 | " 4 | \n",
1298 | "
\n",
1299 | " \n",
1300 | " | 6 | \n",
1301 | " 2021-02-20 08:28:19.725 | \n",
1302 | " X | \n",
1303 | " 30.0 | \n",
1304 | " SQL, R, SPSS, SAS | \n",
1305 | " dissatisfied | \n",
1306 | " satisfied | \n",
1307 | " neutral | \n",
1308 | " very satisfied | \n",
1309 | " 2.0 | \n",
1310 | " 2 | \n",
1311 | " 4.0 | \n",
1312 | " 4 | \n",
1313 | " 3.0 | \n",
1314 | " 3 | \n",
1315 | " 5.0 | \n",
1316 | " 5 | \n",
1317 | "
\n",
1318 | " \n",
1319 | " | 7 | \n",
1320 | " 2021-02-20 09:16:02.448 | \n",
1321 | " X | \n",
1322 | " 24.0 | \n",
1323 | " PowerPoint | \n",
1324 | " satisfied | \n",
1325 | " very satisfied | \n",
1326 | " satisfied | \n",
1327 | " very satisfied | \n",
1328 | " 4.0 | \n",
1329 | " 4 | \n",
1330 | " 5.0 | \n",
1331 | " 5 | \n",
1332 | " 4.0 | \n",
1333 | " 4 | \n",
1334 | " 5.0 | \n",
1335 | " 5 | \n",
1336 | "
\n",
1337 | " \n",
1338 | " | 9 | \n",
1339 | " 2021-02-20 09:23:22.259 | \n",
1340 | " NaN | \n",
1341 | " NaN | \n",
1342 | " Excel | \n",
1343 | " NaN | \n",
1344 | " NaN | \n",
1345 | " NaN | \n",
1346 | " satisfied | \n",
1347 | " NaN | \n",
1348 | " 0 | \n",
1349 | " NaN | \n",
1350 | " 0 | \n",
1351 | " NaN | \n",
1352 | " 0 | \n",
1353 | " 4.0 | \n",
1354 | " 4 | \n",
1355 | "
\n",
1356 | " \n",
1357 | "
\n",
1358 | "
"
1359 | ],
1360 | "text/plain": [
1361 | " timestamp gender age skills \\\n",
1362 | "0 2021-02-19 22:36:35.982 M 24.0 Word, Excel, PowerPoint, Access \n",
1363 | "4 2021-02-20 08:27:19.584 F 27.0 Word, Excel, PowerPoint \n",
1364 | "5 2021-02-20 08:27:36.059 F 19.0 Excel, Python, R \n",
1365 | "6 2021-02-20 08:28:19.725 X 30.0 SQL, R, SPSS, SAS \n",
1366 | "7 2021-02-20 09:16:02.448 X 24.0 PowerPoint \n",
1367 | "9 2021-02-20 09:23:22.259 NaN NaN Excel \n",
1368 | "\n",
1369 | " product price service overall \\\n",
1370 | "0 very dissatisfied dissatisfied neutral satisfied \n",
1371 | "4 neutral satisfied very satisfied very satisfied \n",
1372 | "5 satisfied satisfied satisfied satisfied \n",
1373 | "6 dissatisfied satisfied neutral very satisfied \n",
1374 | "7 satisfied very satisfied satisfied very satisfied \n",
1375 | "9 NaN NaN NaN satisfied \n",
1376 | "\n",
1377 | " product_n product_cat price_n price_cat service_n service_cat \\\n",
1378 | "0 1.0 1 2.0 2 3.0 3 \n",
1379 | "4 3.0 3 4.0 4 5.0 5 \n",
1380 | "5 4.0 4 4.0 4 4.0 4 \n",
1381 | "6 2.0 2 4.0 4 3.0 3 \n",
1382 | "7 4.0 4 5.0 5 4.0 4 \n",
1383 | "9 NaN 0 NaN 0 NaN 0 \n",
1384 | "\n",
1385 | " overall_n overall_cat \n",
1386 | "0 4.0 4 \n",
1387 | "4 5.0 5 \n",
1388 | "5 4.0 4 \n",
1389 | "6 5.0 5 \n",
1390 | "7 5.0 5 \n",
1391 | "9 4.0 4 "
1392 | ]
1393 | },
1394 | "execution_count": 71,
1395 | "metadata": {},
1396 | "output_type": "execute_result"
1397 | }
1398 | ],
1399 | "source": [
1400 | "df.query('overall_n > 3')  # numeric equivalent of the ordered-categorical filter: scores above neutral (3)"
1401 | ]
1402 | },
1403 | {
1404 | "cell_type": "code",
1405 | "execution_count": 72,
1406 | "metadata": {},
1407 | "outputs": [
1408 | {
1409 | "data": {
1410 | "text/plain": [
1411 | "0 1\n",
1412 | "1 5\n",
1413 | "2 1\n",
1414 | "3 5\n",
1415 | "4 3\n",
1416 | "5 4\n",
1417 | "6 2\n",
1418 | "7 4\n",
1419 | "8 1\n",
1420 | "9 0\n",
1421 | "Name: product_cat, dtype: int8"
1422 | ]
1423 | },
1424 | "execution_count": 72,
1425 | "metadata": {},
1426 | "output_type": "execute_result"
1427 | }
1428 | ],
1429 | "source": [
1430 | "df['product_cat']"
1431 | ]
1432 | },
1433 | {
1434 | "cell_type": "code",
1435 | "execution_count": 73,
1436 | "metadata": {},
1437 | "outputs": [
1438 | {
1439 | "data": {
1440 | "text/plain": [
1441 | "0 0\n",
1442 | "1 4\n",
1443 | "2 0\n",
1444 | "3 4\n",
1445 | "4 2\n",
1446 | "5 3\n",
1447 | "6 1\n",
1448 | "7 3\n",
1449 | "8 0\n",
1450 | "9 -1\n",
1451 | "dtype: int8"
1452 | ]
1453 | },
1454 | "execution_count": 73,
1455 | "metadata": {},
1456 | "output_type": "execute_result"
1457 | }
1458 | ],
1459 | "source": [
1460 | "df['product'].cat.codes"
1461 | ]
1462 | },
1463 | {
1464 | "cell_type": "code",
1465 | "execution_count": 74,
1466 | "metadata": {},
1467 | "outputs": [
1468 | {
1469 | "data": {
1470 | "text/plain": [
1471 | "'very dissatisfied'"
1472 | ]
1473 | },
1474 | "execution_count": 74,
1475 | "metadata": {},
1476 | "output_type": "execute_result"
1477 | }
1478 | ],
1479 | "source": [
1480 | "df.loc[0, 'product']  # scalar lookup by row label 0 and column name"
1481 | ]
1482 | },
1483 | {
1484 | "cell_type": "code",
1485 | "execution_count": 75,
1486 | "metadata": {},
1487 | "outputs": [
1488 | {
1489 | "data": {
1490 | "text/html": [
1491 | "\n",
1492 | "\n",
1505 | "
\n",
1506 | " \n",
1507 | " \n",
1508 | " | \n",
1509 | " Access | \n",
1510 | " Excel | \n",
1511 | " JavaScript | \n",
1512 | " PowerPoint | \n",
1513 | " Python | \n",
1514 | " R | \n",
1515 | " SAS | \n",
1516 | " SPSS | \n",
1517 | " SQL | \n",
1518 | " Word | \n",
1519 | "
\n",
1520 | " \n",
1521 | " \n",
1522 | " \n",
1523 | " | 0 | \n",
1524 | " 1 | \n",
1525 | " 1 | \n",
1526 | " 0 | \n",
1527 | " 1 | \n",
1528 | " 0 | \n",
1529 | " 0 | \n",
1530 | " 0 | \n",
1531 | " 0 | \n",
1532 | " 0 | \n",
1533 | " 1 | \n",
1534 | "
\n",
1535 | " \n",
1536 | " | 1 | \n",
1537 | " 0 | \n",
1538 | " 1 | \n",
1539 | " 0 | \n",
1540 | " 0 | \n",
1541 | " 0 | \n",
1542 | " 0 | \n",
1543 | " 0 | \n",
1544 | " 0 | \n",
1545 | " 1 | \n",
1546 | " 0 | \n",
1547 | "
\n",
1548 | " \n",
1549 | " | 2 | \n",
1550 | " 0 | \n",
1551 | " 1 | \n",
1552 | " 0 | \n",
1553 | " 1 | \n",
1554 | " 0 | \n",
1555 | " 0 | \n",
1556 | " 0 | \n",
1557 | " 1 | \n",
1558 | " 0 | \n",
1559 | " 1 | \n",
1560 | "
\n",
1561 | " \n",
1562 | " | 3 | \n",
1563 | " 0 | \n",
1564 | " 1 | \n",
1565 | " 1 | \n",
1566 | " 1 | \n",
1567 | " 1 | \n",
1568 | " 1 | \n",
1569 | " 0 | \n",
1570 | " 0 | \n",
1571 | " 1 | \n",
1572 | " 1 | \n",
1573 | "
\n",
1574 | " \n",
1575 | " | 4 | \n",
1576 | " 0 | \n",
1577 | " 1 | \n",
1578 | " 0 | \n",
1579 | " 1 | \n",
1580 | " 0 | \n",
1581 | " 0 | \n",
1582 | " 0 | \n",
1583 | " 0 | \n",
1584 | " 0 | \n",
1585 | " 1 | \n",
1586 | "
\n",
1587 | " \n",
1588 | " | 5 | \n",
1589 | " 0 | \n",
1590 | " 1 | \n",
1591 | " 0 | \n",
1592 | " 0 | \n",
1593 | " 1 | \n",
1594 | " 1 | \n",
1595 | " 0 | \n",
1596 | " 0 | \n",
1597 | " 0 | \n",
1598 | " 0 | \n",
1599 | "
\n",
1600 | " \n",
1601 | " | 6 | \n",
1602 | " 0 | \n",
1603 | " 0 | \n",
1604 | " 0 | \n",
1605 | " 0 | \n",
1606 | " 0 | \n",
1607 | " 1 | \n",
1608 | " 1 | \n",
1609 | " 1 | \n",
1610 | " 1 | \n",
1611 | " 0 | \n",
1612 | "
\n",
1613 | " \n",
1614 | " | 7 | \n",
1615 | " 0 | \n",
1616 | " 0 | \n",
1617 | " 0 | \n",
1618 | " 1 | \n",
1619 | " 0 | \n",
1620 | " 0 | \n",
1621 | " 0 | \n",
1622 | " 0 | \n",
1623 | " 0 | \n",
1624 | " 0 | \n",
1625 | "
\n",
1626 | " \n",
1627 | " | 8 | \n",
1628 | " 0 | \n",
1629 | " 0 | \n",
1630 | " 0 | \n",
1631 | " 0 | \n",
1632 | " 0 | \n",
1633 | " 0 | \n",
1634 | " 0 | \n",
1635 | " 0 | \n",
1636 | " 0 | \n",
1637 | " 1 | \n",
1638 | "
\n",
1639 | " \n",
1640 | " | 9 | \n",
1641 | " 0 | \n",
1642 | " 1 | \n",
1643 | " 0 | \n",
1644 | " 0 | \n",
1645 | " 0 | \n",
1646 | " 0 | \n",
1647 | " 0 | \n",
1648 | " 0 | \n",
1649 | " 0 | \n",
1650 | " 0 | \n",
1651 | "
\n",
1652 | " \n",
1653 | "
\n",
1654 | "
"
1655 | ],
1656 | "text/plain": [
1657 | " Access Excel JavaScript PowerPoint Python R SAS SPSS SQL Word\n",
1658 | "0 1 1 0 1 0 0 0 0 0 1\n",
1659 | "1 0 1 0 0 0 0 0 0 1 0\n",
1660 | "2 0 1 0 1 0 0 0 1 0 1\n",
1661 | "3 0 1 1 1 1 1 0 0 1 1\n",
1662 | "4 0 1 0 1 0 0 0 0 0 1\n",
1663 | "5 0 1 0 0 1 1 0 0 0 0\n",
1664 | "6 0 0 0 0 0 1 1 1 1 0\n",
1665 | "7 0 0 0 1 0 0 0 0 0 0\n",
1666 | "8 0 0 0 0 0 0 0 0 0 1\n",
1667 | "9 0 1 0 0 0 0 0 0 0 0"
1668 | ]
1669 | },
1670 | "execution_count": 75,
1671 | "metadata": {},
1672 | "output_type": "execute_result"
1673 | }
1674 | ],
1675 | "source": [
1676 | "ds = df['skills'].str.get_dummies(sep=', ')  # one 0/1 indicator column per skill in the comma-separated answers\n",
1677 | "ds"
1678 | ]
1679 | },
1680 | {
1681 | "cell_type": "code",
1682 | "execution_count": 76,
1683 | "metadata": {},
1684 | "outputs": [
1685 | {
1686 | "data": {
1687 | "text/html": [
1688 | "\n",
1689 | "\n",
1702 | "
\n",
1703 | " \n",
1704 | " \n",
1705 | " | \n",
1706 | " timestamp | \n",
1707 | " gender | \n",
1708 | " age | \n",
1709 | " skills | \n",
1710 | " product | \n",
1711 | " price | \n",
1712 | " service | \n",
1713 | " overall | \n",
1714 | " product_n | \n",
1715 | " product_cat | \n",
1716 | " ... | \n",
1717 | " Access | \n",
1718 | " Excel | \n",
1719 | " JavaScript | \n",
1720 | " PowerPoint | \n",
1721 | " Python | \n",
1722 | " R | \n",
1723 | " SAS | \n",
1724 | " SPSS | \n",
1725 | " SQL | \n",
1726 | " Word | \n",
1727 | "
\n",
1728 | " \n",
1729 | " \n",
1730 | " \n",
1731 | " | 0 | \n",
1732 | " 2021-02-19 22:36:35.982 | \n",
1733 | " M | \n",
1734 | " 24.0 | \n",
1735 | " Word, Excel, PowerPoint, Access | \n",
1736 | " very dissatisfied | \n",
1737 | " dissatisfied | \n",
1738 | " neutral | \n",
1739 | " satisfied | \n",
1740 | " 1.0 | \n",
1741 | " 1 | \n",
1742 | " ... | \n",
1743 | " 1 | \n",
1744 | " 1 | \n",
1745 | " 0 | \n",
1746 | " 1 | \n",
1747 | " 0 | \n",
1748 | " 0 | \n",
1749 | " 0 | \n",
1750 | " 0 | \n",
1751 | " 0 | \n",
1752 | " 1 | \n",
1753 | "
\n",
1754 | " \n",
1755 | " | 1 | \n",
1756 | " 2021-02-19 22:38:17.913 | \n",
1757 | " M | \n",
1758 | " 22.0 | \n",
1759 | " Excel, SQL | \n",
1760 | " very satisfied | \n",
1761 | " satisfied | \n",
1762 | " neutral | \n",
1763 | " dissatisfied | \n",
1764 | " 5.0 | \n",
1765 | " 5 | \n",
1766 | " ... | \n",
1767 | " 0 | \n",
1768 | " 1 | \n",
1769 | " 0 | \n",
1770 | " 0 | \n",
1771 | " 0 | \n",
1772 | " 0 | \n",
1773 | " 0 | \n",
1774 | " 0 | \n",
1775 | " 1 | \n",
1776 | " 0 | \n",
1777 | "
\n",
1778 | " \n",
1779 | " | 2 | \n",
1780 | " 2021-02-19 22:49:14.802 | \n",
1781 | " X | \n",
1782 | " 35.0 | \n",
1783 | " Word, Excel, PowerPoint, SPSS | \n",
1784 | " very dissatisfied | \n",
1785 | " dissatisfied | \n",
1786 | " neutral | \n",
1787 | " dissatisfied | \n",
1788 | " 1.0 | \n",
1789 | " 1 | \n",
1790 | " ... | \n",
1791 | " 0 | \n",
1792 | " 1 | \n",
1793 | " 0 | \n",
1794 | " 1 | \n",
1795 | " 0 | \n",
1796 | " 0 | \n",
1797 | " 0 | \n",
1798 | " 1 | \n",
1799 | " 0 | \n",
1800 | " 1 | \n",
1801 | "
\n",
1802 | " \n",
1803 | " | 3 | \n",
1804 | " 2021-02-19 23:01:24.057 | \n",
1805 | " F | \n",
1806 | " 23.0 | \n",
1807 | " Word, Excel, PowerPoint, SQL, Python, R, JavaS... | \n",
1808 | " very satisfied | \n",
1809 | " satisfied | \n",
1810 | " neutral | \n",
1811 | " dissatisfied | \n",
1812 | " 5.0 | \n",
1813 | " 5 | \n",
1814 | " ... | \n",
1815 | " 0 | \n",
1816 | " 1 | \n",
1817 | " 1 | \n",
1818 | " 1 | \n",
1819 | " 1 | \n",
1820 | " 1 | \n",
1821 | " 0 | \n",
1822 | " 0 | \n",
1823 | " 1 | \n",
1824 | " 1 | \n",
1825 | "
\n",
1826 | " \n",
1827 | " | 4 | \n",
1828 | " 2021-02-20 08:27:19.584 | \n",
1829 | " F | \n",
1830 | " 27.0 | \n",
1831 | " Word, Excel, PowerPoint | \n",
1832 | " neutral | \n",
1833 | " satisfied | \n",
1834 | " very satisfied | \n",
1835 | " very satisfied | \n",
1836 | " 3.0 | \n",
1837 | " 3 | \n",
1838 | " ... | \n",
1839 | " 0 | \n",
1840 | " 1 | \n",
1841 | " 0 | \n",
1842 | " 1 | \n",
1843 | " 0 | \n",
1844 | " 0 | \n",
1845 | " 0 | \n",
1846 | " 0 | \n",
1847 | " 0 | \n",
1848 | " 1 | \n",
1849 | "
\n",
1850 | " \n",
1851 | " | 5 | \n",
1852 | " 2021-02-20 08:27:36.059 | \n",
1853 | " F | \n",
1854 | " 19.0 | \n",
1855 | " Excel, Python, R | \n",
1856 | " satisfied | \n",
1857 | " satisfied | \n",
1858 | " satisfied | \n",
1859 | " satisfied | \n",
1860 | " 4.0 | \n",
1861 | " 4 | \n",
1862 | " ... | \n",
1863 | " 0 | \n",
1864 | " 1 | \n",
1865 | " 0 | \n",
1866 | " 0 | \n",
1867 | " 1 | \n",
1868 | " 1 | \n",
1869 | " 0 | \n",
1870 | " 0 | \n",
1871 | " 0 | \n",
1872 | " 0 | \n",
1873 | "
\n",
1874 | " \n",
1875 | " | 6 | \n",
1876 | " 2021-02-20 08:28:19.725 | \n",
1877 | " X | \n",
1878 | " 30.0 | \n",
1879 | " SQL, R, SPSS, SAS | \n",
1880 | " dissatisfied | \n",
1881 | " satisfied | \n",
1882 | " neutral | \n",
1883 | " very satisfied | \n",
1884 | " 2.0 | \n",
1885 | " 2 | \n",
1886 | " ... | \n",
1887 | " 0 | \n",
1888 | " 0 | \n",
1889 | " 0 | \n",
1890 | " 0 | \n",
1891 | " 0 | \n",
1892 | " 1 | \n",
1893 | " 1 | \n",
1894 | " 1 | \n",
1895 | " 1 | \n",
1896 | " 0 | \n",
1897 | "
\n",
1898 | " \n",
1899 | " | 7 | \n",
1900 | " 2021-02-20 09:16:02.448 | \n",
1901 | " X | \n",
1902 | " 24.0 | \n",
1903 | " PowerPoint | \n",
1904 | " satisfied | \n",
1905 | " very satisfied | \n",
1906 | " satisfied | \n",
1907 | " very satisfied | \n",
1908 | " 4.0 | \n",
1909 | " 4 | \n",
1910 | " ... | \n",
1911 | " 0 | \n",
1912 | " 0 | \n",
1913 | " 0 | \n",
1914 | " 1 | \n",
1915 | " 0 | \n",
1916 | " 0 | \n",
1917 | " 0 | \n",
1918 | " 0 | \n",
1919 | " 0 | \n",
1920 | " 0 | \n",
1921 | "
\n",
1922 | " \n",
1923 | " | 8 | \n",
1924 | " 2021-02-20 09:22:23.851 | \n",
1925 | " M | \n",
1926 | " NaN | \n",
1927 | " Word | \n",
1928 | " very dissatisfied | \n",
1929 | " dissatisfied | \n",
1930 | " neutral | \n",
1931 | " NaN | \n",
1932 | " 1.0 | \n",
1933 | " 1 | \n",
1934 | " ... | \n",
1935 | " 0 | \n",
1936 | " 0 | \n",
1937 | " 0 | \n",
1938 | " 0 | \n",
1939 | " 0 | \n",
1940 | " 0 | \n",
1941 | " 0 | \n",
1942 | " 0 | \n",
1943 | " 0 | \n",
1944 | " 1 | \n",
1945 | "
\n",
1946 | " \n",
1947 | " | 9 | \n",
1948 | " 2021-02-20 09:23:22.259 | \n",
1949 | " NaN | \n",
1950 | " NaN | \n",
1951 | " Excel | \n",
1952 | " NaN | \n",
1953 | " NaN | \n",
1954 | " NaN | \n",
1955 | " satisfied | \n",
1956 | " NaN | \n",
1957 | " 0 | \n",
1958 | " ... | \n",
1959 | " 0 | \n",
1960 | " 1 | \n",
1961 | " 0 | \n",
1962 | " 0 | \n",
1963 | " 0 | \n",
1964 | " 0 | \n",
1965 | " 0 | \n",
1966 | " 0 | \n",
1967 | " 0 | \n",
1968 | " 0 | \n",
1969 | "
\n",
1970 | " \n",
1971 | "
\n",
1972 | "
10 rows × 26 columns
\n",
1973 | "
"
1974 | ],
1975 | "text/plain": [
1976 | " timestamp gender age \\\n",
1977 | "0 2021-02-19 22:36:35.982 M 24.0 \n",
1978 | "1 2021-02-19 22:38:17.913 M 22.0 \n",
1979 | "2 2021-02-19 22:49:14.802 X 35.0 \n",
1980 | "3 2021-02-19 23:01:24.057 F 23.0 \n",
1981 | "4 2021-02-20 08:27:19.584 F 27.0 \n",
1982 | "5 2021-02-20 08:27:36.059 F 19.0 \n",
1983 | "6 2021-02-20 08:28:19.725 X 30.0 \n",
1984 | "7 2021-02-20 09:16:02.448 X 24.0 \n",
1985 | "8 2021-02-20 09:22:23.851 M NaN \n",
1986 | "9 2021-02-20 09:23:22.259 NaN NaN \n",
1987 | "\n",
1988 | " skills product \\\n",
1989 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n",
1990 | "1 Excel, SQL very satisfied \n",
1991 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n",
1992 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n",
1993 | "4 Word, Excel, PowerPoint neutral \n",
1994 | "5 Excel, Python, R satisfied \n",
1995 | "6 SQL, R, SPSS, SAS dissatisfied \n",
1996 | "7 PowerPoint satisfied \n",
1997 | "8 Word very dissatisfied \n",
1998 | "9 Excel NaN \n",
1999 | "\n",
2000 | " price service overall product_n product_cat \\\n",
2001 | "0 dissatisfied neutral satisfied 1.0 1 \n",
2002 | "1 satisfied neutral dissatisfied 5.0 5 \n",
2003 | "2 dissatisfied neutral dissatisfied 1.0 1 \n",
2004 | "3 satisfied neutral dissatisfied 5.0 5 \n",
2005 | "4 satisfied very satisfied very satisfied 3.0 3 \n",
2006 | "5 satisfied satisfied satisfied 4.0 4 \n",
2007 | "6 satisfied neutral very satisfied 2.0 2 \n",
2008 | "7 very satisfied satisfied very satisfied 4.0 4 \n",
2009 | "8 dissatisfied neutral NaN 1.0 1 \n",
2010 | "9 NaN NaN satisfied NaN 0 \n",
2011 | "\n",
2012 | " ... Access Excel JavaScript PowerPoint Python R SAS SPSS SQL Word \n",
2013 | "0 ... 1 1 0 1 0 0 0 0 0 1 \n",
2014 | "1 ... 0 1 0 0 0 0 0 0 1 0 \n",
2015 | "2 ... 0 1 0 1 0 0 0 1 0 1 \n",
2016 | "3 ... 0 1 1 1 1 1 0 0 1 1 \n",
2017 | "4 ... 0 1 0 1 0 0 0 0 0 1 \n",
2018 | "5 ... 0 1 0 0 1 1 0 0 0 0 \n",
2019 | "6 ... 0 0 0 0 0 1 1 1 1 0 \n",
2020 | "7 ... 0 0 0 1 0 0 0 0 0 0 \n",
2021 | "8 ... 0 0 0 0 0 0 0 0 0 1 \n",
2022 | "9 ... 0 1 0 0 0 0 0 0 0 0 \n",
2023 | "\n",
2024 | "[10 rows x 26 columns]"
2025 | ]
2026 | },
2027 | "execution_count": 76,
2028 | "metadata": {},
2029 | "output_type": "execute_result"
2030 | }
2031 | ],
2032 | "source": [
2033 | "dt = pd.concat([df, ds], axis='columns').copy()  # survey answers side by side with the skill indicator columns, aligned on the row index\n",
2034 | "dt"
2035 | ]
2036 | },
2037 | {
2038 | "cell_type": "code",
2039 | "execution_count": 77,
2040 | "metadata": {},
2041 | "outputs": [
2042 | {
2043 | "data": {
2044 | "text/plain": [
2045 | "Index(['timestamp', 'gender', 'age', 'skills', 'product', 'price', 'service',\n",
2046 | " 'overall', 'product_n', 'product_cat', 'price_n', 'price_cat',\n",
2047 | " 'service_n', 'service_cat', 'overall_n', 'overall_cat', 'Access',\n",
2048 | " 'Excel', 'JavaScript', 'PowerPoint', 'Python', 'R', 'SAS', 'SPSS',\n",
2049 | " 'SQL', 'Word'],\n",
2050 | " dtype='object')"
2051 | ]
2052 | },
2053 | "execution_count": 77,
2054 | "metadata": {},
2055 | "output_type": "execute_result"
2056 | }
2057 | ],
2058 | "source": [
2059 | "dt.columns"
2060 | ]
2061 | },
2062 | {
2063 | "cell_type": "code",
2064 | "execution_count": 78,
2065 | "metadata": {},
2066 | "outputs": [
2067 | {
2068 | "data": {
2069 | "text/html": [
2070 | "\n",
2071 | "\n",
2084 | "
\n",
2085 | " \n",
2086 | " \n",
2087 | " | \n",
2088 | " count | \n",
2089 | " mean | \n",
2090 | " std | \n",
2091 | " min | \n",
2092 | " 25% | \n",
2093 | " 50% | \n",
2094 | " 75% | \n",
2095 | " max | \n",
2096 | "
\n",
2097 | " \n",
2098 | " \n",
2099 | " \n",
2100 | " | age | \n",
2101 | " 8.0 | \n",
2102 | " 25.500000 | \n",
2103 | " 5.042675 | \n",
2104 | " 19.0 | \n",
2105 | " 22.75 | \n",
2106 | " 24.0 | \n",
2107 | " 27.75 | \n",
2108 | " 35.0 | \n",
2109 | "
\n",
2110 | " \n",
2111 | " | product_n | \n",
2112 | " 9.0 | \n",
2113 | " 2.888889 | \n",
2114 | " 1.691482 | \n",
2115 | " 1.0 | \n",
2116 | " 1.00 | \n",
2117 | " 3.0 | \n",
2118 | " 4.00 | \n",
2119 | " 5.0 | \n",
2120 | "
\n",
2121 | " \n",
2122 | " | product_cat | \n",
2123 | " 10.0 | \n",
2124 | " 2.600000 | \n",
2125 | " 1.837873 | \n",
2126 | " 0.0 | \n",
2127 | " 1.00 | \n",
2128 | " 2.5 | \n",
2129 | " 4.00 | \n",
2130 | " 5.0 | \n",
2131 | "
\n",
2132 | " \n",
2133 | " | price_n | \n",
2134 | " 9.0 | \n",
2135 | " 3.444444 | \n",
2136 | " 1.130388 | \n",
2137 | " 2.0 | \n",
2138 | " 2.00 | \n",
2139 | " 4.0 | \n",
2140 | " 4.00 | \n",
2141 | " 5.0 | \n",
2142 | "
\n",
2143 | " \n",
2144 | " | price_cat | \n",
2145 | " 10.0 | \n",
2146 | " 3.100000 | \n",
2147 | " 1.523884 | \n",
2148 | " 0.0 | \n",
2149 | " 2.00 | \n",
2150 | " 4.0 | \n",
2151 | " 4.00 | \n",
2152 | " 5.0 | \n",
2153 | "
\n",
2154 | " \n",
2155 | " | service_n | \n",
2156 | " 9.0 | \n",
2157 | " 3.444444 | \n",
2158 | " 0.726483 | \n",
2159 | " 3.0 | \n",
2160 | " 3.00 | \n",
2161 | " 3.0 | \n",
2162 | " 4.00 | \n",
2163 | " 5.0 | \n",
2164 | "
\n",
2165 | " \n",
2166 | " | service_cat | \n",
2167 | " 10.0 | \n",
2168 | " 3.100000 | \n",
2169 | " 1.286684 | \n",
2170 | " 0.0 | \n",
2171 | " 3.00 | \n",
2172 | " 3.0 | \n",
2173 | " 3.75 | \n",
2174 | " 5.0 | \n",
2175 | "
\n",
2176 | " \n",
2177 | " | overall_n | \n",
2178 | " 9.0 | \n",
2179 | " 3.666667 | \n",
2180 | " 1.322876 | \n",
2181 | " 2.0 | \n",
2182 | " 2.00 | \n",
2183 | " 4.0 | \n",
2184 | " 5.00 | \n",
2185 | " 5.0 | \n",
2186 | "
\n",
2187 | " \n",
2188 | " | overall_cat | \n",
2189 | " 10.0 | \n",
2190 | " 3.300000 | \n",
2191 | " 1.702939 | \n",
2192 | " 0.0 | \n",
2193 | " 2.00 | \n",
2194 | " 4.0 | \n",
2195 | " 4.75 | \n",
2196 | " 5.0 | \n",
2197 | "
\n",
2198 | " \n",
2199 | " | Access | \n",
2200 | " 10.0 | \n",
2201 | " 0.100000 | \n",
2202 | " 0.316228 | \n",
2203 | " 0.0 | \n",
2204 | " 0.00 | \n",
2205 | " 0.0 | \n",
2206 | " 0.00 | \n",
2207 | " 1.0 | \n",
2208 | "
\n",
2209 | " \n",
2210 | " | Excel | \n",
2211 | " 10.0 | \n",
2212 | " 0.700000 | \n",
2213 | " 0.483046 | \n",
2214 | " 0.0 | \n",
2215 | " 0.25 | \n",
2216 | " 1.0 | \n",
2217 | " 1.00 | \n",
2218 | " 1.0 | \n",
2219 | "
\n",
2220 | " \n",
2221 | " | JavaScript | \n",
2222 | " 10.0 | \n",
2223 | " 0.100000 | \n",
2224 | " 0.316228 | \n",
2225 | " 0.0 | \n",
2226 | " 0.00 | \n",
2227 | " 0.0 | \n",
2228 | " 0.00 | \n",
2229 | " 1.0 | \n",
2230 | "
\n",
2231 | " \n",
2232 | " | PowerPoint | \n",
2233 | " 10.0 | \n",
2234 | " 0.500000 | \n",
2235 | " 0.527046 | \n",
2236 | " 0.0 | \n",
2237 | " 0.00 | \n",
2238 | " 0.5 | \n",
2239 | " 1.00 | \n",
2240 | " 1.0 | \n",
2241 | "
\n",
2242 | " \n",
2243 | " | Python | \n",
2244 | " 10.0 | \n",
2245 | " 0.200000 | \n",
2246 | " 0.421637 | \n",
2247 | " 0.0 | \n",
2248 | " 0.00 | \n",
2249 | " 0.0 | \n",
2250 | " 0.00 | \n",
2251 | " 1.0 | \n",
2252 | "
\n",
2253 | " \n",
2254 | " | R | \n",
2255 | " 10.0 | \n",
2256 | " 0.300000 | \n",
2257 | " 0.483046 | \n",
2258 | " 0.0 | \n",
2259 | " 0.00 | \n",
2260 | " 0.0 | \n",
2261 | " 0.75 | \n",
2262 | " 1.0 | \n",
2263 | "
\n",
2264 | " \n",
2265 | " | SAS | \n",
2266 | " 10.0 | \n",
2267 | " 0.100000 | \n",
2268 | " 0.316228 | \n",
2269 | " 0.0 | \n",
2270 | " 0.00 | \n",
2271 | " 0.0 | \n",
2272 | " 0.00 | \n",
2273 | " 1.0 | \n",
2274 | "
\n",
2275 | " \n",
2276 | " | SPSS | \n",
2277 | " 10.0 | \n",
2278 | " 0.200000 | \n",
2279 | " 0.421637 | \n",
2280 | " 0.0 | \n",
2281 | " 0.00 | \n",
2282 | " 0.0 | \n",
2283 | " 0.00 | \n",
2284 | " 1.0 | \n",
2285 | "
\n",
2286 | " \n",
2287 | " | SQL | \n",
2288 | " 10.0 | \n",
2289 | " 0.300000 | \n",
2290 | " 0.483046 | \n",
2291 | " 0.0 | \n",
2292 | " 0.00 | \n",
2293 | " 0.0 | \n",
2294 | " 0.75 | \n",
2295 | " 1.0 | \n",
2296 | "
\n",
2297 | " \n",
2298 | " | Word | \n",
2299 | " 10.0 | \n",
2300 | " 0.500000 | \n",
2301 | " 0.527046 | \n",
2302 | " 0.0 | \n",
2303 | " 0.00 | \n",
2304 | " 0.5 | \n",
2305 | " 1.00 | \n",
2306 | " 1.0 | \n",
2307 | "
\n",
2308 | " \n",
2309 | "
\n",
2310 | "
"
2311 | ],
2312 | "text/plain": [
2313 | " count mean std min 25% 50% 75% max\n",
2314 | "age 8.0 25.500000 5.042675 19.0 22.75 24.0 27.75 35.0\n",
2315 | "product_n 9.0 2.888889 1.691482 1.0 1.00 3.0 4.00 5.0\n",
2316 | "product_cat 10.0 2.600000 1.837873 0.0 1.00 2.5 4.00 5.0\n",
2317 | "price_n 9.0 3.444444 1.130388 2.0 2.00 4.0 4.00 5.0\n",
2318 | "price_cat 10.0 3.100000 1.523884 0.0 2.00 4.0 4.00 5.0\n",
2319 | "service_n 9.0 3.444444 0.726483 3.0 3.00 3.0 4.00 5.0\n",
2320 | "service_cat 10.0 3.100000 1.286684 0.0 3.00 3.0 3.75 5.0\n",
2321 | "overall_n 9.0 3.666667 1.322876 2.0 2.00 4.0 5.00 5.0\n",
2322 | "overall_cat 10.0 3.300000 1.702939 0.0 2.00 4.0 4.75 5.0\n",
2323 | "Access 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n",
2324 | "Excel 10.0 0.700000 0.483046 0.0 0.25 1.0 1.00 1.0\n",
2325 | "JavaScript 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n",
2326 | "PowerPoint 10.0 0.500000 0.527046 0.0 0.00 0.5 1.00 1.0\n",
2327 | "Python 10.0 0.200000 0.421637 0.0 0.00 0.0 0.00 1.0\n",
2328 | "R 10.0 0.300000 0.483046 0.0 0.00 0.0 0.75 1.0\n",
2329 | "SAS 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n",
2330 | "SPSS 10.0 0.200000 0.421637 0.0 0.00 0.0 0.00 1.0\n",
2331 | "SQL 10.0 0.300000 0.483046 0.0 0.00 0.0 0.75 1.0\n",
2332 | "Word 10.0 0.500000 0.527046 0.0 0.00 0.5 1.00 1.0"
2333 | ]
2334 | },
2335 | "execution_count": 78,
2336 | "metadata": {},
2337 | "output_type": "execute_result"
2338 | }
2339 | ],
2340 | "source": [
2341 | "dt.describe().T"
2342 | ]
2343 | },
2344 | {
2345 | "cell_type": "code",
2346 | "execution_count": 79,
2347 | "metadata": {},
2348 | "outputs": [
2349 | {
2350 | "data": {
2351 | "text/plain": [
2352 | "Access 1\n",
2353 | "Excel 7\n",
2354 | "JavaScript 1\n",
2355 | "PowerPoint 5\n",
2356 | "Python 2\n",
2357 | "R 3\n",
2358 | "SAS 1\n",
2359 | "SPSS 2\n",
2360 | "SQL 3\n",
2361 | "Word 5\n",
2362 | "dtype: int64"
2363 | ]
2364 | },
2365 | "execution_count": 79,
2366 | "metadata": {},
2367 | "output_type": "execute_result"
2368 | }
2369 | ],
2370 | "source": [
2371 | "dt.loc[:, 'Access':'Word'].sum()"
2372 | ]
2373 | },
2374 | {
2375 | "cell_type": "code",
2376 | "execution_count": 80,
2377 | "metadata": {},
2378 | "outputs": [
2379 | {
2380 | "data": {
2381 | "text/plain": [
2382 | "0 4\n",
2383 | "1 2\n",
2384 | "2 4\n",
2385 | "3 7\n",
2386 | "4 3\n",
2387 | "5 3\n",
2388 | "6 4\n",
2389 | "7 1\n",
2390 | "8 1\n",
2391 | "9 1\n",
2392 | "dtype: int64"
2393 | ]
2394 | },
2395 | "execution_count": 80,
2396 | "metadata": {},
2397 | "output_type": "execute_result"
2398 | }
2399 | ],
2400 | "source": [
2401 | "dt.loc[:, 'Access':'Word'].sum(axis=1)"
2402 | ]
2403 | }
2404 | ],
2405 | "metadata": {
2406 | "kernelspec": {
2407 | "display_name": "Python 3",
2408 | "language": "python",
2409 | "name": "python3"
2410 | },
2411 | "language_info": {
2412 | "codemirror_mode": {
2413 | "name": "ipython",
2414 | "version": 3
2415 | },
2416 | "file_extension": ".py",
2417 | "mimetype": "text/x-python",
2418 | "name": "python",
2419 | "nbconvert_exporter": "python",
2420 | "pygments_lexer": "ipython3",
2421 | "version": "3.7.9"
2422 | },
2423 | "widgets": {
2424 | "application/vnd.jupyter.widget-state+json": {
2425 | "state": {},
2426 | "version_major": 2,
2427 | "version_minor": 0
2428 | }
2429 | }
2430 | },
2431 | "nbformat": 4,
2432 | "nbformat_minor": 4
2433 | }
2434 |
--------------------------------------------------------------------------------