├── data_go_th_api_key.ipynb ├── read_tis-620 file.ipynb ├── bangkok open data.ipynb ├── data_go_th_api.ipynb ├── README.md └── pandas_transform_google_form_data2.ipynb /data_go_th_api_key.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# data.go.th: Data API\n", 8 | "\n", 9 | "web: https://opendata.data.go.th/\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n", 18 | "* YouTube: https://www.youtube.com/prasertcbs\n", 19 | "* github: https://github.com/prasertcbs/\n", 20 | "* kaggle: https://www.kaggle.com/prasertk/\n", 21 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n", 22 | "---\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "

Play YouTube Video

" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd\n", 39 | "import requests\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/", 48 | "height": 64 49 | }, 50 | "colab_type": "code", 51 | "id": "fdr0pYIf7P-_", 52 | "outputId": "c919deae-c99b-44b0-8924-4d2355ca0b63" 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "pandas version: 1.4.2\n", 60 | "requests version: 2.28.0\n" 61 | ] 62 | }, 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "Timestamp('2022-07-12 19:44:50.631955')" 67 | ] 68 | }, 69 | "execution_count": 2, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "print(f\"pandas version: {pd.__version__}\")\n", 76 | "print(f\"requests version: {requests.__version__}\")\n", 77 | "\n", 78 | "pd.Timestamp.now()\n", 79 | "# pd.Timestamp.now().strftime('%Y-%m-%d')\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Data API\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from getpass import getpass\n", 96 | "api_key = getpass()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "- ข้อมูลโรงแรมที่เป็นมิตรกับสิ่งแวดล้อม (Green Hotel)\n", 104 | " * https://opendata.data.go.th/dataset/db0103-002\n", 105 | " * https://opend.data.go.th/get-ckan/datastore_search?resource_id=d13e3be5-9020-4a78-9a7e-760b2dcbf6be&limit=5\n", 106 | "- จำนวนคนทำงานในที่พักแรม\n", 107 | " * https://opendata.data.go.th/dataset/os_17_00010" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | 
"text/html": [ 118 | "
\n", 119 | "\n", 132 | "\n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " 
\n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | "
_idNoAgencyProjectBudget YearAwardTumbolAmphurProvincePostcodeTelephoneStart_DateEnd_Date
011โรงแรมดุสิตธานี พัทยา ชลบุรีGreen Hotel2556ระดับดีเยี่ยม (ทอง)นาเกลือบางละมุงชลบุรี20150038 425 61125562558
122โรงแรมสยามเบย์ ชอว์ พัทยา ชลบุรีGreen Hotel2556ระดับดีเยี่ยม (ทอง)บางละมุงชลบุรี20150038 428 67825562558
233โรงแรมไทยการ์เด้นรีสอร์ท พัทยา ชลบุรีGreen Hotel2556ระดับดีเยี่ยม (ทอง)หนองปรือบางละมุงชลบุรี20150038 370 61425562558
344โรงแรมลองบีช รีสอร์ท พัทยา ชลบุรีGreen Hotel2556ระดับดีเยี่ยม (ทอง)บางละมุงชลบุรี20150038-414-616-2625562558
455โรงแรมบ้านอัมพวารีสอร์ทแอนด์สปา สมุทรสงครามGreen Hotel2556ระดับดีเยี่ยม (ทอง)อัมพวาอัมพวาสมุทรสงคราม75110034 752 22225562558
566โรงแรมบ้านทะเลดาว รีสอร์ท ประจวบคีรีขันธ์Green Hotel2556ระดับดีเยี่ยม (ทอง)หัวหินประจวบคีรีขันธ์77110083 253 602425562558
677โรงแรมบ้านบาหยัน รีสอร์ท หัวหิน ประจวบคีรีขันธ์Green Hotel2556ระดับดีเยี่ยม (ทอง)หัวหินประจวบคีรีขันธ์77110032 533 54425562558
788โรงแรมสยามเคมเปนสกี้ กรุงเทพฯGreen Hotel2556ระดับดีเยี่ยม (ทอง)ปทุมวันปทุมวันกรุงเทพมหานคร1033002 162 900025562558
899โรงแรมอมารีดอนเมือง แอร์พอร์ต กรุงเทพฯGreen Hotel2556ระดับดีเยี่ยม (ทอง)สีกันดอนเมืองกรุงเทพมหานคร1021002 566 194125562558
91010โรงแรมสยามเบย์วิว พัทยา ชลบุรีGreen Hotel2556ระดับดีมาก (เงิน)หนองปรือบางละมุงชลบุรี20150038 423 87125562558
\n", 314 | "
" 315 | ], 316 | "text/plain": [ 317 | " _id No Agency Project \\\n", 318 | "0 1 1 โรงแรมดุสิตธานี พัทยา ชลบุรี Green Hotel \n", 319 | "1 2 2 โรงแรมสยามเบย์ ชอว์ พัทยา ชลบุรี Green Hotel \n", 320 | "2 3 3 โรงแรมไทยการ์เด้นรีสอร์ท พัทยา ชลบุรี Green Hotel \n", 321 | "3 4 4 โรงแรมลองบีช รีสอร์ท พัทยา ชลบุรี Green Hotel \n", 322 | "4 5 5 โรงแรมบ้านอัมพวารีสอร์ทแอนด์สปา สมุทรสงคราม Green Hotel \n", 323 | "5 6 6 โรงแรมบ้านทะเลดาว รีสอร์ท ประจวบคีรีขันธ์ Green Hotel \n", 324 | "6 7 7 โรงแรมบ้านบาหยัน รีสอร์ท หัวหิน ประจวบคีรีขันธ์ Green Hotel \n", 325 | "7 8 8 โรงแรมสยามเคมเปนสกี้ กรุงเทพฯ Green Hotel \n", 326 | "8 9 9 โรงแรมอมารีดอนเมือง แอร์พอร์ต กรุงเทพฯ Green Hotel \n", 327 | "9 10 10 โรงแรมสยามเบย์วิว พัทยา ชลบุรี Green Hotel \n", 328 | "\n", 329 | " Budget Year Award Tumbol Amphur Province \\\n", 330 | "0 2556 ระดับดีเยี่ยม (ทอง) นาเกลือ บางละมุง ชลบุรี \n", 331 | "1 2556 ระดับดีเยี่ยม (ทอง) บางละมุง ชลบุรี \n", 332 | "2 2556 ระดับดีเยี่ยม (ทอง) หนองปรือ บางละมุง ชลบุรี \n", 333 | "3 2556 ระดับดีเยี่ยม (ทอง) บางละมุง ชลบุรี \n", 334 | "4 2556 ระดับดีเยี่ยม (ทอง) อัมพวา อัมพวา สมุทรสงคราม \n", 335 | "5 2556 ระดับดีเยี่ยม (ทอง) หัวหิน ประจวบคีรีขันธ์ \n", 336 | "6 2556 ระดับดีเยี่ยม (ทอง) หัวหิน ประจวบคีรีขันธ์ \n", 337 | "7 2556 ระดับดีเยี่ยม (ทอง) ปทุมวัน ปทุมวัน กรุงเทพมหานคร \n", 338 | "8 2556 ระดับดีเยี่ยม (ทอง) สีกัน ดอนเมือง กรุงเทพมหานคร \n", 339 | "9 2556 ระดับดีมาก (เงิน) หนองปรือ บางละมุง ชลบุรี \n", 340 | "\n", 341 | " Postcode Telephone Start_Date End_Date \n", 342 | "0 20150 038 425 611 2556 2558 \n", 343 | "1 20150 038 428 678 2556 2558 \n", 344 | "2 20150 038 370 614 2556 2558 \n", 345 | "3 20150 038-414-616-26 2556 2558 \n", 346 | "4 75110 034 752 222 2556 2558 \n", 347 | "5 77110 083 253 6024 2556 2558 \n", 348 | "6 77110 032 533 544 2556 2558 \n", 349 | "7 10330 02 162 9000 2556 2558 \n", 350 | "8 10210 02 566 1941 2556 2558 \n", 351 | "9 20150 038 423 871 2556 2558 " 352 | ] 353 | }, 354 | "execution_count": 4, 355 | "metadata": {}, 356 | 
"output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "# Request headers: the API key is sent in the \"api-key\" header\n", 361 | "headers = {\n", 362 | " \"api-key\": api_key,\n", 363 | "}\n", 364 | "\n", 365 | "params = {\"resource_id\": \"d13e3be5-9020-4a78-9a7e-760b2dcbf6be\", \"limit\": 10}\n", 366 | "r = requests.get(\n", 367 | "    \"https://opend.data.go.th/get-ckan/datastore_search\", params=params, headers=headers, timeout=30\n", 368 | ")\n", 369 | "r.raise_for_status()  # fail loudly: never build df from undefined or stale `records`\n", 370 | "j = r.json()\n", 371 | "records = j[\"result\"][\"records\"]\n", 372 | "df = pd.DataFrame(records)\n", 373 | "df\n" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "r.json()" 383 | ] 384 | } 385 | ], 386 | "metadata": { 387 | "kernelspec": { 388 | "display_name": "Python 3.9.12 ('base')", 389 | "language": "python", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "codemirror_mode": { 394 | "name": "ipython", 395 | "version": 3 396 | }, 397 | "file_extension": ".py", 398 | "mimetype": "text/x-python", 399 | "name": "python", 400 | "nbconvert_exporter": "python", 401 | "pygments_lexer": "ipython3", 402 | "version": "3.9.12" 403 | }, 404 | "vscode": { 405 | "interpreter": { 406 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce" 407 | } 408 | }, 409 | "widgets": { 410 | "application/vnd.jupyter.widget-state+json": { 411 | "state": {}, 412 | "version_major": 2, 413 | "version_minor": 0 414 | } 415 | } 416 | }, 417 | "nbformat": 4, 418 | "nbformat_minor": 4 419 | } 420 | -------------------------------------------------------------------------------- /read_tis-620 file.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "86e1940f", 6 | "metadata": { 7 | "toc-hr-collapsed": false 8 | }, 9 | "source": [ 10 | "# read/write TIS-620 file\n", 11 | "* standard encoding: 
https://docs.python.org/3.7/library/codecs.html#standard-encodings\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "---\n", 19 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n", 20 | "* YouTube: https://www.youtube.com/prasertcbs\n", 21 | "* github: https://github.com/prasertcbs/\n", 22 | "* kaggle: https://www.kaggle.com/prasertk/\n", 23 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n", 24 | "---\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import pandas as pd\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "86e1940f", 39 | "metadata": { 40 | "toc-hr-collapsed": false 41 | }, 42 | "source": [ 43 | "# pandas read TIS-620 file from URL\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | "
abbrprovince_thprovince_en
0กจกาญจนบุรีKanchanaburi
1กทมกรุงเทพมหานครBangkok
2จบจันทบุรีChanthaburi
3ฉชฉะเชิงเทราChachoengsao
4ชนชัยนาทChainat
............
72ยลยะลาYala
73รนระนองRanong
74สขสงขลาSongkhla
75สฎสุราษฎร์ธานีSurat Thani
76สตสตูลSatun
\n", 146 | "

77 rows × 3 columns

\n", 147 | "
" 148 | ], 149 | "text/plain": [ 150 | " abbr province_th province_en\n", 151 | "0 กจ กาญจนบุรี Kanchanaburi\n", 152 | "1 กทม กรุงเทพมหานคร Bangkok\n", 153 | "2 จบ จันทบุรี Chanthaburi\n", 154 | "3 ฉช ฉะเชิงเทรา Chachoengsao\n", 155 | "4 ชน ชัยนาท Chainat\n", 156 | ".. ... ... ...\n", 157 | "72 ยล ยะลา Yala\n", 158 | "73 รน ระนอง Ranong\n", 159 | "74 สข สงขลา Songkhla\n", 160 | "75 สฎ สุราษฎร์ธานี Surat Thani\n", 161 | "76 สต สตูล Satun\n", 162 | "\n", 163 | "[77 rows x 3 columns]" 164 | ] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province_tis.csv\"\n", 172 | "# url = \"https://data.go.th/dataset/c24ec42f-db0d-4a33-9e13-a98cd22de74d/resource/3d222c43-dea2-43f5-ad0d-7f0e58d4ad54/download/flightrule_11_2021.csv\"\n", 173 | "# df = pd.read_csv(url, encoding=\"utf-8\")\n", 174 | "df = pd.read_csv(url, encoding=\"iso8859_11\")\n", 175 | "# df = pd.read_csv(url, encoding=\"tis-620\")\n", 176 | "# df = pd.read_csv(url, encoding=\"thai\")\n", 177 | "# df = pd.read_csv(url, encoding=\"cp874\")\n", 178 | "df\n" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "df.to_csv(\"province_utf-8.csv\", index=False)\n", 188 | "\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "# Save as TIS-620\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 5, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "df.to_csv(\"province_tis-620.csv\", index=False, encoding=\"iso8859_11\")\n", 205 | "\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "# Read TIS-620 file from local\n" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 6, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 
| "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "abbr,province_th,province_en\n", 225 | "กจ,กาญจนบุรี,Kanchanaburi\n", 226 | "กทม,กรุงเทพมหานคร,Bangkok\n", 227 | "จบ,จันทบุรี,Chanthaburi\n", 228 | "ฉช,ฉะเชิงเทรา,Chachoengsao\n", 229 | "ชน,ชัยนาท,Chainat\n", 230 | "ชบ,ชลบุรี,Chonburi\n", 231 | "ตร,ตราด,Trat\n", 232 | "นฐ,นครปฐม,Nakhon Pathom\n", 233 | "นบ,นนทบุรี,Nonthaburi\n", 234 | "นย,นครนายก,Nakhon Nayok\n", 235 | "ปข,ประจวบคีรีขันธ์,Prachuap Khiri Khan\n", 236 | "ปจ,ปราจีนบุรี,Prachinburi\n", 237 | "ปท,ปทุมธานี,Pathum Thani\n", 238 | "พบ,เพชรบุรี,Phetchaburi\n", 239 | "รบ,ราชบุรี,Ratchaburi\n", 240 | "รย,ระยอง,Rayong\n", 241 | "ลบ,ลพบุรี,Lopburi\n", 242 | "สก,สระแก้ว,Sa Kaeo\n", 243 | "สค,สมุทรสาคร,Samut Sakhon\n", 244 | "สบ,สระบุรี,Saraburi\n", 245 | "สป,สมุทรปราการ,Samut Prakan\n", 246 | "สพ,สุพรรณบุรี,Suphan Buri\n", 247 | "สส,สมุทรสงคราม,Samut Songkhram\n", 248 | "สห,สิงห์บุรี,Sing Buri\n", 249 | "อท,อ่างทอง,Ang Thong\n", 250 | "อย,พระนครศรีอยุธยา,Phra Nakhon Si Ayutthaya\n", 251 | "กพ,กำแพงเพชร,Kamphaeng Phet\n", 252 | "ชม,เชียงใหม่,Chiang Mai\n", 253 | "ชร,เชียงราย,Chiang Rai\n", 254 | "ตก,ตาก,Tak\n", 255 | "นน,น่าน,Nan\n", 256 | "นว,นครสวรรค์,Nakhon Sawan\n", 257 | "พจ,พิจิตร,Phichit\n", 258 | "พช,เพชรบูรณ์,Phetchabun\n", 259 | "พย,พะเยา,Phayao\n", 260 | "พร,แพร่,Phrae\n", 261 | "พล,พิษณุโลก,Phitsanulok\n", 262 | "มส,แม่ฮ่องสอน,Mae Hong Son\n", 263 | "ลป,ลำปาง,Lampang\n", 264 | "ลพ,ลำพูน,Lamphun\n", 265 | "สท,สุโขทัย,Sukhothai\n", 266 | "อต,อุตรดิตถ์,Uttaradit\n", 267 | "อน,อุทัยธานี,Uthai Thani\n", 268 | "กส,กาฬสินธุ์,Kalasin\n", 269 | "ขก,ขอนแก่น,Khon Kaen\n", 270 | "ชย,ชัยภูมิ,Chaiyaphum\n", 271 | "นค,หนองคาย,Nong Khai\n", 272 | "นพ,นครพนม,Nakhon Phanom\n", 273 | "นภ,หนองบัวลำภู,Nong Bua Lamphu\n", 274 | "นม,นครราชสีมา,Nakhon Ratchasima\n", 275 | "บก,บึงกาฬ,Bueng Kan\n", 276 | "บร,บุรีรัมย์,Buriram\n", 277 | "มค,มหาสารคาม,Maha Sarakham\n", 278 | "มห,มุกดาหาร,Mukdahan\n", 279 | "ยส,ยโสธร,Yasothon\n", 280 | "รอ,ร้อยเอ็ด,Roi 
Et\n", 281 | "ลย,เลย,Loei\n", 282 | "ศก,ศรีสะเกษ,Sisaket\n", 283 | "สน,สกลนคร,Sakon Nakhon\n", 284 | "สร,สุรินทร์,Surin\n", 285 | "อจ,อำนาจเจริญ,Amnat Charoen\n", 286 | "อด,อุดรธานี,Udon Thani\n", 287 | "อบ,อุบลราชธานี,Ubon Ratchathani\n", 288 | "กบ,กระบี่,Krabi\n", 289 | "ชพ,ชุมพร,Chumphon\n", 290 | "ตง,ตรัง,Trang\n", 291 | "นธ,นราธิวาส,Narathiwat\n", 292 | "นศ,นครศรีธรรมราช,Nakhon Si Thammarat\n", 293 | "ปน,ปัตตานี,Pattani\n", 294 | "พง,พังงา,Phang Nga\n", 295 | "พท,พัทลุง,Phattalung\n", 296 | "ภก,ภูเก็ต,Phuket\n", 297 | "ยล,ยะลา,Yala\n", 298 | "รน,ระนอง,Ranong\n", 299 | "สข,สงขลา,Songkhla\n", 300 | "สฎ,สุราษฎร์ธานี,Surat Thani\n", 301 | "สต,สตูล,Satun\n", 302 | "\n" 303 | ] 304 | } 305 | ], 306 | "source": [ 307 | "with open(\"province_tis-620.csv\", \"r\", encoding=\"iso8859_11\") as f:\n", 308 | " print(f.read())\n", 309 | "\n" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "# read text file from URL\n" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "p_id,name,name_en,region,area_km2\n", 329 | "กจ,กาญจนบุรี,Kanchanaburi,C,19483.148\n", 330 | "กทม,กรุงเทพมหานคร,Bangkok,C,1568.737\n", 331 | "จบ,จันทบุรี,Chanthaburi,C,6338\n", 332 | "ฉช,ฉะเชิงเทรา,Chachoengsao,C,5351\n", 333 | "ชน,ชัยนาท,Chainat,C,2469.746\n", 334 | "ชบ,ชลบุรี,Chonburi,C,4611.829\n", 335 | "ตร,ตราด,Trat,C,2819\n", 336 | "นฐ,นครปฐม,Nakhon Pathom,C,2168.327\n", 337 | "นบ,นนทบุรี,Nonthaburi,C,622.303\n", 338 | "นย,นครนายก,Nakhon Nayok,C,2122\n", 339 | "ปข,ประจวบคีรีขันธ์,Prachuap Khiri Khan,C,6367.62\n", 340 | "ปจ,ปราจีนบุรี,Prachinburi,C,4762.362\n", 341 | "ปท,ปทุมธานี,Pathum Thani,C,1525.856\n", 342 | "พบ,เพชรบุรี,Phetchaburi,C,6225.138\n", 343 | "รบ,ราชบุรี,Ratchaburi,C,5196.462\n", 344 | "รย,ระยอง,Rayong,C,3552\n", 345 | "ลบ,ลพบุรี,Lopburi,C,6199.753\n", 346 | "สก,สระแก้ว,Sa 
Kaeo,C,7195.436\n", 347 | "สค,สมุทรสาคร,Samut Sakhon,C,872.347\n", 348 | "สบ,สระบุรี,Saraburi,C,3576.486\n", 349 | "สป,สมุทรปราการ,Samut Prakan,C,1004.092\n", 350 | "สพ,สุพรรณบุรี,Suphan Buri,C,5358.008\n", 351 | "สส,สมุทรสงคราม,Samut Songkhram,C,416.707\n", 352 | "สห,สิงห์บุรี,Sing Buri,C,822.478\n", 353 | "อท,อ่างทอง,Ang Thong,C,968.372\n", 354 | "อย,พระนครศรีอยุธยา,Phra Nakhon Si Ayutthaya,C,2556.64\n", 355 | "กพ,กำแพงเพชร,Kamphaeng Phet,N,8607.49\n", 356 | "ชม,เชียงใหม่,Chiang Mai,N,20107.057\n", 357 | "ชร,เชียงราย,Chiang Rai,N,11678.369\n", 358 | "ตก,ตาก,Tak,N,16406.65\n", 359 | "นน,น่าน,Nan,N,11472.072\n", 360 | "นว,นครสวรรค์,Nakhon Sawan,N,9597.677\n", 361 | "พจ,พิจิตร,Phichit,N,4531.013\n", 362 | "พช,เพชรบูรณ์,Phetchabun,N,12668.416\n", 363 | "พย,พะเยา,Phayao,N,6335.06\n", 364 | "พร,แพร่,Phrae,N,6538.598\n", 365 | "พล,พิษณุโลก,Phitsanulok,N,10815.854\n", 366 | "มส,แม่ฮ่องสอน,Mae Hong Son,N,12681.259\n", 367 | "ลป,ลำปาง,Lampang,N,12533.961\n", 368 | "ลพ,ลำพูน,Lamphun,N,4505.882\n", 369 | "สท,สุโขทัย,Sukhothai,N,6596.092\n", 370 | "อต,อุตรดิตถ์,Uttaradit,N,7838.592\n", 371 | "อน,อุทัยธานี,Uthai Thani,N,6730.246\n", 372 | "กส,กาฬสินธุ์,Kalasin,NE,6946.746\n", 373 | "ขก,ขอนแก่น,Khon Kaen,NE,10885.991\n", 374 | "ชย,ชัยภูมิ,Chaiyaphum,NE,12778.287\n", 375 | "นค,หนองคาย,Nong Khai,NE,3027.28\n", 376 | "นพ,นครพนม,Nakhon Phanom,NE,5512.668\n", 377 | "นภ,หนองบัวลำภู,Nong Bua Lamphu,NE,3859.086\n", 378 | "นม,นครราชสีมา,Nakhon Ratchasima,NE,20493.964\n", 379 | "บก,บึงกาฬ,Bueng Kan,NE,4305\n", 380 | "บร,บุรีรัมย์,Buriram,NE,10322.885\n", 381 | "มค,มหาสารคาม,Maha Sarakham,NE,5291.683\n", 382 | "มห,มุกดาหาร,Mukdahan,NE,4339.83\n", 383 | "ยส,ยโสธร,Yasothon,NE,4161.664\n", 384 | "รอ,ร้อยเอ็ด,Roi Et,NE,8299.449\n", 385 | "ลย,เลย,Loei,NE,11424.612\n", 386 | "ศก,ศรีสะเกษ,Sisaket,NE,8839.976\n", 387 | "สน,สกลนคร,Sakon Nakhon,NE,9605.764\n", 388 | "สร,สุรินทร์,Surin,NE,8124.056\n", 389 | "อจ,อำนาจเจริญ,Amnat Charoen,NE,3161.248\n", 390 | "อด,อุดรธานี,Udon Thani,NE,11730.302\n", 
391 | "อบ,อุบลราชธานี,Ubon Ratchathani,NE,16112.65\n", 392 | "กบ,กระบี่,Krabi,S,4708.512\n", 393 | "ชพ,ชุมพร,Chumphon,S,6010.849\n", 394 | "ตง,ตรัง,Trang,S,4917.519\n", 395 | "นธ,นราธิวาส,Narathiwat,S,4475.43\n", 396 | "นศ,นครศรีธรรมราช,Nakhon Si Thammarat,S,9942.502\n", 397 | "ปน,ปัตตานี,Pattani,S,1940.356\n", 398 | "พง,พังงา,Phang Nga,S,4170.895\n", 399 | "พท,พัทลุง,Phattalung,S,3424.473\n", 400 | "ภก,ภูเก็ต,Phuket,S,543.034\n", 401 | "ยล,ยะลา,Yala,S,4521.078\n", 402 | "รน,ระนอง,Ranong,S,3298.045\n", 403 | "สข,สงขลา,Songkhla,S,7393.889\n", 404 | "สฎ,สุราษฎร์ธานี,Surat Thani,S,12891.469\n", 405 | "สต,สตูล,Satun,S,2478.977\n", 406 | "\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "import requests\n", 412 | "\n", 413 | "url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province.csv\"\n", 414 | "# url=\"https://data.go.th/dataset/78e7405b-947e-4752-a10c-67c4b819ec74/resource/1cdb37ad-f52b-4a94-a829-c1876421ea83/download/usage_64_sep.csv\"\n", 415 | "r = requests.get(url) # r -> response\n", 416 | "if r.ok:\n", 417 | " s = r.text\n", 418 | " print(s)\n", 419 | " # print(s.splitlines())\n" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "# read TIS-620 from URL\n", 427 | "\n", 428 | "- standard encoding: https://docs.python.org/3.7/library/codecs.html#standard-encodings\n" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "import requests\n", 438 | "\n", 439 | "# url = \"https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/province_tis.csv\"\n", 440 | "url = \"https://data.go.th/dataset/78e7405b-947e-4752-a10c-67c4b819ec74/resource/1cdb37ad-f52b-4a94-a829-c1876421ea83/download/usage_64_sep.csv\"\n", 441 | "\n", 442 | "r = requests.get(url)\n", 443 | "if r.ok:\n", 444 | " s = r.content.decode(\"iso8859_11\")\n", 445 | " # s = r.content.decode('cp874') # code page 874\n", 446 | 
" # s = r.content.decode(\"thai\")\n", 447 | " # s = r.content.decode('tis-620')\n", 448 | " print(s)\n", 449 | " # print(s.splitlines())\n" 450 | ] 451 | } 452 | ], 453 | "metadata": { 454 | "interpreter": { 455 | "hash": "eefe6ca76f6e878a3e3929bbec9156982baa217b1f16ff8dc984bf661b4791cc" 456 | }, 457 | "kernelspec": { 458 | "display_name": "Python 3.8.12 ('base')", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 3 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython3", 472 | "version": "3.8.13" 473 | }, 474 | "orig_nbformat": 4 475 | }, 476 | "nbformat": 4, 477 | "nbformat_minor": 2 478 | } 479 | -------------------------------------------------------------------------------- /bangkok open data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# [Bangkok open data](https://data.bangkok.go.th/dataset/)\n", 8 | "\n", 9 | "---\n", 10 | "* author: [Prasert Kanawattanachai](prasert.k@chula.ac.th)\n", 11 | "* YouTube: https://www.youtube.com/prasertcbs\n", 12 | "* github: https://github.com/prasertcbs/\n", 13 | "* kaggle: https://www.kaggle.com/prasertk/\n", 14 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n", 15 | "---\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import requests\n", 25 | "import pandas as pd\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 10, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "
\n", 37 | "\n", 50 | "\n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | 
" \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
idstation_areapm10_minpm10_maxpm10_overstdpm10_countno2_minno2_maxno2_overstdno2_count...o38h_mino38h_maxo38h_overstdo38h_countpm2_5_minpm2_5_maxpm2_5_overstdpm2_5_countadddateupdatedate
06ค่ามาตรฐาน--120---170-...--70---50-2020-05-232020-09-15
15สรุป131855914340158.4033142...079.498665976163542020-05-232020-09-15
24ราชเทวี181251354210008372...079.498665976163542020-05-232020-09-15
33ราษฎร์บูรณะ36165163610116.208355...--------2020-05-232020-09-15
42พระโขนง13155143620158.408286...--------2020-05-232020-09-15
51ดินแดง29185283570136.608129...--------2020-05-232020-10-15
\n", 224 | "

6 rows × 32 columns

\n", 225 | "
" 226 | ], 227 | "text/plain": [ 228 | " id station_area pm10_min pm10_max pm10_overstd pm10_count no2_min no2_max \\\n", 229 | "0 6 ค่ามาตรฐาน - - 120 - - - \n", 230 | "1 5 สรุป 13 185 59 1434 0 158.4 \n", 231 | "2 4 ราชเทวี 18 125 1 354 2 100 \n", 232 | "3 3 ราษฎร์บูรณะ 36 165 16 361 0 116.2 \n", 233 | "4 2 พระโขนง 13 155 14 362 0 158.4 \n", 234 | "5 1 ดินแดง 29 185 28 357 0 136.6 \n", 235 | "\n", 236 | " no2_overstd no2_count ... o38h_min o38h_max o38h_overstd o38h_count \\\n", 237 | "0 170 - ... - - 70 - \n", 238 | "1 0 33142 ... 0 79.4 9 8665 \n", 239 | "2 0 8372 ... 0 79.4 9 8665 \n", 240 | "3 0 8355 ... - - - - \n", 241 | "4 0 8286 ... - - - - \n", 242 | "5 0 8129 ... - - - - \n", 243 | "\n", 244 | " pm2_5_min pm2_5_max pm2_5_overstd pm2_5_count adddate updatedate \n", 245 | "0 - - 50 - 2020-05-23 2020-09-15 \n", 246 | "1 9 76 16 354 2020-05-23 2020-09-15 \n", 247 | "2 9 76 16 354 2020-05-23 2020-09-15 \n", 248 | "3 - - - - 2020-05-23 2020-09-15 \n", 249 | "4 - - - - 2020-05-23 2020-09-15 \n", 250 | "5 - - - - 2020-05-23 2020-10-15 \n", 251 | "\n", 252 | "[6 rows x 32 columns]" 253 | ] 254 | }, 255 | "execution_count": 10, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "url=\"https://data.bangkok.go.th/dataset/52a5da69-c086-425a-bcb3-fccfadd824f5/resource/1aecc616-c570-4efb-9398-7dd4e39356b2/download/env_5ec8dad38033f.csv\"\n", 262 | "df=pd.read_csv(url)\n", 263 | "df" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 5, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/html": [ 274 | "
\n", 275 | "\n", 288 | "\n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | "
yearNo.Provinceแหล่งน้ำWQI scoreType
0256164กรุงเทพมหานครแม่น้ำเจ้าพระยา354
1256264กรุงเทพมหานครแม่น้ำเจ้าพระยา364
2256364กรุงเทพมหานครแม่น้ำเจ้าพระยา384
\n", 330 | "
" 331 | ], 332 | "text/plain": [ 333 | " year No. Province แหล่งน้ำ WQI score Type\n", 334 | "0 2561 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 35 4\n", 335 | "1 2562 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 36 4\n", 336 | "2 2563 64 กรุงเทพมหานคร แม่น้ำเจ้าพระยา 38 4" 337 | ] 338 | }, 339 | "execution_count": 5, 340 | "metadata": {}, 341 | "output_type": "execute_result" 342 | } 343 | ], 344 | "source": [ 345 | "url=\"https://data.bangkok.go.th/dataset/d7139135-3988-40fa-8703-d546eb480960/resource/70c4f872-6e4b-42db-b8c4-474ef29e0245/download/wq-4-wqi-..xlsx\"\n", 346 | "df=pd.read_excel(url)\n", 347 | "df" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 17, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/html": [ 358 | "
\n", 359 | "\n", 372 | "\n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " 
\n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | "
_iddrag_g_iddcodescodecommunity_namenum_volunteerspopulationamount_waste_kggeneral_waste_kgorganic_waste_kgtoxic_waste_kginfectious_waste_kgrecycle_waste_kgsum_monthsum_yearremark
0110250011025102504ชุมชนวัดดาวดึงษาราม2209536266174081088072507253725640
1210250021025102501ชุมชนวัดเทพนารี1990204259804612840904085725640
2310250031025102502ชุมชนเติมสุข1440201159655603540204023725640
3410250041025102502ชุมชนริมคลองบางพลัด15342554812263766451105110725640
4510250051025102502ชุมชนวัดฉัตรแก้วจงกลนี11498192159223576538403843725640
5610250061025102502ชุมชนแสงทอง1192237914181991137475807583725640
6710250071025102501ชุมชนสวนปรก1276189009072567037803780725640
def bkkapi(resource_id, limit=None, timeout=30):
    """Fetch the records of a Bangkok open-data datastore resource as a DataFrame.

    Calls the ``datastore_search`` action on data.bangkok.go.th and builds a
    DataFrame from ``result.records`` of the JSON response.

    Parameters
    ----------
    resource_id : str
        Resource id, i.e. the last UUID in the resource URL on the portal.
    limit : int, optional
        Maximum number of rows to request. When left as ``None`` the
        parameter is not sent and the server's default page size applies
        (CKAN documents a default of 100 rows), so pass an explicit value
        for larger resources.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {"resource_id": resource_id}
    if limit is not None:
        params["limit"] = limit

    response = requests.get(
        "https://data.bangkok.go.th/api/3/action/datastore_search",
        params=params,
        timeout=timeout,
    )
    # Fail early with a clear HTTP error rather than a KeyError on "result" below.
    response.raise_for_status()

    payload = response.json()
    return pd.DataFrame(payload["result"]["records"])
def download(url, **read_kwargs):
    """Load a CSV or Excel file from a URL (or path) into a DataFrame.

    The reader is chosen from the file extension: ``.xls`` / ``.xlsx`` go to
    ``pandas.read_excel``, everything else to ``pandas.read_csv``.

    Parameters
    ----------
    url : str
        Location of the file.
    **read_kwargs
        Extra keyword arguments forwarded unchanged to the selected pandas
        reader, e.g. ``encoding="tis-620"`` for Thai-encoded CSV files.

    Returns
    -------
    pandas.DataFrame
    """
    from urllib.parse import urlsplit

    excel_suffixes = (".xls", ".xlsx")
    # Check both the raw string and the URL path component, case-insensitively,
    # so "file.XLSX" and "file.xlsx?download=1" are recognised as Excel while
    # every URL the plain suffix test used to accept is still accepted.
    candidates = (url.lower(), urlsplit(url).path.lower())
    if any(candidate.endswith(excel_suffixes) for candidate in candidates):
        return pd.read_excel(url, **read_kwargs)
    return pd.read_csv(url, **read_kwargs)
\"https://data.bangkok.go.th/dataset/e53ef05f-32b2-4b06-b800-cad2702f426e/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e/download/wq-2-reused_water-2561-2563.xlsx\"\n", 646 | "df=download(url)\n", 647 | "df[:5]" 648 | ] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "## CKAN API\n", 655 | "- รายงานปริมาณน้ำที่ผ่านการบำบัดแล้วกลับมาใช้ประโยชน์ (พ.ศ.2561-2563)\n", 656 | " - https://data.bangkok.go.th/dataset/kpisbangkok_1100/resource/272b8fbc-6146-40d6-be7c-335ea30c2f7e\n" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": {}, 663 | "outputs": [], 664 | "source": [ 665 | "# resource_id=\"5cd749fe-11a8-457d-b88f-2151ac33fe27\"\n", 666 | "resource_id = \"272b8fbc-6146-40d6-be7c-335ea30c2f7e\"\n", 667 | "url = f\"https://data.bangkok.go.th/api/3/action/datastore_search?resource_id={resource_id}\"\n", 668 | "print(url)\n", 669 | "r = requests.get(url)\n", 670 | "\n", 671 | "j = r.json()\n", 672 | "# j\n", 673 | "j[\"result\"][\"records\"]\n", 674 | "\n" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "df = pd.DataFrame(j[\"result\"][\"records\"])\n", 684 | "df\n" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "df.loc[:, 'Year':]" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": {}, 700 | "outputs": [], 701 | "source": [] 702 | } 703 | ], 704 | "metadata": { 705 | "kernelspec": { 706 | "display_name": "Python 3.9.12 ('base')", 707 | "language": "python", 708 | "name": "python3" 709 | }, 710 | "language_info": { 711 | "codemirror_mode": { 712 | "name": "ipython", 713 | "version": 3 714 | }, 715 | "file_extension": ".py", 716 | "mimetype": "text/x-python", 717 | "name": "python", 718 | "nbconvert_exporter": "python", 719 | 
"pygments_lexer": "ipython3", 720 | "version": "3.9.12" 721 | }, 722 | "orig_nbformat": 4, 723 | "vscode": { 724 | "interpreter": { 725 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce" 726 | } 727 | } 728 | }, 729 | "nbformat": 4, 730 | "nbformat_minor": 2 731 | } 732 | -------------------------------------------------------------------------------- /data_go_th_api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# data.go.th\n", 8 | "\n", 9 | "web: https://data.go.th/\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "* author: [Prasert Kanawattanachai](prasert.k@chula.ac.th)\n", 18 | "* YouTube: https://www.youtube.com/prasertcbs\n", 19 | "* github: https://github.com/prasertcbs/\n", 20 | "* kaggle: https://www.kaggle.com/prasertk/\n", 21 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n", 22 | "---\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "

Play YouTube Video

" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd\n", 39 | "import requests\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/", 48 | "height": 64 49 | }, 50 | "colab_type": "code", 51 | "id": "fdr0pYIf7P-_", 52 | "outputId": "c919deae-c99b-44b0-8924-4d2355ca0b63" 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "pandas version: 1.4.2\n", 60 | "requests version: 2.28.0\n" 61 | ] 62 | }, 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "Timestamp('2022-07-13 05:48:50.328872')" 67 | ] 68 | }, 69 | "execution_count": 2, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "print(f\"pandas version: {pd.__version__}\")\n", 76 | "print(f\"requests version: {requests.__version__}\")\n", 77 | "\n", 78 | "pd.Timestamp.now()\n", 79 | "# pd.Timestamp.now().strftime('%Y-%m-%d')\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# read CSV\n", 87 | "* https://data.go.th/dataset/mrta-crmk\n" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": [ 98 | "
\n", 99 | "\n", 112 | "\n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | "
เดือนปีโครงการจำนวนผู้โดยสารรวมจำนวนผู้โดยสารเฉลี่ยรายวันจำนวนผู้โดยสารเฉลี่ยรายวันธรรมดาจำนวนผู้โดยสารเฉลี่ยรายวันหยุด
0มกราคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8,613,608277,858316,029184,553
1กุมภาพันธ์2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8,212,613293,308329,204217,526
2มีนาคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8,123,428262,046287,668208,239
3เมษายน2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล6,990,396233,013285,863153,738
4พฤษภาคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล7,265,594234,374276,809175,618
........................
157มกราคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม949885306413609320729
158กุมภาพันธ์2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม980551350204047723499
159มีนาคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม949885326843629222313
160เมษายน2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม903,89930,13037,81520,080
161พฤษภาคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม1198303386554759726274
\n", 238 | "

162 rows × 7 columns

\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " เดือน ปี โครงการ จำนวนผู้โดยสารรวม \\\n", 243 | "0 มกราคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,613,608 \n", 244 | "1 กุมภาพันธ์ 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,212,613 \n", 245 | "2 มีนาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 8,123,428 \n", 246 | "3 เมษายน 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 6,990,396 \n", 247 | "4 พฤษภาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล 7,265,594 \n", 248 | ".. ... ... ... ... \n", 249 | "157 มกราคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 949885 \n", 250 | "158 กุมภาพันธ์ 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 980551 \n", 251 | "159 มีนาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 949885 \n", 252 | "160 เมษายน 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 903,899 \n", 253 | "161 พฤษภาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม 1198303 \n", 254 | "\n", 255 | " จำนวนผู้โดยสารเฉลี่ยรายวัน จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา \\\n", 256 | "0 277,858 316,029 \n", 257 | "1 293,308 329,204 \n", 258 | "2 262,046 287,668 \n", 259 | "3 233,013 285,863 \n", 260 | "4 234,374 276,809 \n", 261 | ".. ... ... \n", 262 | "157 30641 36093 \n", 263 | "158 35020 40477 \n", 264 | "159 32684 36292 \n", 265 | "160 30,130 37,815 \n", 266 | "161 38655 47597 \n", 267 | "\n", 268 | " จำนวนผู้โดยสารเฉลี่ยรายวันหยุด \n", 269 | "0 184,553 \n", 270 | "1 217,526 \n", 271 | "2 208,239 \n", 272 | "3 153,738 \n", 273 | "4 175,618 \n", 274 | ".. ... 
\n", 275 | "157 20729 \n", 276 | "158 23499 \n", 277 | "159 22313 \n", 278 | "160 20,080 \n", 279 | "161 26274 \n", 280 | "\n", 281 | "[162 rows x 7 columns]" 282 | ] 283 | }, 284 | "execution_count": 3, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "url = \"https://data.go.th/dataset/fd781923-6c64-4cbc-90b8-83ad77c96ecd/resource/a542d7d4-bc27-4c03-81ef-bef0a5213210/download/-..-65.csv\"\n", 291 | "df = pd.read_csv(url)\n", 292 | "df\n" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "# read CSV (TIS-620, cp874, Windows-874)\n", 300 | "* https://data.go.th/dataset/bangkok-fir-may-2565" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 14, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/html": [ 311 | "
\n", 312 | "\n", 325 | "\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | "
flight_ruleเที่ยวบินรายเดือนเที่ยวบินสะสมเฉลี่ยต่อวันการเปลี่ยนแปลง
0VFR564347737225.178.31
1IFR343092247191.0018.34
2TOTAL39952272456226.1726.65
\n", 363 | "
" 364 | ], 365 | "text/plain": [ 366 | " flight_rule เที่ยวบินรายเดือน เที่ยวบินสะสม เฉลี่ยต่อวัน การเปลี่ยนแปลง\n", 367 | "0 VFR 5643 47737 225.17 8.31\n", 368 | "1 IFR 34309 224719 1.00 18.34\n", 369 | "2 TOTAL 39952 272456 226.17 26.65" 370 | ] 371 | }, 372 | "execution_count": 14, 373 | "metadata": {}, 374 | "output_type": "execute_result" 375 | } 376 | ], 377 | "source": [ 378 | "url = \"https://data.go.th/dataset/31170cfe-eaf5-4583-83aa-000e4de39064/resource/0899889b-ae8e-44d7-a2ba-da81bcdb24a9/download/flightrule_05_2022.csv\"\n", 379 | "\n", 380 | "# df = pd.read_csv(url)\n", 381 | "\n", 382 | "# df = pd.read_csv(url, encoding=\"iso8859-11\")\n", 383 | "df = pd.read_csv(url, encoding=\"tis-620\")\n", 384 | "# df = pd.read_csv(url, encoding=\"cp874\")\n", 385 | "# df = pd.read_csv(url, encoding=\"thai\")\n", 386 | "\n", 387 | "df\n" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "# read excel from URL\n", 395 | "* https://opendata.data.go.th/dataset/mrta-crmk" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 15, 401 | "metadata": {}, 402 | "outputs": [ 403 | { 404 | "data": { 405 | "text/html": [ 406 | "
\n", 407 | "\n", 420 | "\n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | "
เดือนปีโครงการจำนวนผู้โดยสารรวมจำนวนผู้โดยสารเฉลี่ยรายวันจำนวนผู้โดยสารเฉลี่ยรายวันธรรมดาจำนวนผู้โดยสารเฉลี่ยรายวันหยุด
0มกราคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8613608277858316029184553
1กุมภาพันธ์2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8212613293308329204217526
2มีนาคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล8123428262046287668208239
3เมษายน2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล6990396233013285863153738
4พฤษภาคม2557โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล7265594234374276809175618
........................
166มกราคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม949885306413609320729
167กุมภาพันธ์2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม980551350204047723499
168มีนาคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม1013287326843629222313
169เมษายน2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม903899301303781520080
170พฤษภาคม2565โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม1198303386554759726274
\n", 546 | "

171 rows × 7 columns

\n", 547 | "
" 548 | ], 549 | "text/plain": [ 550 | " เดือน ปี โครงการ \\\n", 551 | "0 มกราคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n", 552 | "1 กุมภาพันธ์ 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n", 553 | "2 มีนาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n", 554 | "3 เมษายน 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n", 555 | "4 พฤษภาคม 2557 โครงการรถไฟฟ้ามหานคร สายเฉลิมรัชมงคล \n", 556 | ".. ... ... ... \n", 557 | "166 มกราคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n", 558 | "167 กุมภาพันธ์ 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n", 559 | "168 มีนาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n", 560 | "169 เมษายน 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n", 561 | "170 พฤษภาคม 2565 โครงการรถไฟฟ้ามหานคร สายฉลองรัชธรรม \n", 562 | "\n", 563 | " จำนวนผู้โดยสารรวม จำนวนผู้โดยสารเฉลี่ยรายวัน \\\n", 564 | "0 8613608 277858 \n", 565 | "1 8212613 293308 \n", 566 | "2 8123428 262046 \n", 567 | "3 6990396 233013 \n", 568 | "4 7265594 234374 \n", 569 | ".. ... ... \n", 570 | "166 949885 30641 \n", 571 | "167 980551 35020 \n", 572 | "168 1013287 32684 \n", 573 | "169 903899 30130 \n", 574 | "170 1198303 38655 \n", 575 | "\n", 576 | " จำนวนผู้โดยสารเฉลี่ยรายวันธรรมดา จำนวนผู้โดยสารเฉลี่ยรายวันหยุด \n", 577 | "0 316029 184553 \n", 578 | "1 329204 217526 \n", 579 | "2 287668 208239 \n", 580 | "3 285863 153738 \n", 581 | "4 276809 175618 \n", 582 | ".. ... ... 
\n", 583 | "166 36093 20729 \n", 584 | "167 40477 23499 \n", 585 | "168 36292 22313 \n", 586 | "169 37815 20080 \n", 587 | "170 47597 26274 \n", 588 | "\n", 589 | "[171 rows x 7 columns]" 590 | ] 591 | }, 592 | "execution_count": 15, 593 | "metadata": {}, 594 | "output_type": "execute_result" 595 | } 596 | ], 597 | "source": [ 598 | "url = \"https://data.go.th/dataset/fd781923-6c64-4cbc-90b8-83ad77c96ecd/resource/8364f303-8f2f-4693-aa0c-2c96dc7cb075/download/-..-65.xlsx\"\n", 599 | "df = pd.read_excel(url)\n", 600 | "df\n" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 16, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "df.to_csv('mrt.csv', index=False)\n", 610 | "df.to_excel('mrt.xlsx')" 611 | ] 612 | }, 613 | { 614 | "cell_type": "markdown", 615 | "metadata": {}, 616 | "source": [ 617 | "## Data API\n" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": null, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "from getpass import getpass\n", 627 | "api_key = getpass()" 628 | ] 629 | }, 630 | { 631 | "cell_type": "markdown", 632 | "metadata": {}, 633 | "source": [ 634 | "- ข้อมูลโรงแรมที่เป็นมิตรกับสิ่งแวดล้อม (Green Hotel)\n", 635 | " * https://opendata.data.go.th/dataset/db0103-002\n", 636 | "- จำนวนคนทำงานในที่พักแรม\n", 637 | " * https://opendata.data.go.th/dataset/os_17_00010" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": null, 643 | "metadata": {}, 644 | "outputs": [], 645 | "source": [ 646 | "# Request headers\n", 647 | "headers = {\n", 648 | " \"api-key\": api_key,\n", 649 | "}\n", 650 | "\n", 651 | "params = {\"resource_id\": \"6c865fd0-93e4-4642-8e57-7063b3162896\", \"limit\": 10}\n", 652 | "r = requests.get(\n", 653 | " \"https://opend.data.go.th/get-ckan/datastore_search\", params, headers=headers\n", 654 | ")\n", 655 | "if r.ok:\n", 656 | " j = r.json()\n", 657 | " records = j[\"result\"][\"records\"]\n", 658 | "df = 
def gov_open_data(api_key, resource_id: str, nrows=10000, timeout=30):
    """Query the data.go.th Data API and return the records as a DataFrame.

    Sends a ``datastore_search`` request to opend.data.go.th with the API key
    in the ``api-key`` header and builds a DataFrame from ``result.records``.

    Example (needs a valid key and network access)::

        gov_open_data("your api-key", "ce7f4a78-71db-4754-9084-edca971903bd", nrows=3)

    Parameters
    ----------
    api_key : str
        Personal API key issued by the portal.
    resource_id : str
        Id of the datastore resource to read.
    nrows : int, optional
        Value sent as the ``limit`` query parameter (default 10000).
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status (for example when the
        request is rejected).
    """
    response = requests.get(
        "https://opend.data.go.th/get-ckan/datastore_search",
        params={"resource_id": resource_id, "limit": nrows},
        headers={"api-key": api_key},
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of a KeyError on "result" below.
    response.raise_for_status()

    payload = response.json()
    return pd.DataFrame(payload["result"]["records"])
734 | }, 735 | "vscode": { 736 | "interpreter": { 737 | "hash": "629cb9f199f624aadf5cdcebfeb0fdc9652b7a7e432d484384b677cab7914fce" 738 | } 739 | }, 740 | "widgets": { 741 | "application/vnd.jupyter.widget-state+json": { 742 | "state": {}, 743 | "version_major": 2, 744 | "version_minor": 0 745 | } 746 | } 747 | }, 748 | "nbformat": 4, 749 | "nbformat_minor": 4 750 | } 751 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandas for Data Science 2 | | YouTube | Title | 3 | |:------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------| 4 | |   | สอน Python สำหรับ Data science: การติดตั้ง Anaconda Python และ Jupyter Notebook บน Windows | 5 | |   | สอน Jupyter notebook: รู้จัก Jupyter notebook | 6 | |   | สอน Jupyter notebook: แนะนำหลักการและชุดคำสั่งพื้นฐาน | 7 | |   | สอน Jupyter notebook: การใช้คำสั่งเบื้องต้น ตอนที่ 1 | 8 | |   | สอน Jupyter notebook: การใช้คำสั่งเบื้องต้น ตอนที่ 2 | 9 | |   | สอน Jupyter notebook: การใช้ markdown ตอนที่ 1 | 10 | |   | สอน Jupyter notebook: การใช้ markdown ตอนที่ 2 | 11 | |   | สอน Jupyter notebook: วิธีการเขียน Markdown เพื่อสร้างสารบัญให้กับ Jupyter Notebook | 12 | |   | เทคนิคการใช้ IPython.display เพื่อเปิดไฟล์เสียง แสดงรูป JSON และ YouTube ใน Jupyter Notebook | 13 | |   | สอน Jupyter notebook: การแสดงหน้าเว็บ รูปภาพและ YouTube ใน notebook | 14 | |   | สอน Jupyter notebook: เทคนิคการใช้ autocomplete และ help เพื่อเขียนโค้ด | 15 | |   | สอน Jupyter notebook: เทคนิคการ start Jupyter notebook บน Windows | 16 | |   | สอน Jupyter notebook: การ run Jupyter notebook ใน browser ที่ต้องการ | 17 | |   | สอน Jupyter Notebook: การ run notebook cell ที่มี Python Prompt (เครื่องหมายมากกว่า 3 ตัว) | 18 | |   | สอน Jupyter 
notebook: การ copy cells ข้าม notebooks | 19 | |   | สอน Jupyter notebook: การเก็บและแชร์ notebooks บน github | 20 | |   | สอน Jupyter Notebook: ลองใช้ Jupyter notebook บน Cloud ด้วย Google Colaboratory (Colab) | 21 | |   | การใช้ pip เพื่อจัดการ Python packages ใน Google Colaboratory | 22 | |   | เทคนิคการใช้ Jupyter notebook บน Visual Studio Code | 23 | |   | สอน Python สำหรับ Data science: รู้จักกับ pandas | 24 | |   | สอน Python สำหรับ Data science: pandas.Series | 25 | |   | สอน Python สำหรับ Data science: รู้จักกับ pandas DataFrame | 26 | |   | สอน Python สำหรับ Data science: การสำรวจข้อมูลเบื้องต้นด้วย pandas | 27 | |   | สอน Python สำหรับ Data science: การแสดงแถวข้อมูลใน pandas โดยใช้ head, tail และ sample | 28 | |   | สอน Python สำหรับ Data science: ประเภทข้อมูล (data types) ใน pandas | 29 | |   | การกำหนด max_rows, max_columns, float_format เพื่อแสดงข้อมูลใน pandas DataFrame | 30 | |   | การสร้างกราฟด้วย pandas: การสร้างฮีสโตแกรม (histogram) และ density plot | 31 | |   | การสร้างกราฟด้วย pandas: การสร้าง boxplot | 32 | |   | การสร้างกราฟด้วย pandas: การสร้าง boxplot หลายรูปพร้อมกัน | 33 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟ XY หรือ Scatter Plot | 34 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟแท่ง (bar graph) | 35 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟ stacked bar | 36 | |   | การสร้างกราฟด้วย pandas: กราฟเส้น (line graph) | 37 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟเส้นแสดงการเปลี่ยนแปลงเป็น % เปรียบเทียบ | 38 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟเส้นหลายเส้นและหลายรูปพร้อม ๆ กัน | 39 | |   | การสร้างกราฟด้วย pandas: การสร้างกราฟที่มีตัวอักษรไทยด้วย pandas | 40 | |   | การสร้างกราฟด้วย pandas: การกำหนด style ให้กับกราฟ | 41 | |   | สอน pandas: การสร้าง correlation matrix พร้อมแสดงผลแบบ heatmap | 42 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ CSV, TSV, FWF, Excel, Zip มาสร้างเป็น pandas DataFrame | 43 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ข้อมูลจาก local, web, dropbox, github ด้วย pandas | 44 | |   | สอน 
pandas: การอ่านไฟล์ที่แชร์จาก OneDrive มาสร้างเป็น DataFrame | 45 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ที่ไม่มี header row | 46 | |   | สอน Python สำหรับ Data science: การสร้าง pandas DataFrame จาก Clipboard | 47 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ที่มีข้อมูลวันเวลา (date/time data) ด้วย pandas | 48 | |   | สอน Python สำหรับ Data science: การอ่านและรวมไฟล์ CSV หลาย ๆ ไฟล์ | 49 | |   | สอน Python สำหรับ Data science: การ save pandas DataFrame เป็นไฟล์ CSV | 50 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ Excel ด้วย pandas | 51 | |   | สอน Python สำหรับ Data science: การอ่านชีทหลาย ๆ ชีทจาก Excel มาสร้างเป็น DataFrame (ตอนที่ 1) | 52 | |   | สอน Python สำหรับ Data science: การอ่านชีทหลาย ๆ ชีทจาก Excel มาสร้างเป็น DataFrame (ตอนที่ 2) | 53 | |   | สอน Python สำหรับ Data science: การ save หลาย ๆ pandas DataFrame ให้เป็นไฟล์ Excel | 54 | |   | สอน Python สำหรับ Data science: การอ่านข้อมูลจาก google sheets มาสร้างเป็น pandas DataFrame | 55 | |   | สอน pandas: การอ่านไฟล์ csv, tsv, json, excel ที่แชร์บน Google Drive | 56 | |   | สอน pandas: การอ่านและแปลงข้อมูลจาก Google Form เช่น แปลงข้อความ "พอใจมาก" ให้เป็นเลข 4 | 57 | |   | การดึงข้อมูลตัวชี้วัดจาก World Bank ด้วย pandas | 58 | |   | สอน Python สำหรับ Data science: การอ่านข้อมูลจาก web ที่อยู่ในรูปตาราง html ด้วย read_html | 59 | |   | สอน Python สำหรับ Data science: การอ่านไฟล์ JSON มาสร้างเป็น pandas dataframe | 60 | |   | การใช้ GET requests ในการดึงข้อมูล text file (CSV, JSON) และรูปภาพจาก web URL | 61 | |   | สอน Python สำหรับ data science: การใช้ Pandas ดึงข้อมูลจาก Bank Of Thailand API (REST API) | 62 | |   | การอ่าน R datasets มาเป็น pandas DataFrame | 63 | |   | สอน pandas: การอ่านและเขียนไฟล์ SPSS (read and write SPSS sav and zsav file) | 64 | |   | สอน Python สำหรับ Data science: การอ่านข้อมูลจาก sqlite3 มาเป็น pandas DataFrame | 65 | |   | สอน Python สำหรับ Data science: การแปลง pandas DataFrame ให้เป็นตารางใน sqlite3 | 66 | |   | สอน Python สำหรับ data science: 
การดึงข้อมูลจาก Microsoft SQL Server มาสร้างเป็น pandas DataFrame | 67 | |   | สอน pandas: save DataFrame ไปเป็นตารางใน MS SQL Server | 68 | |   | สอน pandas: การเชื่อมต่อกับฐานข้อมูล PostgreSQL | 69 | |   | สอน Python สำหรับ data science: การดึงข้อมูลจาก MySQL มาสร้างเป็น pandas DataFrame | 70 | |   | สอน pandas: save DataFrame ไปเป็นตารางใน MySQL, PostgreSQL และ SQLite | 71 | |   | สอน Python สำหรับ Data science: การใช้งาน pandas index เบื้องต้น | 72 | |   | สอน Python สำหรับ Data science: การเลือกคอลัมน์ใน pandas DataFrame ตอนที่ 1 | 73 | |   | สอน Python สำหรับ Data science: การเลือกคอลัมน์ใน pandas DataFrame ตอนที่ 2 | 74 | |   | สอน Python สำหรับ Data science: การเลือกแถวข้อมูลตามเงื่อนไข | 75 | |   | สอน Python สำหรับ Data science: การลบแถวและคอลัมน์ใน DataFrame | 76 | |   | สอน Python สำหรับ Data science: การเปลี่ยนชื่อคอลัมน์ใน pandas DataFrame | 77 | |   | สอน Python สำหรับ Data science: การเรียงลำดับข้อมูลใน pandas ด้วย sort_values และ sort_index | 78 | |   | สอน Python สำหรับ Data science: การใช้ loc เพื่อเลือกแถวและคอลัมน์ | 79 | |   | สอน Python สำหรับ Data science: การใช้ loc ร่วมกับ regex เพื่อเลือกชื่อคอลัมน์ | 80 | |   | สอน Python สำหรับ Data science: การใช้ iloc เพื่อเลือกแถวและคอลัมน์ใน pandas DataFrame | 81 | |   | สอน Python สำหรับ Data science: การใช้ at และ iat ในการเข้าถึงและกำหนดค่าแบบ scalar ใน DataFrame | 82 | |   | สอน Python สำหรับ Data science: การสร้าง two-level index ให้ pandas DataFrame | 83 | |   | สอน Python สำหรับ Data science: การสร้าง three-level index ให้ pandas DataFrame | 84 | |   | สอน Python สำหรับ Data science: การใช้ pandas IndexSlice เพื่อดึงข้อมูล multi-level index ตอนที่ 1 | 85 | |   | สอน Python สำหรับ Data science: การใช้ pandas IndexSlice เพื่อดึงข้อมูล multi-level index ตอนที่ 2 | 86 | |   | การดึงราคาหุ้นในตลาดหุ้นไทยและต่างประเทศด้วย pandas (get daily stock price) | 87 | |   | สอน pandas: เข้าใจการทำงานของ multilevel column names เช่น ข้อมูลราคาหุ้นรายวัน | 88 | |   | สอน Python สำหรับ Data science: 
การทำงานกับข้อมูลวันและเวลาใน pandas | 89 | |   | สอน Python สำหรับ Data science: การจัดการข้อมูลแบบ TimeSeries (DatetimeIndex) | 90 | |   | สอน Python สำหรับ Data science: การใช้ groupby เพื่อจัดกลุ่มข้อมูลใน pandas DataFrame | 91 | |   | สอน Python สำหรับ Data science: การใช้ Series.str.replace เพื่อ clean ข้อมูล | 92 | |   | สอน Python สำหรับ Data science: การปรับแต่งข้อมูลในคอลัมน์ เช่น ลบส่วนที่ไม่ใช่เลข 0-9 ออกจากข้อมูล | 93 | |   | สอน pandas: การแปลงตัวเลขที่มีสัญลักษณ์สกุลเงินและหน่วย (clean currency text) เช่น $70K เป็น 70000 | 94 | |   | สอน Python สำหรับ Data science: การใช้ pandas drop_duplicates เพื่อลบแถวซ้ำ | 95 | |   | สอน Python สำหรับ Data science: การจัดการกับ missing values ตอนที่ 1 | 96 | |   | สอน Python สำหรับ Data science: การจัดการกับ missing values ตอนที่ 2 | 97 | |   | สอน Python สำหรับ Data science: การแทนที่ missing values | 98 | |   | สอน Python สำหรับ Data science: ตัวแปรแบบ category เบื้องต้น | 99 | |   | สอน Python สำหรับ Data science: การสร้าง ordered category ใน pandas DataFrame | 100 | |   | สอน Python สำหรับ Data science: การใช้ cut เพื่อแบ่งข้อมูลออกเป็น category | 101 | |   | สอน Python สำหรับ Data science: การรวมหลาย ๆ DataFrame เข้าด้วยกันด้วย pandas.concat | 102 | |   | สอน Python สำหรับ Data science: การใช้ aggregate function ใน Series และ DataFrame | 103 | |   | สอน pandas: เข้าใจการทำงาน axis=0 (by index) และ axis=1 (by columns) | 104 | |   | สอน Python สำหรับ Data science: การสร้างตารางไขว้ (crosstab) ด้วย pandas | 105 | |   | สอน Python สำหรับ Data science: การสร้าง pivot table ด้วย pandas เบื้องต้น | 106 | |   | สอน Python สำหรับ Data science: การ melt/unpivot pandas DataFrame | 107 | |   | สอน Python สำหรับ Data science: การสร้าง pandas pivot table แสดงความถี่ และการแสดง missing values | 108 | |   | สอน Python สำหรับ Data science: การใช้ aggregate function ใน pivot table ของ pandas | 109 | |   | สอน Python สำหรับ Data science: การใช้ diff() และ pct_change() เพื่อหาผลต่างระหว่างสองแถว | 110 | |   | สอน Python สำหรับ 
Data science: การใช้ diff() และ pct_change() ร่วมกับ groupby ใน pandas DataFrame | 111 | |   | สอน Python สำหรับ Data science: การหาค่าแรกและค่าสุดท้ายของแต่ละ group ใน pandas DataFrame | 112 | |   | สอน pandas: การคำนวณราคาหุ้นเทียบกับวันที่ใช้เป็นฐานด้วย groupby().first() | 113 | |   | สอน pandas การหาแถวที่มีค่าสูงสุดและต่ำสุดด้วย nlargest, nsmallest | 114 | |   | สอน Python สำหรับ Data science: สร้างคอลัมน์แสดงลำดับ (rank) ด้วย pandas | 115 | |   | สอน Python สำหรับ Data science: การ standardize data (z-score) ด้วย pandas | 116 | |   | การสร้าง dummy variable และใช้งานกับ Linear Regression ของ scikit-learn และ statsmodels | 117 | |   | สอน Python สำหรับ Data science: การ merge DataFrames แบบ inner join | 118 | |   | สอน Python สำหรับ Data science: การ merge DataFrames แบบ left join | 119 | |   | สอน Python สำหรับ Data science: รู้จักกับ lambda function | 120 | |   | สอน Python สำหรับ Data science: การใช้ lambda function กับ pandas Series และ DataFrame | 121 | |   | สอน Python สำหรับ Data science: การใช้ map ใน pandas | 122 | |   | สอน Python สำหรับ Data science: การใช้ applymap ใน pandas | 123 | |   | สอน Python สำหรับ Data science: การใช้ apply ใน pandas | 124 | |   | สอน pandas: จัดการข้อมูลแยกตามกลุ่มด้วยหลักการ Split-Apply-Combine | 125 | |   | สอน Python สำหรับ Data science: การใช้ query ในการกรองข้อมูลใน pandas DataFrame | 126 | |   | สอน Python สำหรับ Data science: การใช้ any() และ all() ในการตรวจสอบ True/False | 127 | |   | สอน Python สำหรับ Data science: การสลับแถว (shuffle) ใน pandas DataFrame | 128 | |   | สอน Python สำหรับ Data science: การสุ่มแถวจากข้อมูลที่แบ่งออกเป็นกลุ่ม ๆ | 129 | |   | สอน Python สำหรับ Data science: การสุ่มแถวจากข้อมูลที่แบ่งออกเป็นกลุ่ม ๆ โดยระบุจำนวนที่ต้องการ | 130 | |   | สอน Python สำหรับ Data science: การแบ่ง DataFrame ออกเป็น training และ test datasets (ตอนที่ 1) | 131 | |   | สอน Python สำหรับ Data science: การแบ่ง DataFrame ออกเป็น training และ test datasets (ตอนที่ 2) | 132 | |   | สอน pandas: การเข้าถึงแต่ละแถวใน 
DataFrame (iterate rows in dataframe) | 133 | |   | สอน Python สำหรับ Data science: กำหนดจำนวนแถวที่จะแสดงผลและรูปแบบการแสดงตัวเลข | 134 | |   | สอน Python สำหรับ Data science: การปรับแต่ง style ในการแสดงข้อมูล DataFrame เบื้องต้น | 135 | |   | สอน Python สำหรับ Data science: การปรับแต่ง style ในการแสดงข้อมูล DataFrame ด้วย custom function | 136 | |   | สอน Python สำหรับ Data science: การแสดงรูปและ hyperlink ใน pandas DataFrame | 137 | |   | สอนไพธอน Python 3: การดาวน์โหลดรูปภาพจาก URL เพื่อแสดงผลบน jupyter notebook | 138 | |   | สอนทำ web scraping ด้วย Python: การ scrape รูปภาพจากหน้าเว็บด้วย BeautifulSoup | 139 | |   | สอนทำ web scraping ด้วย Python: การ scrape url link และ img เพื่อสร้างเป็น DataFrame ตอนที่ 1 | 140 | |   | สอนทำ web scraping ด้วย Python: การ scrape url link และ img เพื่อสร้างเป็น DataFrame ตอนที่ 2 | 141 | |   | สอน web scraping: การ scrape นักเตะทีม Liverpool มาเก็บใน pandas.DataFrame | 142 | |   | สอนการ download รูปภาพจาก url ที่เก็บใน pandas DataFrame | 143 | |   | สอน web scraping ในการดึงข้อความและรูปภาพเพื่อแสดงบน Notebook และบันทึกเป็นไฟล์ | 144 | |   | สอน web scraping ในการดึงข้อความและรูปภาพมาเก็บในฐานข้อมูล MySQL | 145 | |   | สอน pandas: explode (unnest) multivalue column เช่น คอลัมน์ที่เก็บค่าแบบ list | 146 | |   | สอน pandas: ทำความสะอาดข้อมูล JSON | 147 | |   | สอน pandas: ดึงข้อมูล JSON จาก public RESTful APIs เช่น อัตราแลกเปลี่ยน พยากรณ์อากาศ Pokemon | 148 | |   | สอน pandas: รู้จักกับ JSON string format แบบต่าง ๆ ที่ pandas รองรับ | 149 | |   | สอน pandas: การจัดการกับคอลัมน์ใน DataFrame ที่เก็บค่าแบบ dict และ list | 150 | |   | สอน pandas: การดึงข้อมูลดัชนีตลาดหุ้นทั่วโลกแบบรายวัน (get daily stock market index) | 151 | |   | สอน pandas: การอ่านไฟล์ SAS (xport, sas7bdat) | 152 | |   | สอน pandas: การ stack และ unstack DataFrame ที่มี MultiIndex | 153 | |   | สอน pandas: การเชื่อมต่อกับ Azure SQL Database | 154 | |   | สอน pandas: การทำงานกับคอลัมน์ที่มีชื่อเป็นตัวเลข (numeric column name) | 155 | |   | สอน pandas: 
การจัดรูปแบบการแสดงผลตัวเลข เช่น จำนวนจุดทศนิยม เปอร์เซ็นต์ ใส่ , คั่นทุก 3 หลัก | 156 | |   | สอน pandas: การเลือก row/column label ด้วย filter เช่น เลือกชื่อคอลัมน์ที่มีคำว่า math | 157 | |   | สอน pandas: การรวมชื่อคอลัมน์แบบ MultiIndex เข้าด้วยกัน | 158 | |   | สอน pandas: การดึงอัตราแลกเปลี่ยนเงินตราต่างประเทศจาก RESTful api | 159 | |   | สอน pandas: การหาค่าเฉลี่ยดัชนีตลาดหุ้นแยกตามสัปดาห์ เดือน ไตรมาส และปี | 160 | |   | สอนไพธอน Python: การใช้ tqdm เพื่อสร้าง progress meter | 161 | |   | สอนไพธอน Python: การใช้ io.StringIO เพื่อทำ memory file (อ่าน string ให้เหมือนอ่านไฟล์) | 162 | |   | สอน pandas: การใช้คำสั่ง SQL เพื่อดึงข้อมูลใน DataFrame ด้วย pandasql package | 163 | |   | สอน pandas: การ merge ข้อมูลราคาน้ำมันและทองคำรายเดือน (merge oil and gold prices) | 164 | |   | สอน pandas: การ merge ราคาน้ำมันและดัชนีตลาดหุ้นรายวันเข้าด้วยกัน (merge oil prices and stock index) | 165 | |   | สอน pandas: การอ่านและรวมหลาย ๆ ชีทจาก Google Sheets มาสร้างเป็น DataFrame | 166 | |   | สอน pandas: เทคนิคการใช้ regular expression ในการกรองข้อมูลแบบต่าง ๆ | 167 | |   | สอน data science: การดึงข้อความภาษาไทยด้วย regex (extract Thai characters with regular expression) | 168 | |   | สอน pandas: การทำความสะอาดข้อมูลด้วย regular expression (str.extract และ str.split) | 169 | |   | สอน pandas: การสร้าง dummy/one-hot จาก multivalued column | 170 | |   | การแปลง pandas DataFrame ให้เป็น Sparse เพื่อทำ Market Basket Analysis | 171 | |   | สอน Jupyter notebook: แนะนำ Azure Notebook เพื่อใช้ Jupyter Notebook บน Cloud | 172 | |   | สอน data science: ทำความสะอาดข้อมูลแบบ multilevel ด้วย pandas (clean multilevel table) | 173 | |   | สอน pandas: แปลง DataFrame เป็น HTML, Markdown และ Excel (DataFrame to HTML, Markdown, Excel table) | 174 | |   | สอน pandas: การทำงานกับคอลัมน์ที่เก็บค่าเป็น dictionary | 175 | |   | สอน data science: preprocess ข้อมูลที่ได้จาก Google Form | 176 | -------------------------------------------------------------------------------- 
/pandas_transform_google_form_data2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# read and transform Google Form data (checkboxes, multiple choice grid items)\n", 8 | "* Google Form: https://forms.gle/7a35kfYmzyJJb5GD9\n", 9 | "* Google Sheets: https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "* author: [Prasert Kanawattanachai](mailto:prasert.k@chula.ac.th)\n", 18 | "* YouTube: https://www.youtube.com/prasertcbs\n", 19 | "* github: https://github.com/prasertcbs/\n", 20 | "* [Chulalongkorn Business School](https://www.cbs.chula.ac.th/en/)\n", 21 | "---" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "import re" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "pandas version: 1.2.1\n", 45 | "numpy version: 1.19.2\n", 46 | "2021-02-20 09:20:15.132975\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "print(f'pandas version: {pd.__version__}')\n", 52 | "print(f'numpy version: {np.__version__}')\n", 53 | "print(pd.Timestamp.now())" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 54, 59 | "metadata": { 60 | "scrolled": true 61 | }, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | "
Timestampgenderage (years)skillssatisfaction [product]satisfaction [price]satisfaction [service]satisfaction [overall]
02021-02-19 22:36:35.982Male24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied
12021-02-19 22:38:17.913Male22.0Excel, SQLvery satisfiedsatisfiedneutraldissatisfied
22021-02-19 22:49:14.802Prefer not to say35.0Word, Excel, PowerPoint, SPSSvery dissatisfieddissatisfiedneutraldissatisfied
32021-02-19 23:01:24.057Female23.0Word, Excel, PowerPoint, SQL, Python, R, JavaS...very satisfiedsatisfiedneutraldissatisfied
42021-02-20 08:27:19.584Female27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied
52021-02-20 08:27:36.059Female19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied
62021-02-20 08:28:19.725Prefer not to say30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied
72021-02-20 09:16:02.448Prefer not to say24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied
82021-02-20 09:22:23.851MaleNaNWordvery dissatisfieddissatisfiedneutralNone
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfied
\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " Timestamp gender age (years) \\\n", 211 | "0 2021-02-19 22:36:35.982 Male 24.0 \n", 212 | "1 2021-02-19 22:38:17.913 Male 22.0 \n", 213 | "2 2021-02-19 22:49:14.802 Prefer not to say 35.0 \n", 214 | "3 2021-02-19 23:01:24.057 Female 23.0 \n", 215 | "4 2021-02-20 08:27:19.584 Female 27.0 \n", 216 | "5 2021-02-20 08:27:36.059 Female 19.0 \n", 217 | "6 2021-02-20 08:28:19.725 Prefer not to say 30.0 \n", 218 | "7 2021-02-20 09:16:02.448 Prefer not to say 24.0 \n", 219 | "8 2021-02-20 09:22:23.851 Male NaN \n", 220 | "9 2021-02-20 09:23:22.259 NaN NaN \n", 221 | "\n", 222 | " skills satisfaction [product] \\\n", 223 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n", 224 | "1 Excel, SQL very satisfied \n", 225 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n", 226 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n", 227 | "4 Word, Excel, PowerPoint neutral \n", 228 | "5 Excel, Python, R satisfied \n", 229 | "6 SQL, R, SPSS, SAS dissatisfied \n", 230 | "7 PowerPoint satisfied \n", 231 | "8 Word very dissatisfied \n", 232 | "9 Excel NaN \n", 233 | "\n", 234 | " satisfaction [price] satisfaction [service] satisfaction [overall] \n", 235 | "0 dissatisfied neutral satisfied \n", 236 | "1 satisfied neutral dissatisfied \n", 237 | "2 dissatisfied neutral dissatisfied \n", 238 | "3 satisfied neutral dissatisfied \n", 239 | "4 satisfied very satisfied very satisfied \n", 240 | "5 satisfied satisfied satisfied \n", 241 | "6 satisfied neutral very satisfied \n", 242 | "7 very satisfied satisfied very satisfied \n", 243 | "8 dissatisfied neutral None \n", 244 | "9 NaN NaN satisfied " 245 | ] 246 | }, 247 | "execution_count": 54, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# google sheets generated by google form\n", 254 | "# https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581\n", 255 | "# note: 
replace 'edit#' with 'export?format=xlsx&'\n", 256 | "gs_url='https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/edit#gid=1695829581'\n", 257 | "url=re.sub('edit#', 'export?format=xlsx&', gs_url)\n", 258 | "# url='https://docs.google.com/spreadsheets/d/1W0EaP8WGWaOK8XYDQA3Z4HIZaOQXXYj6U6kzqMuVrUI/export?format=xlsx&gid=1695829581'\n", 259 | "df = pd.read_excel(url)\n", 260 | "df" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 55, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/plain": [ 271 | "Index(['Timestamp', 'gender', 'age (years)', 'skills',\n", 272 | " 'satisfaction [product]', 'satisfaction [price]',\n", 273 | " 'satisfaction [service]', 'satisfaction [overall]'],\n", 274 | " dtype='object')" 275 | ] 276 | }, 277 | "execution_count": 55, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "df.columns" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 56, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "Index(['Timestamp', 'gender', 'age (years)', 'skills', 'product', 'price',\n", 295 | " 'service', 'overall'],\n", 296 | " dtype='object')" 297 | ] 298 | }, 299 | "execution_count": 56, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "df.columns.str.replace(r'(satisfaction|\\[|\\])', '', regex=True).str.strip()" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 57, 311 | "metadata": { 312 | "scrolled": true 313 | }, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/html": [ 318 | "
\n", 319 | "\n", 332 | "\n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | "
timestampgenderageskillsproductpriceserviceoverall
02021-02-19 22:36:35.982Male24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied
12021-02-19 22:38:17.913Male22.0Excel, SQLvery satisfiedsatisfiedneutraldissatisfied
22021-02-19 22:49:14.802Prefer not to say35.0Word, Excel, PowerPoint, SPSSvery dissatisfieddissatisfiedneutraldissatisfied
32021-02-19 23:01:24.057Female23.0Word, Excel, PowerPoint, SQL, Python, R, JavaS...very satisfiedsatisfiedneutraldissatisfied
42021-02-20 08:27:19.584Female27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied
52021-02-20 08:27:36.059Female19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied
62021-02-20 08:28:19.725Prefer not to say30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied
72021-02-20 09:16:02.448Prefer not to say24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied
82021-02-20 09:22:23.851MaleNaNWordvery dissatisfieddissatisfiedneutralNone
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfied
\n", 459 | "
" 460 | ], 461 | "text/plain": [ 462 | " timestamp gender age \\\n", 463 | "0 2021-02-19 22:36:35.982 Male 24.0 \n", 464 | "1 2021-02-19 22:38:17.913 Male 22.0 \n", 465 | "2 2021-02-19 22:49:14.802 Prefer not to say 35.0 \n", 466 | "3 2021-02-19 23:01:24.057 Female 23.0 \n", 467 | "4 2021-02-20 08:27:19.584 Female 27.0 \n", 468 | "5 2021-02-20 08:27:36.059 Female 19.0 \n", 469 | "6 2021-02-20 08:28:19.725 Prefer not to say 30.0 \n", 470 | "7 2021-02-20 09:16:02.448 Prefer not to say 24.0 \n", 471 | "8 2021-02-20 09:22:23.851 Male NaN \n", 472 | "9 2021-02-20 09:23:22.259 NaN NaN \n", 473 | "\n", 474 | " skills product \\\n", 475 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n", 476 | "1 Excel, SQL very satisfied \n", 477 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n", 478 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n", 479 | "4 Word, Excel, PowerPoint neutral \n", 480 | "5 Excel, Python, R satisfied \n", 481 | "6 SQL, R, SPSS, SAS dissatisfied \n", 482 | "7 PowerPoint satisfied \n", 483 | "8 Word very dissatisfied \n", 484 | "9 Excel NaN \n", 485 | "\n", 486 | " price service overall \n", 487 | "0 dissatisfied neutral satisfied \n", 488 | "1 satisfied neutral dissatisfied \n", 489 | "2 dissatisfied neutral dissatisfied \n", 490 | "3 satisfied neutral dissatisfied \n", 491 | "4 satisfied very satisfied very satisfied \n", 492 | "5 satisfied satisfied satisfied \n", 493 | "6 satisfied neutral very satisfied \n", 494 | "7 very satisfied satisfied very satisfied \n", 495 | "8 dissatisfied neutral None \n", 496 | "9 NaN NaN satisfied " 497 | ] 498 | }, 499 | "execution_count": 57, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "df.columns = ['timestamp', 'gender', 'age', 'skills', 'product', 'price', 'service', 'overall']\n", 506 | "df" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "## recode gender " 514 | ] 515 | 
}, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 58, 519 | "metadata": {}, 520 | "outputs": [ 521 | { 522 | "data": { 523 | "text/plain": [ 524 | "0 M\n", 525 | "1 M\n", 526 | "2 X\n", 527 | "3 F\n", 528 | "4 F\n", 529 | "5 F\n", 530 | "6 X\n", 531 | "7 X\n", 532 | "8 M\n", 533 | "9 NaN\n", 534 | "Name: gender, dtype: object" 535 | ] 536 | }, 537 | "execution_count": 58, 538 | "metadata": {}, 539 | "output_type": "execute_result" 540 | } 541 | ], 542 | "source": [ 543 | "d = {'Female': 'F',\n", 544 | " 'Male': 'M',\n", 545 | " 'Prefer not to say': 'X'}\n", 546 | "\n", 547 | "df.gender.map(d, na_action='ignore')" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 59, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "df.gender=df.gender.map(d, na_action='ignore')" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 60, 562 | "metadata": {}, 563 | "outputs": [ 564 | { 565 | "data": { 566 | "text/html": [ 567 | "
\n", 568 | "\n", 581 | "\n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | "
timestampgenderageskillsproductpriceserviceoverall
02021-02-19 22:36:35.982M24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied
12021-02-19 22:38:17.913M22.0Excel, SQLvery satisfiedsatisfiedneutraldissatisfied
22021-02-19 22:49:14.802X35.0Word, Excel, PowerPoint, SPSSvery dissatisfieddissatisfiedneutraldissatisfied
32021-02-19 23:01:24.057F23.0Word, Excel, PowerPoint, SQL, Python, R, JavaS...very satisfiedsatisfiedneutraldissatisfied
42021-02-20 08:27:19.584F27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied
52021-02-20 08:27:36.059F19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied
62021-02-20 08:28:19.725X30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied
72021-02-20 09:16:02.448X24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied
82021-02-20 09:22:23.851MNaNWordvery dissatisfieddissatisfiedneutralNone
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfied
\n", 708 | "
" 709 | ], 710 | "text/plain": [ 711 | " timestamp gender age \\\n", 712 | "0 2021-02-19 22:36:35.982 M 24.0 \n", 713 | "1 2021-02-19 22:38:17.913 M 22.0 \n", 714 | "2 2021-02-19 22:49:14.802 X 35.0 \n", 715 | "3 2021-02-19 23:01:24.057 F 23.0 \n", 716 | "4 2021-02-20 08:27:19.584 F 27.0 \n", 717 | "5 2021-02-20 08:27:36.059 F 19.0 \n", 718 | "6 2021-02-20 08:28:19.725 X 30.0 \n", 719 | "7 2021-02-20 09:16:02.448 X 24.0 \n", 720 | "8 2021-02-20 09:22:23.851 M NaN \n", 721 | "9 2021-02-20 09:23:22.259 NaN NaN \n", 722 | "\n", 723 | " skills product \\\n", 724 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n", 725 | "1 Excel, SQL very satisfied \n", 726 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n", 727 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n", 728 | "4 Word, Excel, PowerPoint neutral \n", 729 | "5 Excel, Python, R satisfied \n", 730 | "6 SQL, R, SPSS, SAS dissatisfied \n", 731 | "7 PowerPoint satisfied \n", 732 | "8 Word very dissatisfied \n", 733 | "9 Excel NaN \n", 734 | "\n", 735 | " price service overall \n", 736 | "0 dissatisfied neutral satisfied \n", 737 | "1 satisfied neutral dissatisfied \n", 738 | "2 dissatisfied neutral dissatisfied \n", 739 | "3 satisfied neutral dissatisfied \n", 740 | "4 satisfied very satisfied very satisfied \n", 741 | "5 satisfied satisfied satisfied \n", 742 | "6 satisfied neutral very satisfied \n", 743 | "7 very satisfied satisfied very satisfied \n", 744 | "8 dissatisfied neutral None \n", 745 | "9 NaN NaN satisfied " 746 | ] 747 | }, 748 | "execution_count": 60, 749 | "metadata": {}, 750 | "output_type": "execute_result" 751 | } 752 | ], 753 | "source": [ 754 | "df" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": 61, 760 | "metadata": {}, 761 | "outputs": [ 762 | { 763 | "data": { 764 | "text/plain": [ 765 | "0 1.0\n", 766 | "1 5.0\n", 767 | "2 1.0\n", 768 | "3 5.0\n", 769 | "4 3.0\n", 770 | "5 4.0\n", 771 | "6 2.0\n", 772 | "7 4.0\n", 773 | "8 
1.0\n", 774 | "9 NaN\n", 775 | "Name: product, dtype: float64" 776 | ] 777 | }, 778 | "execution_count": 61, 779 | "metadata": {}, 780 | "output_type": "execute_result" 781 | } 782 | ], 783 | "source": [ 784 | "df['product'].map({'very dissatisfied':1, 'dissatisfied':2, 'neutral':3, 'satisfied':4, 'very satisfied':5}, na_action='ignore')" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 62, 790 | "metadata": {}, 791 | "outputs": [ 792 | { 793 | "data": { 794 | "text/plain": [ 795 | "['very dissatisfied', 'very satisfied', 'very dissatisfied', 'very satisfied', 'neutral', 'satisfied', 'dissatisfied', 'satisfied', 'very dissatisfied', NaN]\n", 796 | "Categories (5, object): ['very dissatisfied' < 'dissatisfied' < 'neutral' < 'satisfied' < 'very satisfied']" 797 | ] 798 | }, 799 | "execution_count": 62, 800 | "metadata": {}, 801 | "output_type": "execute_result" 802 | } 803 | ], 804 | "source": [ 805 | "pd.Categorical(df['product'], \n", 806 | " categories=['very dissatisfied', 'dissatisfied', 'neutral', 'satisfied', 'very satisfied'], ordered=True)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 63, 812 | "metadata": {}, 813 | "outputs": [ 814 | { 815 | "data": { 816 | "text/plain": [ 817 | "0 very dissatisfied\n", 818 | "1 very satisfied\n", 819 | "2 very dissatisfied\n", 820 | "3 very satisfied\n", 821 | "4 neutral\n", 822 | "5 satisfied\n", 823 | "6 dissatisfied\n", 824 | "7 satisfied\n", 825 | "8 very dissatisfied\n", 826 | "9 NaN\n", 827 | "Name: product, dtype: object" 828 | ] 829 | }, 830 | "execution_count": 63, 831 | "metadata": {}, 832 | "output_type": "execute_result" 833 | } 834 | ], 835 | "source": [ 836 | "df['product']" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 64, 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "for c in ['product', 'price', 'service', 'overall']:\n", 846 | " df[f'{c}_n']=df[c].map({'very dissatisfied':1, 'dissatisfied':2, 
'neutral':3, 'satisfied':4, 'very satisfied':5}) \n", 847 | " df[c]=pd.Categorical(df[c], \n", 848 | " categories=['very dissatisfied', 'dissatisfied', 'neutral', 'satisfied', 'very satisfied'], ordered=True) \n", 849 | " df[f'{c}_cat']=df[c].cat.codes + 1" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": 65, 855 | "metadata": {}, 856 | "outputs": [ 857 | { 858 | "name": "stdout", 859 | "output_type": "stream", 860 | "text": [ 861 | "\n", 862 | "RangeIndex: 10 entries, 0 to 9\n", 863 | "Data columns (total 16 columns):\n", 864 | " # Column Non-Null Count Dtype \n", 865 | "--- ------ -------------- ----- \n", 866 | " 0 timestamp 10 non-null datetime64[ns]\n", 867 | " 1 gender 9 non-null object \n", 868 | " 2 age 8 non-null float64 \n", 869 | " 3 skills 10 non-null object \n", 870 | " 4 product 9 non-null category \n", 871 | " 5 price 9 non-null category \n", 872 | " 6 service 9 non-null category \n", 873 | " 7 overall 9 non-null category \n", 874 | " 8 product_n 9 non-null float64 \n", 875 | " 9 product_cat 10 non-null int8 \n", 876 | " 10 price_n 9 non-null float64 \n", 877 | " 11 price_cat 10 non-null int8 \n", 878 | " 12 service_n 9 non-null float64 \n", 879 | " 13 service_cat 10 non-null int8 \n", 880 | " 14 overall_n 9 non-null float64 \n", 881 | " 15 overall_cat 10 non-null int8 \n", 882 | "dtypes: category(4), datetime64[ns](1), float64(5), int8(4), object(2)\n", 883 | "memory usage: 1.7+ KB\n" 884 | ] 885 | } 886 | ], 887 | "source": [ 888 | "df.info()" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": 66, 894 | "metadata": {}, 895 | "outputs": [ 896 | { 897 | "data": { 898 | "text/plain": [ 899 | "0 1\n", 900 | "1 5\n", 901 | "2 1\n", 902 | "3 5\n", 903 | "4 3\n", 904 | "5 4\n", 905 | "6 2\n", 906 | "7 4\n", 907 | "8 1\n", 908 | "9 0\n", 909 | "dtype: int8" 910 | ] 911 | }, 912 | "execution_count": 66, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | 
"df['product'].cat.codes + 1" 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": 67, 924 | "metadata": {}, 925 | "outputs": [ 926 | { 927 | "data": { 928 | "text/plain": [ 929 | "0 3\n", 930 | "1 1\n", 931 | "2 1\n", 932 | "3 1\n", 933 | "4 4\n", 934 | "5 3\n", 935 | "6 4\n", 936 | "7 4\n", 937 | "8 -1\n", 938 | "9 3\n", 939 | "dtype: int8" 940 | ] 941 | }, 942 | "execution_count": 67, 943 | "metadata": {}, 944 | "output_type": "execute_result" 945 | } 946 | ], 947 | "source": [ 948 | "df['overall'].cat.codes" 949 | ] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "execution_count": 68, 954 | "metadata": {}, 955 | "outputs": [ 956 | { 957 | "data": { 958 | "text/plain": [ 959 | "3.6666666666666665" 960 | ] 961 | }, 962 | "execution_count": 68, 963 | "metadata": {}, 964 | "output_type": "execute_result" 965 | } 966 | ], 967 | "source": [ 968 | "df['overall_n'].mean()" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": 69, 974 | "metadata": {}, 975 | "outputs": [ 976 | { 977 | "data": { 978 | "text/plain": [ 979 | "3.3" 980 | ] 981 | }, 982 | "execution_count": 69, 983 | "metadata": {}, 984 | "output_type": "execute_result" 985 | } 986 | ], 987 | "source": [ 988 | "(df['overall'].cat.codes + 1).mean()" 989 | ] 990 | }, 991 | { 992 | "cell_type": "code", 993 | "execution_count": 70, 994 | "metadata": {}, 995 | "outputs": [ 996 | { 997 | "data": { 998 | "text/html": [ 999 | "
\n", 1000 | "\n", 1013 | "\n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | "
timestampgenderageskillsproductpriceserviceoverallproduct_nproduct_catprice_nprice_catservice_nservice_catoverall_noverall_cat
02021-02-19 22:36:35.982M24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied1.012.023.034.04
42021-02-20 08:27:19.584F27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied3.034.045.055.05
52021-02-20 08:27:36.059F19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied4.044.044.044.04
62021-02-20 08:28:19.725X30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied2.024.043.035.05
72021-02-20 09:16:02.448X24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied4.045.054.045.05
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfiedNaN0NaN0NaN04.04
\n", 1152 | "
" 1153 | ], 1154 | "text/plain": [ 1155 | " timestamp gender age skills \\\n", 1156 | "0 2021-02-19 22:36:35.982 M 24.0 Word, Excel, PowerPoint, Access \n", 1157 | "4 2021-02-20 08:27:19.584 F 27.0 Word, Excel, PowerPoint \n", 1158 | "5 2021-02-20 08:27:36.059 F 19.0 Excel, Python, R \n", 1159 | "6 2021-02-20 08:28:19.725 X 30.0 SQL, R, SPSS, SAS \n", 1160 | "7 2021-02-20 09:16:02.448 X 24.0 PowerPoint \n", 1161 | "9 2021-02-20 09:23:22.259 NaN NaN Excel \n", 1162 | "\n", 1163 | " product price service overall \\\n", 1164 | "0 very dissatisfied dissatisfied neutral satisfied \n", 1165 | "4 neutral satisfied very satisfied very satisfied \n", 1166 | "5 satisfied satisfied satisfied satisfied \n", 1167 | "6 dissatisfied satisfied neutral very satisfied \n", 1168 | "7 satisfied very satisfied satisfied very satisfied \n", 1169 | "9 NaN NaN NaN satisfied \n", 1170 | "\n", 1171 | " product_n product_cat price_n price_cat service_n service_cat \\\n", 1172 | "0 1.0 1 2.0 2 3.0 3 \n", 1173 | "4 3.0 3 4.0 4 5.0 5 \n", 1174 | "5 4.0 4 4.0 4 4.0 4 \n", 1175 | "6 2.0 2 4.0 4 3.0 3 \n", 1176 | "7 4.0 4 5.0 5 4.0 4 \n", 1177 | "9 NaN 0 NaN 0 NaN 0 \n", 1178 | "\n", 1179 | " overall_n overall_cat \n", 1180 | "0 4.0 4 \n", 1181 | "4 5.0 5 \n", 1182 | "5 4.0 4 \n", 1183 | "6 5.0 5 \n", 1184 | "7 5.0 5 \n", 1185 | "9 4.0 4 " 1186 | ] 1187 | }, 1188 | "execution_count": 70, 1189 | "metadata": {}, 1190 | "output_type": "execute_result" 1191 | } 1192 | ], 1193 | "source": [ 1194 | "df[df['overall'] > 'neutral']" 1195 | ] 1196 | }, 1197 | { 1198 | "cell_type": "code", 1199 | "execution_count": 71, 1200 | "metadata": {}, 1201 | "outputs": [ 1202 | { 1203 | "data": { 1204 | "text/html": [ 1205 | "
\n", 1206 | "\n", 1219 | "\n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | "
timestampgenderageskillsproductpriceserviceoverallproduct_nproduct_catprice_nprice_catservice_nservice_catoverall_noverall_cat
02021-02-19 22:36:35.982M24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied1.012.023.034.04
42021-02-20 08:27:19.584F27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied3.034.045.055.05
52021-02-20 08:27:36.059F19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied4.044.044.044.04
62021-02-20 08:28:19.725X30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied2.024.043.035.05
72021-02-20 09:16:02.448X24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied4.045.054.045.05
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfiedNaN0NaN0NaN04.04
\n", 1358 | "
" 1359 | ], 1360 | "text/plain": [ 1361 | " timestamp gender age skills \\\n", 1362 | "0 2021-02-19 22:36:35.982 M 24.0 Word, Excel, PowerPoint, Access \n", 1363 | "4 2021-02-20 08:27:19.584 F 27.0 Word, Excel, PowerPoint \n", 1364 | "5 2021-02-20 08:27:36.059 F 19.0 Excel, Python, R \n", 1365 | "6 2021-02-20 08:28:19.725 X 30.0 SQL, R, SPSS, SAS \n", 1366 | "7 2021-02-20 09:16:02.448 X 24.0 PowerPoint \n", 1367 | "9 2021-02-20 09:23:22.259 NaN NaN Excel \n", 1368 | "\n", 1369 | " product price service overall \\\n", 1370 | "0 very dissatisfied dissatisfied neutral satisfied \n", 1371 | "4 neutral satisfied very satisfied very satisfied \n", 1372 | "5 satisfied satisfied satisfied satisfied \n", 1373 | "6 dissatisfied satisfied neutral very satisfied \n", 1374 | "7 satisfied very satisfied satisfied very satisfied \n", 1375 | "9 NaN NaN NaN satisfied \n", 1376 | "\n", 1377 | " product_n product_cat price_n price_cat service_n service_cat \\\n", 1378 | "0 1.0 1 2.0 2 3.0 3 \n", 1379 | "4 3.0 3 4.0 4 5.0 5 \n", 1380 | "5 4.0 4 4.0 4 4.0 4 \n", 1381 | "6 2.0 2 4.0 4 3.0 3 \n", 1382 | "7 4.0 4 5.0 5 4.0 4 \n", 1383 | "9 NaN 0 NaN 0 NaN 0 \n", 1384 | "\n", 1385 | " overall_n overall_cat \n", 1386 | "0 4.0 4 \n", 1387 | "4 5.0 5 \n", 1388 | "5 4.0 4 \n", 1389 | "6 5.0 5 \n", 1390 | "7 5.0 5 \n", 1391 | "9 4.0 4 " 1392 | ] 1393 | }, 1394 | "execution_count": 71, 1395 | "metadata": {}, 1396 | "output_type": "execute_result" 1397 | } 1398 | ], 1399 | "source": [ 1400 | "df[df['overall_n'] > 3]" 1401 | ] 1402 | }, 1403 | { 1404 | "cell_type": "code", 1405 | "execution_count": 72, 1406 | "metadata": {}, 1407 | "outputs": [ 1408 | { 1409 | "data": { 1410 | "text/plain": [ 1411 | "0 1\n", 1412 | "1 5\n", 1413 | "2 1\n", 1414 | "3 5\n", 1415 | "4 3\n", 1416 | "5 4\n", 1417 | "6 2\n", 1418 | "7 4\n", 1419 | "8 1\n", 1420 | "9 0\n", 1421 | "Name: product_cat, dtype: int8" 1422 | ] 1423 | }, 1424 | "execution_count": 72, 1425 | "metadata": {}, 1426 | "output_type": "execute_result" 
1427 | } 1428 | ], 1429 | "source": [ 1430 | "df['product_cat']" 1431 | ] 1432 | }, 1433 | { 1434 | "cell_type": "code", 1435 | "execution_count": 73, 1436 | "metadata": {}, 1437 | "outputs": [ 1438 | { 1439 | "data": { 1440 | "text/plain": [ 1441 | "0 0\n", 1442 | "1 4\n", 1443 | "2 0\n", 1444 | "3 4\n", 1445 | "4 2\n", 1446 | "5 3\n", 1447 | "6 1\n", 1448 | "7 3\n", 1449 | "8 0\n", 1450 | "9 -1\n", 1451 | "dtype: int8" 1452 | ] 1453 | }, 1454 | "execution_count": 73, 1455 | "metadata": {}, 1456 | "output_type": "execute_result" 1457 | } 1458 | ], 1459 | "source": [ 1460 | "df['product'].cat.codes" 1461 | ] 1462 | }, 1463 | { 1464 | "cell_type": "code", 1465 | "execution_count": 74, 1466 | "metadata": {}, 1467 | "outputs": [ 1468 | { 1469 | "data": { 1470 | "text/plain": [ 1471 | "'very dissatisfied'" 1472 | ] 1473 | }, 1474 | "execution_count": 74, 1475 | "metadata": {}, 1476 | "output_type": "execute_result" 1477 | } 1478 | ], 1479 | "source": [ 1480 | "df['product'][0]" 1481 | ] 1482 | }, 1483 | { 1484 | "cell_type": "code", 1485 | "execution_count": 75, 1486 | "metadata": {}, 1487 | "outputs": [ 1488 | { 1489 | "data": { 1490 | "text/html": [ 1491 | "
\n", 1492 | "\n", 1505 | "\n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " 
\n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | "
AccessExcelJavaScriptPowerPointPythonRSASSPSSSQLWord
01101000001
10100000010
20101000101
30111110011
40101000001
50100110000
60000011110
70001000000
80000000001
90100000000
\n", 1654 | "
" 1655 | ], 1656 | "text/plain": [ 1657 | " Access Excel JavaScript PowerPoint Python R SAS SPSS SQL Word\n", 1658 | "0 1 1 0 1 0 0 0 0 0 1\n", 1659 | "1 0 1 0 0 0 0 0 0 1 0\n", 1660 | "2 0 1 0 1 0 0 0 1 0 1\n", 1661 | "3 0 1 1 1 1 1 0 0 1 1\n", 1662 | "4 0 1 0 1 0 0 0 0 0 1\n", 1663 | "5 0 1 0 0 1 1 0 0 0 0\n", 1664 | "6 0 0 0 0 0 1 1 1 1 0\n", 1665 | "7 0 0 0 1 0 0 0 0 0 0\n", 1666 | "8 0 0 0 0 0 0 0 0 0 1\n", 1667 | "9 0 1 0 0 0 0 0 0 0 0" 1668 | ] 1669 | }, 1670 | "execution_count": 75, 1671 | "metadata": {}, 1672 | "output_type": "execute_result" 1673 | } 1674 | ], 1675 | "source": [ 1676 | "ds=df.skills.str.get_dummies(', ')\n", 1677 | "ds" 1678 | ] 1679 | }, 1680 | { 1681 | "cell_type": "code", 1682 | "execution_count": 76, 1683 | "metadata": {}, 1684 | "outputs": [ 1685 | { 1686 | "data": { 1687 | "text/html": [ 1688 | "
\n", 1689 | "\n", 1702 | "\n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " 
\n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | "
timestampgenderageskillsproductpriceserviceoverallproduct_nproduct_cat...AccessExcelJavaScriptPowerPointPythonRSASSPSSSQLWord
02021-02-19 22:36:35.982M24.0Word, Excel, PowerPoint, Accessvery dissatisfieddissatisfiedneutralsatisfied1.01...1101000001
12021-02-19 22:38:17.913M22.0Excel, SQLvery satisfiedsatisfiedneutraldissatisfied5.05...0100000010
22021-02-19 22:49:14.802X35.0Word, Excel, PowerPoint, SPSSvery dissatisfieddissatisfiedneutraldissatisfied1.01...0101000101
32021-02-19 23:01:24.057F23.0Word, Excel, PowerPoint, SQL, Python, R, JavaS...very satisfiedsatisfiedneutraldissatisfied5.05...0111110011
42021-02-20 08:27:19.584F27.0Word, Excel, PowerPointneutralsatisfiedvery satisfiedvery satisfied3.03...0101000001
52021-02-20 08:27:36.059F19.0Excel, Python, Rsatisfiedsatisfiedsatisfiedsatisfied4.04...0100110000
62021-02-20 08:28:19.725X30.0SQL, R, SPSS, SASdissatisfiedsatisfiedneutralvery satisfied2.02...0000011110
72021-02-20 09:16:02.448X24.0PowerPointsatisfiedvery satisfiedsatisfiedvery satisfied4.04...0001000000
82021-02-20 09:22:23.851MNaNWordvery dissatisfieddissatisfiedneutralNaN1.01...0000000001
92021-02-20 09:23:22.259NaNNaNExcelNaNNaNNaNsatisfiedNaN0...0100000000
\n", 1972 | "

10 rows × 26 columns

\n", 1973 | "
" 1974 | ], 1975 | "text/plain": [ 1976 | " timestamp gender age \\\n", 1977 | "0 2021-02-19 22:36:35.982 M 24.0 \n", 1978 | "1 2021-02-19 22:38:17.913 M 22.0 \n", 1979 | "2 2021-02-19 22:49:14.802 X 35.0 \n", 1980 | "3 2021-02-19 23:01:24.057 F 23.0 \n", 1981 | "4 2021-02-20 08:27:19.584 F 27.0 \n", 1982 | "5 2021-02-20 08:27:36.059 F 19.0 \n", 1983 | "6 2021-02-20 08:28:19.725 X 30.0 \n", 1984 | "7 2021-02-20 09:16:02.448 X 24.0 \n", 1985 | "8 2021-02-20 09:22:23.851 M NaN \n", 1986 | "9 2021-02-20 09:23:22.259 NaN NaN \n", 1987 | "\n", 1988 | " skills product \\\n", 1989 | "0 Word, Excel, PowerPoint, Access very dissatisfied \n", 1990 | "1 Excel, SQL very satisfied \n", 1991 | "2 Word, Excel, PowerPoint, SPSS very dissatisfied \n", 1992 | "3 Word, Excel, PowerPoint, SQL, Python, R, JavaS... very satisfied \n", 1993 | "4 Word, Excel, PowerPoint neutral \n", 1994 | "5 Excel, Python, R satisfied \n", 1995 | "6 SQL, R, SPSS, SAS dissatisfied \n", 1996 | "7 PowerPoint satisfied \n", 1997 | "8 Word very dissatisfied \n", 1998 | "9 Excel NaN \n", 1999 | "\n", 2000 | " price service overall product_n product_cat \\\n", 2001 | "0 dissatisfied neutral satisfied 1.0 1 \n", 2002 | "1 satisfied neutral dissatisfied 5.0 5 \n", 2003 | "2 dissatisfied neutral dissatisfied 1.0 1 \n", 2004 | "3 satisfied neutral dissatisfied 5.0 5 \n", 2005 | "4 satisfied very satisfied very satisfied 3.0 3 \n", 2006 | "5 satisfied satisfied satisfied 4.0 4 \n", 2007 | "6 satisfied neutral very satisfied 2.0 2 \n", 2008 | "7 very satisfied satisfied very satisfied 4.0 4 \n", 2009 | "8 dissatisfied neutral NaN 1.0 1 \n", 2010 | "9 NaN NaN satisfied NaN 0 \n", 2011 | "\n", 2012 | " ... Access Excel JavaScript PowerPoint Python R SAS SPSS SQL Word \n", 2013 | "0 ... 1 1 0 1 0 0 0 0 0 1 \n", 2014 | "1 ... 0 1 0 0 0 0 0 0 1 0 \n", 2015 | "2 ... 0 1 0 1 0 0 0 1 0 1 \n", 2016 | "3 ... 0 1 1 1 1 1 0 0 1 1 \n", 2017 | "4 ... 0 1 0 1 0 0 0 0 0 1 \n", 2018 | "5 ... 0 1 0 0 1 1 0 0 0 0 \n", 2019 | "6 ... 
0 0 0 0 0 1 1 1 1 0 \n", 2020 | "7 ... 0 0 0 1 0 0 0 0 0 0 \n", 2021 | "8 ... 0 0 0 0 0 0 0 0 0 1 \n", 2022 | "9 ... 0 1 0 0 0 0 0 0 0 0 \n", 2023 | "\n", 2024 | "[10 rows x 26 columns]" 2025 | ] 2026 | }, 2027 | "execution_count": 76, 2028 | "metadata": {}, 2029 | "output_type": "execute_result" 2030 | } 2031 | ], 2032 | "source": [ 2033 | "dt=pd.concat([df, ds], axis=1).copy()\n", 2034 | "dt" 2035 | ] 2036 | }, 2037 | { 2038 | "cell_type": "code", 2039 | "execution_count": 77, 2040 | "metadata": {}, 2041 | "outputs": [ 2042 | { 2043 | "data": { 2044 | "text/plain": [ 2045 | "Index(['timestamp', 'gender', 'age', 'skills', 'product', 'price', 'service',\n", 2046 | " 'overall', 'product_n', 'product_cat', 'price_n', 'price_cat',\n", 2047 | " 'service_n', 'service_cat', 'overall_n', 'overall_cat', 'Access',\n", 2048 | " 'Excel', 'JavaScript', 'PowerPoint', 'Python', 'R', 'SAS', 'SPSS',\n", 2049 | " 'SQL', 'Word'],\n", 2050 | " dtype='object')" 2051 | ] 2052 | }, 2053 | "execution_count": 77, 2054 | "metadata": {}, 2055 | "output_type": "execute_result" 2056 | } 2057 | ], 2058 | "source": [ 2059 | "dt.columns" 2060 | ] 2061 | }, 2062 | { 2063 | "cell_type": "code", 2064 | "execution_count": 78, 2065 | "metadata": {}, 2066 | "outputs": [ 2067 | { 2068 | "data": { 2069 | "text/html": [ 2070 | "
\n", 2071 | "\n", 2084 | "\n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " 
\n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | "
countmeanstdmin25%50%75%max
age8.025.5000005.04267519.022.7524.027.7535.0
product_n9.02.8888891.6914821.01.003.04.005.0
product_cat10.02.6000001.8378730.01.002.54.005.0
price_n9.03.4444441.1303882.02.004.04.005.0
price_cat10.03.1000001.5238840.02.004.04.005.0
service_n9.03.4444440.7264833.03.003.04.005.0
service_cat10.03.1000001.2866840.03.003.03.755.0
overall_n9.03.6666671.3228762.02.004.05.005.0
overall_cat10.03.3000001.7029390.02.004.04.755.0
Access10.00.1000000.3162280.00.000.00.001.0
Excel10.00.7000000.4830460.00.251.01.001.0
JavaScript10.00.1000000.3162280.00.000.00.001.0
PowerPoint10.00.5000000.5270460.00.000.51.001.0
Python10.00.2000000.4216370.00.000.00.001.0
R10.00.3000000.4830460.00.000.00.751.0
SAS10.00.1000000.3162280.00.000.00.001.0
SPSS10.00.2000000.4216370.00.000.00.001.0
SQL10.00.3000000.4830460.00.000.00.751.0
Word10.00.5000000.5270460.00.000.51.001.0
\n", 2310 | "
" 2311 | ], 2312 | "text/plain": [ 2313 | " count mean std min 25% 50% 75% max\n", 2314 | "age 8.0 25.500000 5.042675 19.0 22.75 24.0 27.75 35.0\n", 2315 | "product_n 9.0 2.888889 1.691482 1.0 1.00 3.0 4.00 5.0\n", 2316 | "product_cat 10.0 2.600000 1.837873 0.0 1.00 2.5 4.00 5.0\n", 2317 | "price_n 9.0 3.444444 1.130388 2.0 2.00 4.0 4.00 5.0\n", 2318 | "price_cat 10.0 3.100000 1.523884 0.0 2.00 4.0 4.00 5.0\n", 2319 | "service_n 9.0 3.444444 0.726483 3.0 3.00 3.0 4.00 5.0\n", 2320 | "service_cat 10.0 3.100000 1.286684 0.0 3.00 3.0 3.75 5.0\n", 2321 | "overall_n 9.0 3.666667 1.322876 2.0 2.00 4.0 5.00 5.0\n", 2322 | "overall_cat 10.0 3.300000 1.702939 0.0 2.00 4.0 4.75 5.0\n", 2323 | "Access 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n", 2324 | "Excel 10.0 0.700000 0.483046 0.0 0.25 1.0 1.00 1.0\n", 2325 | "JavaScript 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n", 2326 | "PowerPoint 10.0 0.500000 0.527046 0.0 0.00 0.5 1.00 1.0\n", 2327 | "Python 10.0 0.200000 0.421637 0.0 0.00 0.0 0.00 1.0\n", 2328 | "R 10.0 0.300000 0.483046 0.0 0.00 0.0 0.75 1.0\n", 2329 | "SAS 10.0 0.100000 0.316228 0.0 0.00 0.0 0.00 1.0\n", 2330 | "SPSS 10.0 0.200000 0.421637 0.0 0.00 0.0 0.00 1.0\n", 2331 | "SQL 10.0 0.300000 0.483046 0.0 0.00 0.0 0.75 1.0\n", 2332 | "Word 10.0 0.500000 0.527046 0.0 0.00 0.5 1.00 1.0" 2333 | ] 2334 | }, 2335 | "execution_count": 78, 2336 | "metadata": {}, 2337 | "output_type": "execute_result" 2338 | } 2339 | ], 2340 | "source": [ 2341 | "dt.describe().T" 2342 | ] 2343 | }, 2344 | { 2345 | "cell_type": "code", 2346 | "execution_count": 79, 2347 | "metadata": {}, 2348 | "outputs": [ 2349 | { 2350 | "data": { 2351 | "text/plain": [ 2352 | "Access 1\n", 2353 | "Excel 7\n", 2354 | "JavaScript 1\n", 2355 | "PowerPoint 5\n", 2356 | "Python 2\n", 2357 | "R 3\n", 2358 | "SAS 1\n", 2359 | "SPSS 2\n", 2360 | "SQL 3\n", 2361 | "Word 5\n", 2362 | "dtype: int64" 2363 | ] 2364 | }, 2365 | "execution_count": 79, 2366 | "metadata": {}, 2367 | "output_type": 
"execute_result" 2368 | } 2369 | ], 2370 | "source": [ 2371 | "dt.loc[:, 'Access':'Word'].sum()" 2372 | ] 2373 | }, 2374 | { 2375 | "cell_type": "code", 2376 | "execution_count": 80, 2377 | "metadata": {}, 2378 | "outputs": [ 2379 | { 2380 | "data": { 2381 | "text/plain": [ 2382 | "0 4\n", 2383 | "1 2\n", 2384 | "2 4\n", 2385 | "3 7\n", 2386 | "4 3\n", 2387 | "5 3\n", 2388 | "6 4\n", 2389 | "7 1\n", 2390 | "8 1\n", 2391 | "9 1\n", 2392 | "dtype: int64" 2393 | ] 2394 | }, 2395 | "execution_count": 80, 2396 | "metadata": {}, 2397 | "output_type": "execute_result" 2398 | } 2399 | ], 2400 | "source": [ 2401 | "dt.loc[:, 'Access':'Word'].sum(axis=1)" 2402 | ] 2403 | } 2404 | ], 2405 | "metadata": { 2406 | "kernelspec": { 2407 | "display_name": "Python 3", 2408 | "language": "python", 2409 | "name": "python3" 2410 | }, 2411 | "language_info": { 2412 | "codemirror_mode": { 2413 | "name": "ipython", 2414 | "version": 3 2415 | }, 2416 | "file_extension": ".py", 2417 | "mimetype": "text/x-python", 2418 | "name": "python", 2419 | "nbconvert_exporter": "python", 2420 | "pygments_lexer": "ipython3", 2421 | "version": "3.7.9" 2422 | }, 2423 | "widgets": { 2424 | "application/vnd.jupyter.widget-state+json": { 2425 | "state": {}, 2426 | "version_major": 2, 2427 | "version_minor": 0 2428 | } 2429 | } 2430 | }, 2431 | "nbformat": 4, 2432 | "nbformat_minor": 4 2433 | } 2434 | --------------------------------------------------------------------------------