├── cases ├── .gitignore ├── spotify │ └── spotify_dataset.docx ├── leaflet_map.ipynb └── mysql.ipynb ├── .gitignore ├── src01.xlsx ├── files ├── 通訊錄.xlsx └── 垃圾車點位資訊.csv ├── python_data_analysis.docx ├── python_data_analysis.pdf ├── sql ├── 5-1 建立資料庫.sql ├── 5-4 手動還原資料.sql ├── 5-3 新增資料.sql └── 5-2 建立資料表.sql ├── README.md ├── 2-6 Pandas 建立 DataFrame.ipynb ├── 2-4 Numpy 簡單運算.ipynb ├── 2-5 Pandas 使用 Series.ipynb ├── 2-1 Numpy 建立陣列.ipynb ├── 2-2 Numpy 一維陣列.ipynb ├── 2-3 Numpy 二維陣列.ipynb └── 3-1 Pandas 檔案輸入與輸出.ipynb /cases/.gitignore: -------------------------------------------------------------------------------- 1 | *複製* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb* 2 | *.csv 3 | test* -------------------------------------------------------------------------------- /src01.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/src01.xlsx -------------------------------------------------------------------------------- /files/通訊錄.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/files/通訊錄.xlsx -------------------------------------------------------------------------------- /files/垃圾車點位資訊.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/files/垃圾車點位資訊.csv -------------------------------------------------------------------------------- /python_data_analysis.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/python_data_analysis.docx -------------------------------------------------------------------------------- 
/python_data_analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/python_data_analysis.pdf -------------------------------------------------------------------------------- /cases/spotify/spotify_dataset.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telunyang/python_data_analysis/HEAD/cases/spotify/spotify_dataset.docx -------------------------------------------------------------------------------- /sql/5-1 建立資料庫.sql: -------------------------------------------------------------------------------- 1 | /* 如果資料庫存在就刪除 */ 2 | DROP DATABASE IF EXISTS `my_db`; 3 | 4 | /* 若資料庫不存在則新增,預設字元集為 utf8mb4,定序為 utf8mb4_unicode_ci */ 5 | CREATE DATABASE IF NOT EXISTS `my_db` 6 | DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; 7 | 8 | /* 指定資料庫 */ 9 | USE `my_db`; -------------------------------------------------------------------------------- /sql/5-4 手動還原資料.sql: -------------------------------------------------------------------------------- 1 | /* 將老師編號為 T001 的姓名,改成 曾○○ */ 2 | UPDATE `teachers` 3 | SET `tName` = '曾○○' 4 | WHERE `tId` = 'T001'; 5 | 6 | /* 新增一筆資料,學生編號為 088、課程編號為 C004,成績為 94 */ 7 | INSERT INTO `scores` 8 | (`sId`, `cId`, `score`) 9 | VALUES 10 | ('088', 'C004', 94); 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python_data_analysis 2 | Python 資料分析 3 | 4 | ## 提問原則 5 | - 通則 6 | - 「課程期間」或「結業前」可提問、討論,要把多餘時間和資源,留給當前上課的學員。 7 | - 寫信 8 | - E-mail: darren@darreninfo.cc 9 | - 信件標題寫上你的**班別和姓名**,或是在哪裡參與我的課程。 10 | - 提問的內容要與本專案有關,**其它課程的部分,去請益原本授課的老師**。 11 | - **不要把程式碼寄給我**,可能沒時間看,討論儘量以解決問題的方向為主。 12 | - 不符合以上幾點,將**直接刪除**,敬請見諒。 13 | - 指導時間 14 | - 有些課的班導會提供指導時間,例如某一天的晚上,學員們可以先整理問題,然後碰面時討論。 15 | - 或是整理給班導,班導會找我討論。 16 | - 社群 17 | - 可以在 Instagram 或 LinkedIn 
加我好友,然後透過傳訊來討論。 18 | - 記得跟我說你是哪一班,路人我就只好略過了,我只把時間留給學生 (茶~ 19 | 20 | ## 聯絡方式 21 | - [LinkedIn](https://www.linkedin.com/in/telunyang/) 22 | - [Instagram](https://www.instagram.com/darreninfo.cc/) -------------------------------------------------------------------------------- /sql/5-3 新增資料.sql: -------------------------------------------------------------------------------- 1 | /* 新增學生資料 */ 2 | INSERT INTO `students` 3 | (`sId`,`sName`,`sGender`,`sNickname`) 4 | VALUES 5 | ('003', '王○○', '男', '小王'), 6 | ('004', '江○○', '女', '小江'), 7 | ('005', '周○○', '女', '小周'), 8 | ('006', '黃○○', '男', '小黃'), 9 | ('007', '丁○○', '男', '小丁'), 10 | ('008', '鄭○○', '男', '小鄭'), 11 | ('087', '楊○○', '男', '好人'), 12 | ('088', '陳○○', '女', '小白'); 13 | 14 | /* 新增課程資料 */ 15 | INSERT INTO `courses` 16 | (`cId`, `cName`, `credit`, `isCompulsory`, `tId`) 17 | VALUES 18 | ('C001', '程式設計', 4, 1, 'T001'), 19 | ('C002', '網頁設計', 3, 1, 'T002'), 20 | ('C003', '視覺設計', 2, 1, 'T003'), 21 | ('C004', '網路教學', 4, 1, 'T005'); 22 | 23 | /* 新增成績資料 */ 24 | INSERT INTO `scores` 25 | (`sId`, `cId`, `score`) 26 | VALUES 27 | ('087', 'C001', 74), 28 | ('087', 'C002', 93), 29 | ('088', 'C002', 63), 30 | ('088', 'C003', 82), 31 | ('088', 'C004', 94); 32 | 33 | 34 | /* 新增老師資料 */ 35 | INSERT INTO `teachers` 36 | (`tId`, `tName`) 37 | VALUES 38 | ('T001', '曾○○'), 39 | ('T002', '林○○'), 40 | ('T003', '王○○'), 41 | ('T005', '謝○○'); 42 | -------------------------------------------------------------------------------- /sql/5-2 建立資料表.sql: -------------------------------------------------------------------------------- 1 | /* students(學生資料表) */ 2 | CREATE TABLE `my_db`.`students` ( 3 | `sId` VARCHAR(3) NOT NULL COMMENT '學生編號', 4 | `sName` VARCHAR(20) NOT NULL COMMENT '學生姓名', 5 | `sGender` VARCHAR(1) NOT NULL COMMENT '學生性別', 6 | `sNickname` VARCHAR(50) NOT NULL COMMENT '學生暱稱', 7 | PRIMARY KEY (`sId`) 8 | ) COMMENT = '學生資料表'; 9 | 10 | 11 | /* scores(成績資料表) */ 12 | CREATE TABLE `my_db`.`scores` ( 13 | `sId` VARCHAR(3) NOT NULL COMMENT 
'學生編號', 14 | `cId` VARCHAR(4) NOT NULL COMMENT '課程編號', 15 | `score` TINYINT(3) NOT NULL COMMENT '成績', 16 | PRIMARY KEY (`sId`,`cId`) 17 | ) COMMENT = '成績資料表'; 18 | 19 | 20 | /* teachers(老師資料表) */ 21 | CREATE TABLE `my_db`.`teachers` ( 22 | `tId` VARCHAR(4) NOT NULL COMMENT '老師編號' , 23 | `tName` VARCHAR(10) NOT NULL COMMENT '老師姓名', 24 | PRIMARY KEY (`tId`) 25 | ) COMMENT = '老師資料表'; 26 | 27 | 28 | /* courses(課程資料表) */ 29 | CREATE TABLE `my_db`.`courses` ( 30 | `cId` VARCHAR(4) NOT NULL COMMENT '課程編號', 31 | `cName` VARCHAR(10) NOT NULL COMMENT '課程名稱', 32 | `credit` TINYINT(1) NOT NULL COMMENT '學分', 33 | `isCompulsory` TINYINT(1) NOT NULL COMMENT '是否必修', 34 | `tId` VARCHAR(4) NOT NULL COMMENT '老師編號', 35 | PRIMARY KEY (`cId`, `tId`) 36 | ) COMMENT = '課程資料表'; 37 | 38 | -------------------------------------------------------------------------------- /cases/leaflet_map.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 安裝 ipyleaflet \n", 8 | "- 進入 anaconda prompt,切換到對應的環境,輸入以下指令:\n", 9 | " - `conda install -c conda-forge ipyleaflet`\n", 10 | "- 會需要用到 requests\n", 11 | " - `pip install requests`\n", 12 | "- ipyleaflet 首頁\n", 13 | " - [連結](https://ipyleaflet.readthedocs.io/en/latest/index.html)\n", 14 | "- Google Map\n", 15 | " - [連結](https://www.google.com.tw/maps)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "# 匯入套件\n", 25 | "from ipyleaflet import Map, Marker, basemaps, basemap_to_tiles\n", 26 | "from ipywidgets import Layout, HTML\n", 27 | "import requests" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "'''\n", 37 | "基本操作\n", 38 | "\n", 39 | "參考連結:\n", 40 | "https://ipyleaflet.readthedocs.io/en/latest/usage/index.html\n", 41 | "'''\n", 42 | "# 
設定地圖中心點\n", 43 | "center = (25.04882, 121.51375)\n", 44 | "\n", 45 | "# 取得地圖物件\n", 46 | "m = Map(center=center, zoom=15)\n", 47 | "\n", 48 | "# 取得標記物件\n", 49 | "marker = Marker(location=center, draggable=True)\n", 50 | "\n", 51 | "# 將標記物件加入地圖\n", 52 | "m.add(marker)\n", 53 | "\n", 54 | "# 顯示地圖\n", 55 | "display(m)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "'''\n", 65 | "更換地圖\n", 66 | "\n", 67 | "參考連結:\n", 68 | "https://ipyleaflet.readthedocs.io/en/latest/map_and_basemaps/basemaps.html\n", 69 | "'''\n", 70 | "center = (25.04882, 121.51375)\n", 71 | "zoom = 17\n", 72 | "\n", 73 | "# 更換地圖\n", 74 | "m = Map(basemap=basemaps.OpenStreetMap.Mapnik, center=center, zoom=zoom)\n", 75 | "\n", 76 | "# 顯示地圖\n", 77 | "m" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "'''\n", 87 | "進階操作\n", 88 | "'''\n", 89 | "# 自訂地圖樣式\n", 90 | "m = Map(\n", 91 | " basemap=basemap_to_tiles(basemaps.Gaode.Normal, \"2024-11-11\"),\n", 92 | " center=(25.04882, 121.51375),\n", 93 | " zoom=17,\n", 94 | " layout=Layout(width='100%', height='600px')\n", 95 | ")\n", 96 | "\n", 97 | "# 加入標記\n", 98 | "m.add(Marker(location=(25.0463287, 121.5148673)))\n", 99 | "\n", 100 | "# 顯示地圖\n", 101 | "m" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "'''\n", 111 | "Cafe Nomad:咖啡廳遊牧民族\n", 112 | "https://cafenomad.tw/\n", 113 | "\n", 114 | "臺北咖啡廳資訊 Web API:\n", 115 | "https://cafenomad.tw/api/v1.2/cafes/taipei\n", 116 | "'''\n", 117 | "\n", 118 | "# 發出請求,取得回應\n", 119 | "response = requests.get(url=\"https://cafenomad.tw/api/v1.2/cafes/taipei\")\n", 120 | "\n", 121 | "# 將回應內容以 json 格式 (將 json 字串轉成 dict 或 list) 回傳\n", 122 | "data = response.json()\n", 123 | "\n", 124 | "# 顯示資料型態\n", 125 | "print(type(data))\n", 126 | "\n", 
127 | "# 顯示資料筆數\n", 128 | "print(len(data))\n", 129 | "\n", 130 | "# 顯示所有資料\n", 131 | "for obj in data:\n", 132 | " # 取得緯度、經度、名稱、地址\n", 133 | " lat = obj[\"latitude\"]\n", 134 | " lon = obj[\"longitude\"]\n", 135 | " name = obj[\"name\"]\n", 136 | " address = obj[\"address\"]\n", 137 | "\n", 138 | " print(f\"店名: {name}\\t地址: {address}\\t緯度: {lat}\\t經度: {lon}\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "'''\n", 148 | "加入咖啡廳標記\n", 149 | "\n", 150 | "參考連結:\n", 151 | "[1] ipyleaflet API Reference - marker.title\n", 152 | "https://ipyleaflet.readthedocs.io/en/latest/api_reference/index.html#ipyleaflet.leaflet.Marker.title\n", 153 | "[2] Layers - Popup\n", 154 | "https://ipyleaflet.readthedocs.io/en/latest/layers/popup.html\n", 155 | "'''\n", 156 | "# 發出請求,取得回應\n", 157 | "response = requests.get(url=\"https://cafenomad.tw/api/v1.2/cafes/taipei\")\n", 158 | "\n", 159 | "# 將回應內容以 json 格式 (將 json 字串轉成 dict 或 list) 回傳\n", 160 | "data = response.json()\n", 161 | "\n", 162 | "# 自訂地圖樣式\n", 163 | "m = Map(\n", 164 | " basemap=basemap_to_tiles(basemaps.Gaode.Normal, \"2024-11-11\"),\n", 165 | " center=(25.04882, 121.51375),\n", 166 | " zoom=17,\n", 167 | " layout=Layout(width='100%', height='1000px')\n", 168 | ")\n", 169 | "\n", 170 | "# 加入標記\n", 171 | "for obj in data:\n", 172 | " # 建立標記物件\n", 173 | " marker = Marker(\n", 174 | " # 設定標記位置\n", 175 | " location=(obj['latitude'], obj['longitude']),\n", 176 | " \n", 177 | " # 設定標記是否可拖曳\n", 178 | " draggable=False,\n", 179 | "\n", 180 | " # 將移動到標記位置上,會顯示咖啡廳名稱\n", 181 | " title=f\"{obj['name']}\\n{obj['address']}\",\n", 182 | "\n", 183 | " # 設定 popup\n", 184 | " popup=HTML(value=f\"{obj['name']}<br>{obj['address']}\")\n", 185 | " )\n", 186 | " \n", 187 | " '''\n", 188 | " # 你也可以另外透過 marker.attribute = value 的方式,設定標記的 title\n", 189 | " marker.title = f\"{obj['name']}\\n{obj['address']}\"\n", 190 | " '''\n", 191 | " \n", 192 | " # 將標記加入地圖\n", 193 | " m.add(marker)\n", 194 | "\n", 195 | "\n", 196 | "# 顯示地圖\n", 197 | "m" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "test", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.10.14" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 2 222 | } 223 | -------------------------------------------------------------------------------- /2-6 Pandas 建立 DataFrame.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "141b3655", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "b8a88665", 16 | "metadata": {}, 17 | "source": [ 18 | "# 建立 dataframe" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "0584e8df", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | "
nameage
0Alex19
1Bill22
2Carl14
3Darren18
\n", 75 | "
" 76 | ], 77 | "text/plain": [ 78 | " name age\n", 79 | "0 Alex 19\n", 80 | "1 Bill 22\n", 81 | "2 Carl 14\n", 82 | "3 Darren 18" 83 | ] 84 | }, 85 | "execution_count": 2, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# 新增一個二維 list\n", 92 | "list_students = [\n", 93 | " ['Alex', 19],\n", 94 | " ['Bill', 22],\n", 95 | " ['Carl', 14],\n", 96 | " ['Darren', 18]\n", 97 | "]\n", 98 | "\n", 99 | "# 建立基本的 dataframe\n", 100 | "df = pd.DataFrame(list_students)\n", 101 | "\n", 102 | "# 設定 dataframe 的欄位\n", 103 | "df.columns = ['name', 'age']; df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 3, 109 | "id": "9f051ff4", 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/html": [ 115 | "
\n", 116 | "\n", 129 | "\n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
nameage
0Alex19
1Bill22
2Carl14
3Darren18
\n", 160 | "
" 161 | ], 162 | "text/plain": [ 163 | " name age\n", 164 | "0 Alex 19\n", 165 | "1 Bill 22\n", 166 | "2 Carl 14\n", 167 | "3 Darren 18" 168 | ] 169 | }, 170 | "execution_count": 3, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "# 使用 dict 來建立 dataframe\n", 177 | "df = pd.DataFrame({\n", 178 | " 'name': ['Alex', 'Bill', 'Carl', 'Darren'],\n", 179 | " 'age': [19, 22, 14, 18]\n", 180 | "}); df" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 4, 186 | "id": "a7075006", 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/html": [ 192 | "
\n", 193 | "\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
nameage
0Alex19
1Bill22
2Carl14
3Darren18
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " name age\n", 241 | "0 Alex 19\n", 242 | "1 Bill 22\n", 243 | "2 Carl 14\n", 244 | "3 Darren 18" 245 | ] 246 | }, 247 | "execution_count": 4, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# 使用 list of dict 來建立 dataframe\n", 254 | "data = [\n", 255 | " {'name': 'Alex', 'age': 19},\n", 256 | " {'name': 'Bill', 'age': 22},\n", 257 | " {'name': 'Carl', 'age': 14},\n", 258 | " {'name': 'Darren', 'age': 18},\n", 259 | "]\n", 260 | "\n", 261 | "df = pd.DataFrame(data); df" 262 | ] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "python3@nlp", 268 | "language": "python", 269 | "name": "k_nlp" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.10.11" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 5 286 | } 287 | -------------------------------------------------------------------------------- /2-4 Numpy 簡單運算.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "aa6a1d74", 6 | "metadata": {}, 7 | "source": [ 8 | "# 陣列簡單加總" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "c075b69c", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "c428506a", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "10" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "# 一維陣列的「和」 (加總)\n", 40 | "np.sum([1, 2, 3, 4])" 41 | ] 42 | }, 43 | { 44 
| "cell_type": "code", 45 | "execution_count": 3, 46 | "id": "8fb845cb", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "10" 53 | ] 54 | }, 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "# 二維陣列的「和」 (加總)\n", 62 | "np.sum([[1,2], [3,4]])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "0ec83a96", 68 | "metadata": {}, 69 | "source": [ 70 | "軸 (axis) 的概念\n", 71 | "![軸 (axis) 的概念](https://i.imgur.com/u7ItSQZ.png)\n", 72 | "\n", 73 | "參考資料: [Numpy Axes, Explained](https://youtu.be/aF96TC_6kDg)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "id": "71477b19", 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "21" 86 | ] 87 | }, 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "# 二維陣列的「和」,指定軸 (axis) 來計算\n", 95 | "'''\n", 96 | "[\n", 97 | " [1,2], \n", 98 | " [3,4], \n", 99 | " [5,6]\n", 100 | "]\n", 101 | "\n", 102 | "axis = None: \n", 103 | "- 所有元素,不分列 (row) 或行 (column)。預設值。\n", 104 | "axis = 0:\n", 105 | "- 順著 [0][0], [1][0], [2][0], ... 等索引號碼增加的方向\n", 106 | "- 類似座標的 y 軸,針對每一個元素 y 軸對應的位置 (由上而下) 進行計算\n", 107 | "axis = 1: \n", 108 | "- 順著 [0][0], [0][1], [0][2], ... 
等索引號碼增加的方向\n", 109 | "- 類似座標的 x 軸,針對每一個元素 x 軸對應的位置 (由左而右) 進行計算\n", 110 | "'''\n", 111 | "np.sum([[1,2], [3,4], [5,6]], axis=None)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "f71b96f7", 117 | "metadata": {}, 118 | "source": [ 119 | "# 捨去函數" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 11, 125 | "id": "670cb37e", 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "array([0.4, 1.8])" 132 | ] 133 | }, 134 | "execution_count": 11, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "# around(): 四捨五入到最近的「偶數值」\n", 141 | "'''\n", 142 | "numpy.around(a, decimals=0, out=None)\n", 143 | "\n", 144 | "decimals: 指定小數位數 (小數點後面第幾位)\n", 145 | "'''\n", 146 | "np.around([0.45, 1.85], 1)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 12, 152 | "id": "04724310", 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "array([2., 2., 2., 3.])" 159 | ] 160 | }, 161 | "execution_count": 12, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "# rint(): 回傳最近的整數 (四捨五入到最近的「偶數值」)\n", 168 | "np.rint([1.5, 2.5, 1.6, 3.3])" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 13, 174 | "id": "1d8317b6", 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "array([1., 2., 1., 3.])" 181 | ] 182 | }, 183 | "execution_count": 13, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "# floor(): 無條件捨去\n", 190 | "np.floor([1.5, 2.5, 1.6, 3.3])" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 14, 196 | "id": "2f8a98ad", 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "array([2., 3., 2., 4.])" 203 | ] 204 | }, 205 | "execution_count": 14, 206 | "metadata": {}, 
207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "# ceil(): 無條件進位\n", 212 | "np.ceil([1.1, 2.5, 1.6, 3.3])" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 15, 218 | "id": "44a31c8d", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "array([1., 2., 1., 3.])" 225 | ] 226 | }, 227 | "execution_count": 15, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "# trunc(): 捨棄小數\n", 234 | "np.trunc([1.1, 2.5, 1.6, 3.3])" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "id": "9b47b1e6", 240 | "metadata": {}, 241 | "source": [ 242 | "# 其它" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 21, 248 | "id": "2f279802", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "array([3, 3])" 255 | ] 256 | }, 257 | "execution_count": 21, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "# absolute(): 回傳絕對值\n", 264 | "np.absolute([-3, 3])" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 22, 270 | "id": "7b13b57b", 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "array([16, 81])" 277 | ] 278 | }, 279 | "execution_count": 22, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "# square(): 回傳平方值\n", 286 | "np.square([4, 9])" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 23, 292 | "id": "c196fb25", 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "array([2., 3.])" 299 | ] 300 | }, 301 | "execution_count": 23, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "# sqrt(): 回傳平方根\n", 308 | "np.sqrt([4, 9])" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | 
"id": "bd884db8", 314 | "metadata": {}, 315 | "source": [ 316 | "# 隨機函數\n", 317 | "- np.random.rand(dim0\\[, dim1, dim2, ...\\])\n", 318 | "- np.random.randint(low, high=None, size=None)\n", 319 | "- 參考資料: \n", 320 | " - [Random sampling](https://numpy.org/doc/stable/reference/random/index.html)\n", 321 | " - [python numpy 常用随机数的产生方法](https://blog.csdn.net/m0_37804518/article/details/78490709)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 24, 327 | "id": "875a3757", 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "data": { 332 | "text/plain": [ 333 | "array([0.37454012, 0.95071431, 0.73199394])" 334 | ] 335 | }, 336 | "execution_count": 24, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "'''\n", 343 | "np.random.seed(42)\n", 344 | "numpy.random.rand(): 設定種子後,取得隨機亂數\n", 345 | "\n", 346 | "註: np.random.seed() 的引數固定,隨機產生的值也會固定,亂數的值介於 [0, 1)\n", 347 | "'''\n", 348 | "np.random.seed(42)\n", 349 | "a = np.random.rand(3); a" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 2, 355 | "id": "dce628fe", 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/plain": [ 361 | "array([53, 9, 65, 56, 44, 42, 17, 23, 55, 14, 21, 25, 78, 34, 56, 35, 55,\n", 362 | " 32, 33, 84, 38, 51, 81, 55, 85, 58, 96, 89, 13, 49, 13, 15, 47, 20,\n", 363 | " 41, 16, 61, 55, 42, 43, 38, 26, 36, 98, 88, 95, 42, 80, 67, 73, 4,\n", 364 | " 31, 91, 9, 84, 99, 52, 76, 18, 30, 72, 27, 50, 56, 6, 93, 28, 4,\n", 365 | " 6, 44, 10, 17, 61, 71, 72, 59, 68, 4, 0, 40, 38, 69, 80, 72, 74,\n", 366 | " 89, 53, 40, 50, 91, 77, 75, 90, 66, 60, 30, 9, 42, 63, 22])" 367 | ] 368 | }, 369 | "execution_count": 2, 370 | "metadata": {}, 371 | "output_type": "execute_result" 372 | } 373 | ], 374 | "source": [ 375 | "'''\n", 376 | "np.random.randint(start, end, size=數量): 產生 start 到不包含 end 之間的整數值\n", 377 | "\n", 378 | "隨機產生 100 位同學的成績\n", 379 | "'''\n", 380 | "a = np.random.randint(0, 
101, size=100); a" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 26, 386 | "id": "89bc0e4b", 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "array([1, 5, 2, 4, 3, 7, 9, 6, 8, 0])" 393 | ] 394 | }, 395 | "execution_count": 26, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "'''\n", 402 | "np.random.shuffle(): 陣列元素重新排列\n", 403 | "'''\n", 404 | "x = np.arange(10)\n", 405 | "np.random.shuffle(x)\n", 406 | "x" 407 | ] 408 | } 409 | ], 410 | "metadata": { 411 | "kernelspec": { 412 | "display_name": "python3@nlp", 413 | "language": "python", 414 | "name": "k_nlp" 415 | }, 416 | "language_info": { 417 | "codemirror_mode": { 418 | "name": "ipython", 419 | "version": 3 420 | }, 421 | "file_extension": ".py", 422 | "mimetype": "text/x-python", 423 | "name": "python", 424 | "nbconvert_exporter": "python", 425 | "pygments_lexer": "ipython3", 426 | "version": "3.10.11" 427 | } 428 | }, 429 | "nbformat": 4, 430 | "nbformat_minor": 5 431 | } 432 | -------------------------------------------------------------------------------- /2-5 Pandas 使用 Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "afa05ded", 6 | "metadata": {}, 7 | "source": [ 8 | "# 安裝 pandas\n", 9 | "若有語法不了解的地方,可以參考以下連結:\n", 10 | "- [API reference](https://pandas.pydata.org/docs/reference/index.html)\n", 11 | "- [Pandas Tutorial](https://www.w3schools.com/python/pandas/default.asp)\n", 12 | "- [Pandas 教程](https://www.runoob.com/pandas/pandas-tutorial.html)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "d6cb020d", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# 安裝套件\n", 23 | "!pip install pandas" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "id": "db0a68e1", 30 | "metadata": {}, 31 | 
"outputs": [], 32 | "source": [ 33 | "import pandas as pd\n", 34 | "import numpy as np" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "b2cb779c", 40 | "metadata": {}, 41 | "source": [ 42 | "# 使用 series" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "id": "9049ceba", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "0 11\n", 55 | "1 22\n", 56 | "2 33\n", 57 | "3 44\n", 58 | "4 55\n", 59 | "dtype: int64" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "# 使用 list 建立 series 物件\n", 69 | "s = pd.Series([11, 22, 33, 44, 55]); s" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "id": "4a244e6b", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "蘋果 60\n", 82 | "水梨 50\n", 83 | "dtype: int64" 84 | ] 85 | }, 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "# 使用 dict 建立 series 物件\n", 93 | "myDict = {\"蘋果\": 60, \"水梨\": 50}\n", 94 | "s = pd.Series(myDict); s" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "id": "430408f8", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "0 13\n", 107 | "1 14\n", 108 | "2 15\n", 109 | "3 16\n", 110 | "4 17\n", 111 | "5 18\n", 112 | "6 19\n", 113 | "7 20\n", 114 | "8 21\n", 115 | "9 22\n", 116 | "10 23\n", 117 | "dtype: int32" 118 | ] 119 | }, 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "# 使用 Numpy 的 ndarray 建立 series 物件\n", 127 | "s = pd.Series(np.arange(13, 24)); s" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "id": "3ecb0e0e", 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "3 10\n", 
140 | "6 20\n", 141 | "9 30\n", 142 | "dtype: int64" 143 | ] 144 | }, 145 | "execution_count": 5, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "# 建立含索引的 series 物件\n", 152 | "myIndex = [3, 6, 9] # 也可以用字串當 index (key)\n", 153 | "price = [10, 20, 30]\n", 154 | "s = pd.Series(price, index=myIndex); s" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 6, 160 | "id": "4f15bab0", 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "1 7\n", 167 | "2 7\n", 168 | "3 7\n", 169 | "dtype: int64" 170 | ] 171 | }, 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "# 使用純量 (scalar) 建立 series\n", 179 | "s = pd.Series(7, index=[1,2,3]); s" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "id": "865f4127", 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "apple 10\n", 192 | "orange 20\n", 193 | "pear 30\n", 194 | "dtype: int64" 195 | ] 196 | }, 197 | "execution_count": 7, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "# 列出 series 物件索引與值\n", 204 | "s = pd.Series([10, 20, 30], index=['apple', 'orange', 'pear']); s" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 8, 210 | "id": "f1cade79", 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "array([10, 20, 30], dtype=int64)" 217 | ] 218 | }, 219 | "execution_count": 8, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "# 承上,印出所有的值\n", 226 | "s.values" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 9, 232 | "id": "c2c4f545", 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "Index(['apple', 'orange', 'pear'], 
dtype='object')" 239 | ] 240 | }, 241 | "execution_count": 9, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "# 承上,印出所有的索引編號\n", 248 | "s.index" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "id": "cba06627", 254 | "metadata": {}, 255 | "source": [ 256 | "# series 的運算" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 10, 262 | "id": "a6acc1dc", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "# 將切片觀念用在 series 物件\n", 267 | "s = pd.Series([11, 22, 33, 44, 55, 66])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 11, 273 | "id": "3bdb0430", 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "44" 280 | ] 281 | }, 282 | "execution_count": 11, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "s[3]" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 12, 294 | "id": "5bec234d", 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "2 33\n", 301 | "3 44\n", 302 | "dtype: int64" 303 | ] 304 | }, 305 | "execution_count": 12, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "s[2:4]" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 13, 317 | "id": "9c9bf8d0", 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 11\n", 324 | "1 22\n", 325 | "2 33\n", 326 | "dtype: int64" 327 | ] 328 | }, 329 | "execution_count": 13, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "s[:3]" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 14, 341 | "id": "e7a52244", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "2 33\n", 348 | "3 44\n", 349 | "4 55\n", 
350 | "5 66\n", 351 | "dtype: int64" 352 | ] 353 | }, 354 | "execution_count": 14, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "s[2:]" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 15, 366 | "id": "a50f22f6", 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "5 66\n", 373 | "dtype: int64" 374 | ] 375 | }, 376 | "execution_count": 15, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "# series 只能使用 slicing,無法直接使用 -1 來取得最後一個元素的值\n", 383 | "s[-1:]" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 16, 389 | "id": "a86388ce", 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "text/plain": [ 395 | "0 4\n", 396 | "1 6\n", 397 | "dtype: int64" 398 | ] 399 | }, 400 | "execution_count": 16, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [ 406 | "# series 物件相加\n", 407 | "x = pd.Series([1, 2])\n", 408 | "y = pd.Series([3, 4])\n", 409 | "x + y" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 17, 415 | "id": "59d9585c", 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "0 3\n", 422 | "1 8\n", 423 | "dtype: int64" 424 | ] 425 | }, 426 | "execution_count": 17, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "# series 物件相乘\n", 433 | "x * y" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 18, 439 | "id": "29b5a452", 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/plain": [ 445 | "0 False\n", 446 | "1 False\n", 447 | "dtype: bool" 448 | ] 449 | }, 450 | "execution_count": 18, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "# 邏輯判斷: 大於 (可以嘗試其它判斷)\n", 457 | "x > y" 458 | ] 459 | }, 
460 | { 461 | "cell_type": "code", 462 | "execution_count": 19, 463 | "id": "a6f85135", 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/plain": [ 469 | "apple 35\n", 470 | "orange 65\n", 471 | "pear 95\n", 472 | "dtype: int64" 473 | ] 474 | }, 475 | "execution_count": 19, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | } 479 | ], 480 | "source": [ 481 | "# 擁有相同的 index (或 key),執行相加\n", 482 | "fruits = ['apple', 'orange', 'pear']\n", 483 | "x1 = pd.Series([20, 30, 40], index=fruits)\n", 484 | "x2 = pd.Series([15, 35, 55], index=fruits)\n", 485 | "x1 + x2" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 20, 491 | "id": "235ba655", 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "data": { 496 | "text/plain": [ 497 | "apple 35.0\n", 498 | "banana NaN\n", 499 | "orange NaN\n", 500 | "pear 95.0\n", 501 | "dtype: float64" 502 | ] 503 | }, 504 | "execution_count": 20, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "# 擁有不同的 index (或 key),執行相加,不同索引之間的值相加,會填上 NaN (Not a Number)\n", 511 | "fruits1 = ['apple', 'orange', 'pear']\n", 512 | "fruits2 = ['apple', 'banana', 'pear']\n", 513 | "x1 = pd.Series([20, 30, 40], index=fruits1)\n", 514 | "x2 = pd.Series([15, 35, 55], index=fruits2)\n", 515 | "x1 + x2" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 21, 521 | "id": "33e039f2", 522 | "metadata": {}, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "text/plain": [ 527 | "apple 20\n", 528 | "orange 30\n", 529 | "pear 40\n", 530 | "dtype: int64" 531 | ] 532 | }, 533 | "execution_count": 21, 534 | "metadata": {}, 535 | "output_type": "execute_result" 536 | } 537 | ], 538 | "source": [ 539 | "# series 的索引是字串 (key),取得元素內容\n", 540 | "fruits = ['apple', 'orange', 'pear']\n", 541 | "x = pd.Series([20, 30, 40], index=fruits); x" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 22, 547 | "id": 
"85d6c782", 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "20" 554 | ] 555 | }, 556 | "execution_count": 22, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "# 取得單一 key 的資料\n", 563 | "x['apple']" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 23, 569 | "id": "fea13afd", 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "data": { 574 | "text/plain": [ 575 | "apple 20\n", 576 | "orange 30\n", 577 | "dtype: int64" 578 | ] 579 | }, 580 | "execution_count": 23, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "# 取得多個 key 的資料,要使用 list 包起來 (在 pandas 很常用)\n", 587 | "x[ ['apple', 'orange'] ]" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 24, 593 | "id": "ac5c4688", 594 | "metadata": {}, 595 | "outputs": [ 596 | { 597 | "data": { 598 | "text/plain": [ 599 | "apple 400\n", 600 | "orange 900\n", 601 | "pear 1600\n", 602 | "dtype: int64" 603 | ] 604 | }, 605 | "execution_count": 24, 606 | "metadata": {}, 607 | "output_type": "execute_result" 608 | } 609 | ], 610 | "source": [ 611 | "# 每個元素都取得平方值\n", 612 | "np.square(x)" 613 | ] 614 | } 615 | ], 616 | "metadata": { 617 | "kernelspec": { 618 | "display_name": "python3@nlp", 619 | "language": "python", 620 | "name": "k_nlp" 621 | }, 622 | "language_info": { 623 | "codemirror_mode": { 624 | "name": "ipython", 625 | "version": 3 626 | }, 627 | "file_extension": ".py", 628 | "mimetype": "text/x-python", 629 | "name": "python", 630 | "nbconvert_exporter": "python", 631 | "pygments_lexer": "ipython3", 632 | "version": "3.10.11" 633 | } 634 | }, 635 | "nbformat": 4, 636 | "nbformat_minor": 5 637 | } 638 | -------------------------------------------------------------------------------- /2-1 Numpy 建立陣列.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "id": "504249b4", 6 | "metadata": {}, 7 | "source": [ 8 | "# 安裝 Numpy\n", 9 | "若有語法不了解的地方,可以參考以下連結:\n", 10 | "- [NumPy Reference](https://numpy.org/doc/stable/reference/index.html)\n", 11 | "- [NumPy Tutorial](https://www.w3schools.com/python/numpy/default.asp)\n", 12 | "- [NumPy 教程](https://www.runoob.com/numpy/numpy-tutorial.html)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "dd315f58", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "'''\n", 23 | "註:\n", 24 | "- 也可以在 conda 的虛擬環境中 (如 da) 裡面安裝 Numpy。\n", 25 | " - 指令: pip install -U numpy。\n", 26 | "- 如果直接安裝 pandas,會連同 numpy 一起安裝。\n", 27 | " - 指令: pip install -U pandas\n", 28 | "'''\n", 29 | "\n", 30 | "!pip install -U pandas" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "id": "d26a51c3", 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# 匯入套件\n", 43 | "import numpy as np" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "a4a4f546", 49 | "metadata": {}, 50 | "source": [ 51 | "建議遇到不確定如何使用的語法,可以經常查詢官方的文件\n", 52 | "- [doc 版本列表](https://numpy.org/doc/)\n", 53 | "- [當前 stable 版本](https://numpy.org/doc/stable/reference/index.html)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "e5064307", 59 | "metadata": {}, 60 | "source": [ 61 | "# np.array(): 產生陣列 (Array)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "id": "dcab2a66", 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# 語法查詢\n", 72 | "np.array?" 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "fd4babb6", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# 在未完成的語法後面按下 tab 鍵,會開啟自動完成功能 (語法提示) \n", 83 | "np.ar" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 2, 89 | "id": "d8a9f8f9", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "66\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "'''\n", 102 | "numpy.array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0, like=None)\n", 103 | "'''\n", 104 | "# 0 維陣列\n", 105 | "a = np.array(33)\n", 106 | "print(a + a)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "id": "4da11d07", 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "array([0, 1, 2, 3])" 119 | ] 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "# 一維陣列\n", 128 | "a = np.array([0,1,2,3])\n", 129 | "'''\n", 130 | "註:\n", 131 | "np.array() 裡面的 [0,1,2,3],是 Python 的 List (串列) 變數,\n", 132 | "而使用 np.array([0,1,2,3]) 賦值之後的 a,\n", 133 | "它是真正的 Array (陣列)\n", 134 | "'''\n", 135 | "\n", 136 | "# 在 cell 裡面,程式碼最後一行是變數名稱時,可以自動預覽結果\n", 137 | "a" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 4, 143 | "id": "7edcd067", 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "array([0, 1, 2, 3])" 150 | ] 151 | }, 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "# 如果希望宣告變數後,直接預覽變數,可以用「;」隔開,寫在同一行:\n", 159 | "a = np.array([0,1,2,3]); a" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 5, 165 | "id": "64ba5ae9", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "array([[0, 1, 2],\n", 
172 | " [3, 4, 5]])" 173 | ] 174 | }, 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "# 二維陣列\n", 182 | "'''\n", 183 | "長這個樣子:\n", 184 | "arr = [\n", 185 | " [0,1,2], \n", 186 | " [3,4,5]\n", 187 | "]\n", 188 | "'''\n", 189 | "a = np.array([[0,1,2], [3,4,5]]); a" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "63bb8194", 195 | "metadata": {}, 196 | "source": [ 197 | "# Numpy 的 shape, ndim, dtype" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 6, 203 | "id": "472bf450", 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "(2, 3)" 210 | ] 211 | }, 212 | "execution_count": 6, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "# shape (形狀,以 tuple 格式呈現)\n", 219 | "'''\n", 220 | "(2, 3) 代表 2 維陣列:\n", 221 | "- 有 2 列,每 1 列有 3 個元素,\n", 222 | "- 也可看成幾個 row、幾個 column\n", 223 | "'''\n", 224 | "a.shape" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 7, 230 | "id": "acc2e418", 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "2" 237 | ] 238 | }, 239 | "execution_count": 7, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "# ndim (n-dimensions,維度)\n", 246 | "a.ndim" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 8, 252 | "id": "726c08f5", 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "dtype('int32')" 259 | ] 260 | }, 261 | "execution_count": 8, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "# dtype (data-type,陣列當中每一個元素的屬性)\n", 268 | "a.dtype" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 9, 274 | "id": "5fc72a7b", 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": 
{ 279 | "text/plain": [ 280 | "(2, 3)" 281 | ] 282 | }, 283 | "execution_count": 9, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "# 1 維陣列的 shape 長什麼樣子?\n", 290 | "'''\n", 291 | "說明:\n", 292 | "- (4) 代表一個數字\n", 293 | "- (4,) 代表一個 tuple (1 維陣列)\n", 294 | "- (1, 4) 代表 2 維陣列,裡面只有 1 維的資料,該維有 4 個元素,例如 np.array([[1,2,3,4]])\n", 295 | "'''\n", 296 | "a.shape" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "id": "bc34ce34", 302 | "metadata": {}, 303 | "source": [ 304 | "# np.arange(): 從數值範圍來建立陣列" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 10, 310 | "id": "0387c714", 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "data": { 315 | "text/plain": [ 316 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 317 | ] 318 | }, 319 | "execution_count": 10, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "# 跟 range() 用法一樣\n", 326 | "'''\n", 327 | "用法:\n", 328 | "numpy.arange([start, ]stop, [step, ]dtype=None, *, like=None)\n", 329 | "'''\n", 330 | "a = np.arange(10); a" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 11, 336 | "id": "d1095578", 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])" 343 | ] 344 | }, 345 | "execution_count": 11, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "# 取得陣列 [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 352 | "a = np.arange(1, 10); a" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 12, 358 | "id": "090ce43e", 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "array([1, 4, 7])" 365 | ] 366 | }, 367 | "execution_count": 12, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "# 1 ~ 10 之間,每隔 3 個元素,加入資料到陣列: [1, 4, 7]\n", 374 | "a = 
np.arange(1, 10, 3); a" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "id": "5cc9f625", 380 | "metadata": {}, 381 | "source": [ 382 | "# Numpy的資料型態 (data-type)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 13, 388 | "id": "f06a1e6a", 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "dtype('int32')" 395 | ] 396 | }, 397 | "execution_count": 13, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "# 自動判斷資料型態\n", 404 | "a = np.array([1,2,3,4]); a.dtype" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 14, 410 | "id": "6ff957e9", 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "dtype('float64')" 417 | ] 418 | }, 419 | "execution_count": 14, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "# 自動判斷資料型態 (其中一個元素變成浮點數,dtype 會自動轉型)\n", 426 | "a = np.array([1., 2, 3, 4]); a.dtype" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 15, 432 | "id": "fa37e289", 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/plain": [ 438 | "array([1., 2., 3., 4.])" 439 | ] 440 | }, 441 | "execution_count": 15, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "# 指定陣列每一個資料的資料型態 (指定 float64)\n", 448 | "a = np.array([1, 2, 3, 4], dtype='float64')\n", 449 | "a\n", 450 | "# a.dtype" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 17, 456 | "id": "ae230475", 457 | "metadata": {}, 458 | "outputs": [ 459 | { 460 | "data": { 461 | "text/plain": [ 462 | "dtype('bool')" 463 | ] 464 | }, 465 | "execution_count": 17, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "# 布林值用於 numpy\n", 472 | "a = np.array([True, True, False]); a.dtype" 473 | ] 474 | }, 475 | { 476 
| "cell_type": "code", 477 | "execution_count": 18, 478 | "id": "42f1e8a9", 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "array([1, 2, 3])" 485 | ] 486 | }, 487 | "execution_count": 18, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "# 類型轉換 (float 轉成 int)\n", 494 | "a = np.array([1, 2, 3.14]) # dtype('float64')\n", 495 | "a_ = a.astype(int); a_" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "id": "25169fab", 501 | "metadata": {}, 502 | "source": [ 503 | "# Array creation routine 用於創建多維陣列的函數" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 19, 509 | "id": "0ebf7f90", 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/plain": [ 515 | "array([1., 5., 9.])" 516 | ] 517 | }, 518 | "execution_count": 19, 519 | "metadata": {}, 520 | "output_type": "execute_result" 521 | } 522 | ], 523 | "source": [ 524 | "# np.linspace(): 建立等距的 1 維陣列\n", 525 | "'''\n", 526 | "np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)\n", 527 | "'''\n", 528 | "a = np.linspace(1, 9, num=3); a" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 20, 534 | "id": "f801ae16", 535 | "metadata": {}, 536 | "outputs": [ 537 | { 538 | "data": { 539 | "text/plain": [ 540 | "array([[1., 1., 1.],\n", 541 | " [1., 1., 1.]])" 542 | ] 543 | }, 544 | "execution_count": 20, 545 | "metadata": {}, 546 | "output_type": "execute_result" 547 | } 548 | ], 549 | "source": [ 550 | "# np.ones(): 建立填滿 1 的陣列,透過 shape 指定幾維\n", 551 | "'''\n", 552 | "numpy.ones(shape, dtype=None, order='C', *, like=None)\n", 553 | "'''\n", 554 | "a = np.ones((2,3)); a" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 21, 560 | "id": "63842f47", 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "array([0., 0., 0., 0., 0.])" 567 | ] 568 | }, 569 |
"execution_count": 21, 570 | "metadata": {}, 571 | "output_type": "execute_result" 572 | } 573 | ], 574 | "source": [ 575 | "# np.zeros(): 建立填滿 0 的陣列,透過 shape 指定幾維 \n", 576 | "'''\n", 577 | "numpy.zeros(shape, dtype=float, order='C', *, like=None)\n", 578 | "'''\n", 579 | "a = np.zeros((5,)); a" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 23, 585 | "id": "e4e8d90f", 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "array([[10, 10, 10, 10],\n", 592 | " [10, 10, 10, 10],\n", 593 | " [10, 10, 10, 10]])" 594 | ] 595 | }, 596 | "execution_count": 23, 597 | "metadata": {}, 598 | "output_type": "execute_result" 599 | } 600 | ], 601 | "source": [ 602 | "# np.full(): 建立填滿 fill_value 的陣列,透過 shape 指定幾維 \n", 603 | "'''\n", 604 | "numpy.full(shape, fill_value, dtype=None, order='C', *, like=None)\n", 605 | "'''\n", 606 | "a = np.full((3,4), 10); a" 607 | ] 608 | } 609 | ], 610 | "metadata": { 611 | "kernelspec": { 612 | "display_name": "python3@nlp", 613 | "language": "python", 614 | "name": "k_nlp" 615 | }, 616 | "language_info": { 617 | "codemirror_mode": { 618 | "name": "ipython", 619 | "version": 3 620 | }, 621 | "file_extension": ".py", 622 | "mimetype": "text/x-python", 623 | "name": "python", 624 | "nbconvert_exporter": "python", 625 | "pygments_lexer": "ipython3", 626 | "version": "3.10.11" 627 | } 628 | }, 629 | "nbformat": 4, 630 | "nbformat_minor": 5 631 | } 632 | -------------------------------------------------------------------------------- /2-2 Numpy 一維陣列.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "f6a59d5a", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "1c83eb2c", 16 | "metadata": {}, 17 | "source": [ 18 | "# 一維陣列的四則運算" 19 | ] 20 | }, 21 | { 22 | "cell_type": 
"code", 23 | "execution_count": 2, 24 | "id": "7844ce81", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# 初始化陣列\n", 29 | "x = np.array([2, 4, 6, 8, 10])\n", 30 | "y = np.array([10, 8, 6, 4, 2])" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "id": "1b490ed1", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "array([ 6, 8, 10, 12, 14])" 43 | ] 44 | }, 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "# 整數與陣列: 加法\n", 52 | "a = x + 4; a" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "id": "98cbe863", 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "array([0, 2, 4, 6, 8])" 65 | ] 66 | }, 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "# 整數與陣列: 減法\n", 74 | "a = x - 2; a" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "id": "eab9a606", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "array([ 4, 8, 12, 16, 20])" 87 | ] 88 | }, 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "# 整數與陣列: 乘法\n", 96 | "a = x * 2; a" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "id": "a0ec1b0e", 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "array([1., 2., 3., 4., 5.])" 109 | ] 110 | }, 111 | "execution_count": 6, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "# 整數與陣列: 除法\n", 118 | "a = x / 2; a" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "id": "18ec130c", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "array([12, 12, 12, 
12, 12])" 131 | ] 132 | }, 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "# 陣列加法運算\n", 140 | "a = x + y; a" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "id": "a1b6013d", 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "array([-8, -4, 0, 4, 8])" 153 | ] 154 | }, 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "# 陣列減法運算\n", 162 | "a = x - y; a" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 9, 168 | "id": "bb4790ff", 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "array([20, 32, 36, 32, 20])" 175 | ] 176 | }, 177 | "execution_count": 9, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "# 陣列乘法運算\n", 184 | "a = x * y; a" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "id": "93a342c9", 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "array([0.2, 0.5, 1. , 2. , 5. 
])" 197 | ] 198 | }, 199 | "execution_count": 10, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "# 陣列除法運算\n", 206 | "a = x / y; a" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 11, 212 | "id": "22e4e358", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# 陣列元素平方\n", 217 | "'''\n", 218 | "也可以使用\n", 219 | "A = np.square(x)\n", 220 | "B = np.square(y)\n", 221 | "'''\n", 222 | "A = x ** 2\n", 223 | "B = y ** 2" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 12, 229 | "id": "d81de89c", 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "array([ 4, 16, 36, 64, 100])" 236 | ] 237 | }, 238 | "execution_count": 12, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "A" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 13, 250 | "id": "0a96856f", 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "array([100, 64, 36, 16, 4])" 257 | ] 258 | }, 259 | "execution_count": 13, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "B" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 14, 271 | "id": "9fe67913", 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "# 陣列元素開根號\n", 276 | "A = np.sqrt(x)\n", 277 | "B = np.sqrt(y)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 15, 283 | "id": "f988778e", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "array([1.41421356, 2. 
, 2.44948974, 2.82842712, 3.16227766])" 290 | ] 291 | }, 292 | "execution_count": 15, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "A" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 16, 304 | "id": "058f7a3f", 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/plain": [ 310 | "array([3.16227766, 2.82842712, 2.44948974, 2. , 1.41421356])" 311 | ] 312 | }, 313 | "execution_count": 16, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "B" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "id": "c4104ffc", 325 | "metadata": {}, 326 | "source": [ 327 | "# 關係運算子運算" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "d598c55b", 333 | "metadata": {}, 334 | "source": [ 335 | "以 大於 (>)、小於 (<)、等於 (==) 為例" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 17, 341 | "id": "257f9a40", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "array([False, False, False, True, True])" 348 | ] 349 | }, 350 | "execution_count": 17, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "# 大於\n", 357 | "a = x > y; a" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 18, 363 | "id": "cb35bf22", 364 | "metadata": {}, 365 | "outputs": [ 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "array([ True, True, False, False, False])" 370 | ] 371 | }, 372 | "execution_count": 18, 373 | "metadata": {}, 374 | "output_type": "execute_result" 375 | } 376 | ], 377 | "source": [ 378 | "# 小於\n", 379 | "a = x < y; a" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 19, 385 | "id": "f13f881f", 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/plain": [ 391 | "array([False, False, True, False, False])" 392 | ] 393 | }, 394 | 
"execution_count": 19, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "# 等於\n", 401 | "a = x == y; a" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "id": "fe1f5465", 407 | "metadata": {}, 408 | "source": [ 409 | "# 陣列索引與切片 (Indexing & Slicing)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 20, 415 | "id": "28fc397b", 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "2" 422 | ] 423 | }, 424 | "execution_count": 20, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "# 指定索引\n", 431 | "x[0]" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 21, 437 | "id": "cea337c2", 438 | "metadata": {}, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/plain": [ 443 | "array([2, 4])" 444 | ] 445 | }, 446 | "execution_count": 21, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "# 切片 (語法: x[start:end:step],取出索引 start 到 end-1 的元素)\n", 453 | "x[0:2]" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 22, 459 | "id": "e44f4023", 460 | "metadata": {}, 461 | "outputs": [ 462 | { 463 | "data": { 464 | "text/plain": [ 465 | "array([ 2, 6, 10])" 466 | ] 467 | }, 468 | "execution_count": 22, 469 | "metadata": {}, 470 | "output_type": "execute_result" 471 | } 472 | ], 473 | "source": [ 474 | "# 切片,指定每幾步算一次\n", 475 | "'''\n", 476 | "這樣也可以\n", 477 | "x[::2]\n", 478 | "'''\n", 479 | "x[0:5:2]" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 23, 485 | "id": "833125db", 486 | "metadata": {}, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "text/plain": [ 491 | "10" 492 | ] 493 | }, 494 | "execution_count": 23, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "# 指定索引 (使用負號,-1 代表最後一個元素)\n", 501 | "x[-1]" 502 | ] 503 | }, 504 | { 505 | "cell_type":
"code", 506 | "execution_count": 24, 507 | "id": "1c063cf6", 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "data": { 512 | "text/plain": [ 513 | "array([6, 8])" 514 | ] 515 | }, 516 | "execution_count": 24, 517 | "metadata": {}, 518 | "output_type": "execute_result" 519 | } 520 | ], 521 | "source": [ 522 | "# 切片 (使用負號; 語法: x[start:end:step],取出索引 start 到 end-1 的元素)\n", 523 | "x[-3:-1]" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "id": "627bb997", 529 | "metadata": {}, 530 | "source": [ 531 | "# 陣列的結合與加入" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 25, 537 | "id": "9d140825", 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/plain": [ 543 | "array([ 2, 4, 6, 8, 10, 10, 8, 6, 4, 2])" 544 | ] 545 | }, 546 | "execution_count": 25, 547 | "metadata": {}, 548 | "output_type": "execute_result" 549 | } 550 | ], 551 | "source": [ 552 | "# 陣列結合\n", 553 | "'''\n", 554 | "numpy.concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting=\"same_kind\")\n", 555 | "'''\n", 556 | "z = np.concatenate((x, y)); z" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 26, 562 | "id": "027658bd", 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "data": { 567 | "text/plain": [ 568 | "array([ 2, 4, 6, 8, 10, 12, 14, 16])" 569 | ] 570 | }, 571 | "execution_count": 26, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | } 575 | ], 576 | "source": [ 577 | "# 將陣列元素加入其它陣列\n", 578 | "z = np.concatenate((x, [12, 14, 16])); z" 579 | ] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "id": "f4736b3a", 584 | "metadata": {}, 585 | "source": [ 586 | "# 在陣列指定索引插入元素 insert()" 587 | ] 588 | }, 589 | { 590 | "cell_type": "markdown", 591 | "id": "9510b3fc", 592 | "metadata": {}, 593 | "source": [ 594 | "原本 x 是 [2,4,6,8,10]" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 27, 600 | "id": "8dce528a", 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "data": { 605
| "text/plain": [ 606 | "array([ 2, 4, 9, 6, 8, 10])" 607 | ] 608 | }, 609 | "execution_count": 27, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "# 在陣列指定索引 2 插入元素 9\n", 616 | "'''\n", 617 | "numpy.insert(arr, obj, values, axis=None)\n", 618 | "'''\n", 619 | "z = np.insert(x, 2, 9); z" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": 28, 625 | "id": "2dd56e47", 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/plain": [ 631 | "array([ 2, 7, 4, 6, 9, 8, 10])" 632 | ] 633 | }, 634 | "execution_count": 28, 635 | "metadata": {}, 636 | "output_type": "execute_result" 637 | } 638 | ], 639 | "source": [ 640 | "# 在陣列指定索引 1 跟 3,分別插入元素 7 跟 9\n", 641 | "z = np.insert(x, [1,3], [7,9]); z" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "id": "63a77f53", 647 | "metadata": {}, 648 | "source": [ 649 | "# 刪除指定索引的陣列元素 delete()" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": 29, 655 | "id": "c8fd4758", 656 | "metadata": {}, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "array([ 2, 6, 8, 10])" 662 | ] 663 | }, 664 | "execution_count": 29, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "# 刪除索引 1 的元素\n", 671 | "'''\n", 672 | "numpy.delete(arr, obj, axis=None)\n", 673 | "'''\n", 674 | "z = np.delete(x, 1); z" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": 30, 680 | "id": "59143742", 681 | "metadata": {}, 682 | "outputs": [ 683 | { 684 | "data": { 685 | "text/plain": [ 686 | "array([ 2, 6, 10])" 687 | ] 688 | }, 689 | "execution_count": 30, 690 | "metadata": {}, 691 | "output_type": "execute_result" 692 | } 693 | ], 694 | "source": [ 695 | "# 刪除索引 1 和 3 所放置的元素\n", 696 | "z = np.delete(x, [1, 3]); z" 697 | ] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "id": "c49578ae", 702 | "metadata": {}, 703 | "source": [ 704 | "# 
向量內部元素總和 sum()" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 31, 710 | "id": "2e5a8668", 711 | "metadata": {}, 712 | "outputs": [ 713 | { 714 | "data": { 715 | "text/plain": [ 716 | "16" 717 | ] 718 | }, 719 | "execution_count": 31, 720 | "metadata": {}, 721 | "output_type": "execute_result" 722 | } 723 | ], 724 | "source": [ 725 | "# 計算元素總和\n", 726 | "'''\n", 727 | "numpy.sum(a, axis=None, dtype=None, ...)\n", 728 | "'''\n", 729 | "c = np.array([1, 3, 5, 7])\n", 730 | "d = np.sum(c); d" 731 | ] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "id": "4c0e34f2", 736 | "metadata": {}, 737 | "source": [ 738 | "# 向量內積 (inner product)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "id": "df403ab9", 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "'''\n", 749 | "內積在機器學習的領域,是很重要的概念,\n", 750 | "無論是取得向量特徵,或是向量間的關係,\n", 751 | "內積都扮演重要的角色\n", 752 | "\n", 753 | "numpy.inner(a, b, /)\n", 754 | "'''\n", 755 | "a = np.array([4,5,1,3,4])\n", 756 | "b = np.array([5,3,5,4,2])\n", 757 | "\n", 758 | "'''\n", 759 | "計算過程:\n", 760 | "4*5 + 5*3 + 1*5 + 3*4 + 4*2 = 60\n", 761 | "'''\n", 762 | "\n", 763 | "c = np.inner(a, b); c" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "id": "3ec3190a", 769 | "metadata": {}, 770 | "source": [ 771 | "# 牛刀小試: 餘弦相似度 (Cosine Similarity)\n", 772 | "參考網頁: [https://zh.wikipedia.org/wiki/余弦相似性](https://zh.wikipedia.org/wiki/余弦相似性)\n", 773 | "![餘弦相似性](https://wikimedia.org/api/rest_v1/media/math/render/svg/2a8c50526e2cc7aa837477be87eff1ea703f9dec)\n", 774 | "\n", 775 | "\n", 776 | "假設某 A 的向量為 \\[4,5,1,3,4\\],某 B 的向量為 \\[5,3,5,4,2\\],餘弦相似度的算法為:\n", 777 | "![Cosine Similarity](https://i.imgur.com/NOQBXuN.png)\n", 778 | "\n" 779 | ] 780 | } 781 | ], 782 | "metadata": { 783 | "kernelspec": { 784 | "display_name": "python3@nlp", 785 | "language": "python", 786 | "name": "k_nlp" 787 | }, 788 | "language_info": { 789 | "codemirror_mode": { 790 | "name": 
"ipython", 791 | "version": 3 792 | }, 793 | "file_extension": ".py", 794 | "mimetype": "text/x-python", 795 | "name": "python", 796 | "nbconvert_exporter": "python", 797 | "pygments_lexer": "ipython3", 798 | "version": "3.10.11" 799 | } 800 | }, 801 | "nbformat": 4, 802 | "nbformat_minor": 5 803 | } 804 | -------------------------------------------------------------------------------- /2-3 Numpy 二維陣列.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "33c44d85", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "5500ac5b", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# 建立二維陣列\n", 21 | "x = np.array([[1, 2], [3, 4]])\n", 22 | "y = np.array([[5, 6], [7, 8]])" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "e76a1f30", 28 | "metadata": {}, 29 | "source": [ 30 | "# 二維陣列相對位置與四則運算" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "id": "a771ec9b", 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "array([[11, 12],\n", 43 | " [13, 14]])" 44 | ] 45 | }, 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "# 與整數的加法運算\n", 53 | "a = x + 10; a" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "id": "4edb56db", 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "array([[ 6, 8],\n", 66 | " [10, 12]])" 67 | ] 68 | }, 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "# 加法運算\n", 76 | "a = x + y; a" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "id": "01781ceb", 83 | "metadata": {}, 84 | "outputs": [ 85 | 
{ 86 | "data": { 87 | "text/plain": [ 88 | "array([[ 5, 12],\n", 89 | " [21, 32]])" 90 | ] 91 | }, 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "# 相對位置乘法運算\n", 99 | "'''\n", 100 | "註: 這裡是陣列運算,與矩陣 (matrix) 運算是不一樣的作法\n", 101 | "'''\n", 102 | "a = x * y; a" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "id": "f7508a11", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "array([[0.2 , 0.33333333],\n", 115 | " [0.42857143, 0.5 ]])" 116 | ] 117 | }, 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "# 相對位置除法運算\n", 125 | "a = x / y; a" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "26745b22", 131 | "metadata": {}, 132 | "source": [ 133 | "# 關係運算子運算" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "id": "1173e62d", 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "array([[False, False],\n", 146 | " [False, False]])" 147 | ] 148 | }, 149 | "execution_count": 7, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "# 大於\n", 156 | "a = x > y; a" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "id": "2899833e", 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "array([[ True, True],\n", 169 | " [ True, True]])" 170 | ] 171 | }, 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "# 小於\n", 179 | "a = x < y; a" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "id": "d1f945f3", 185 | "metadata": {}, 186 | "source": [ 187 | "# 取得與設定二維陣列元素 (Indexing)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 9, 
193 | "id": "81962580", 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "2" 200 | ] 201 | }, 202 | "execution_count": 9, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "# 取得二維陣列某元素內容\n", 209 | "x[0, 1] # row = 0, col = 1" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 10, 215 | "id": "e8b7c110", 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "array([[ 5, 5, 6, 6],\n", 222 | " [ 3, 3, 11, 2]])" 223 | ] 224 | }, 225 | "execution_count": 10, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "# 設定二維陣列某元素內容\n", 232 | "tmp = np.array([[5, 5, 6, 6], [3, 3, 1, 2]])\n", 233 | "tmp[1, 2] = 11; tmp" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 11, 239 | "id": "84d457c7", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "array([1, 2])" 246 | ] 247 | }, 248 | "execution_count": 11, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "# 取得 row = 0 的元素\n", 255 | "'''\n", 256 | "x 的內容: [[1,2], [3,4]]\n", 257 | "\n", 258 | "以下方式同樣效果\n", 259 | "x[0,]\n", 260 | "x[0,:]\n", 261 | "'''\n", 262 | "x[0]" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 12, 268 | "id": "2a89443b", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/plain": [ 274 | "array([1, 3])" 275 | ] 276 | }, 277 | "execution_count": 12, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "# 取得 column = 0 的元素\n", 284 | "'''\n", 285 | "x 的內容: [[1,2], [3,4]]\n", 286 | "\n", 287 | "以下方式同樣效果\n", 288 | "x[:,0]\n", 289 | "'''\n", 290 | "x[:,0]" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "06415ddf", 296 | "metadata": {}, 297 | "source": [ 298 | "# 切片 
(Slicing)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "id": "9b2f1e3a", 304 | "metadata": {}, 305 | "source": [ 306 | "在這裡使用 xx = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 13, 312 | "id": "8c451745", 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "xx = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 14, 322 | "id": "ac2bc558", 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "array([1, 2, 3])" 329 | ] 330 | }, 331 | "execution_count": 14, 332 | "metadata": {}, 333 | "output_type": "execute_result" 334 | } 335 | ], 336 | "source": [ 337 | "# 取得 row = 0 的前 3 個元素\n", 338 | "xx[0, :3]" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 15, 344 | "id": "7861cbc2", 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "data": { 349 | "text/plain": [ 350 | "array([[3, 4],\n", 351 | " [7, 8]])" 352 | ] 353 | }, 354 | "execution_count": 15, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "# 取得 row=0:2, column=2:4\n", 361 | "'''\n", 362 | "實際上是 row 0 ~ row 1,以及 column 2 和 column 3 的資料\n", 363 | "'''\n", 364 | "xx[0:2, 2:4]" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 16, 370 | "id": "7997a99c", 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "array([[1, 2, 3, 4],\n", 377 | " [5, 6, 7, 8]])" 378 | ] 379 | }, 380 | "execution_count": 16, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "# 取得前 2 個 row 的元素\n", 387 | "xx[:2]" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 17, 393 | "id": "ff019b0a", 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 
| "array([[ 5, 6, 7, 8],\n", 400 | " [ 9, 10, 11, 12]])" 401 | ] 402 | }, 403 | "execution_count": 17, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "# 取得 row = 1 之後的元素\n", 410 | "xx[1:]" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "id": "d355f3e1", 416 | "metadata": {}, 417 | "source": [ 418 | "# 更改陣列形狀 (即是改變陣列維度)" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 18, 424 | "id": "32eab133", 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "# 建立一維陣列\n", 429 | "x1 = np.array([1, 2, 3, 4, 5, 6])" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 19, 435 | "id": "2e35e466", 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "data": { 440 | "text/plain": [ 441 | "array([[1, 2, 3],\n", 442 | " [4, 5, 6]])" 443 | ] 444 | }, 445 | "execution_count": 19, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "# 轉成 2x3 陣列 (2 列 3 行,2 rows,3 columns)\n", 452 | "# 註: 「不會」修改原始陣列,「會」回傳轉換後的新陣列\n", 453 | "y1 = x1.reshape(2, 3); y1" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 20, 459 | "id": "38db8541", 460 | "metadata": {}, 461 | "outputs": [ 462 | { 463 | "data": { 464 | "text/plain": [ 465 | "array([1, 2, 3, 4, 5, 6])" 466 | ] 467 | }, 468 | "execution_count": 20, 469 | "metadata": {}, 470 | "output_type": "execute_result" 471 | } 472 | ], 473 | "source": [ 474 | "# 將 2x3 陣列轉成一維陣列\n", 475 | "'''\n", 476 | "numpy.ravel(a, order='C')\n", 477 | "'''\n", 478 | "x2 = y1.ravel(); x2" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 21, 484 | "id": "dfb707ac", 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "array([[1, 2],\n", 491 | " [3, 4],\n", 492 | " [5, 6]])" 493 | ] 494 | }, 495 | "execution_count": 21, 496 | "metadata": {}, 497 | "output_type": "execute_result" 498 | } 499 | ], 500 
| "source": [ 501 | "# 將 2x3 陣列,改成 3x2 陣列\n", 502 | "# 註: 「會」修改原始陣列,「不會」回傳轉換後的新陣列\n", 503 | "y1.resize(3,2); y1" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "id": "1462c324", 509 | "metadata": {}, 510 | "source": [ 511 | "# 轉置矩陣 transpose()" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "id": "92f85206", 517 | "metadata": {}, 518 | "source": [ 519 | "把 n x m 的矩陣,變成 m x n 的矩陣" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 22, 525 | "id": "96d90580", 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "array([[0, 2, 4, 6],\n", 532 | " [1, 3, 5, 7]])" 533 | ] 534 | }, 535 | "execution_count": 22, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "# 生成一維陣列 (0 - 7),reshape 成 4 列 2 行後,再 transpose 成為 2 列 4 行\n", 542 | "a = np.arange(8).reshape(4, 2); a\n", 543 | "'''\n", 544 | "reshape 後的結果:\n", 545 | "[[0, 1],\n", 546 | " [2, 3],\n", 547 | " [4, 5],\n", 548 | " [6, 7]]\n", 549 | "'''\n", 550 | "b = a.transpose(); b" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 23, 556 | "id": "41299890", 557 | "metadata": {}, 558 | "outputs": [ 559 | { 560 | "data": { 561 | "text/plain": [ 562 | "array([[0, 2, 4, 6],\n", 563 | " [1, 3, 5, 7]])" 564 | ] 565 | }, 566 | "execution_count": 23, 567 | "metadata": {}, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "# 也可以使用 T 來進行 transpose\n", 573 | "a = np.arange(8).reshape(4, 2); a\n", 574 | "b = a.T; b" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "id": "bd7a0460", 580 | "metadata": {}, 581 | "source": [ 582 | "# 點積 dot()" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 30, 588 | "id": "ee6fa276", 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "408" 595 | ] 596 | }, 597 | "execution_count": 30, 598 | "metadata": {}, 599 | 
"output_type": "execute_result" 600 | } 601 | ], 602 | "source": [ 603 | "# 如果是一維,計算結果等同於 inner product \n", 604 | "# = np.inner(a, b)\n", 605 | "# = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]\n", 606 | "# = 1*2 + 2*4 + 3*6 + 4*8 + 5*10 + 6*12 + 7*14 + 8*16\n", 607 | "# = 2 + 8 + 18 + 32 + 50 + 72 + 98 + 128\n", 608 | "# = 408\n", 609 | "a = np.array([1, 2, 3, 4, 5, 6, 7, 8])\n", 610 | "b = np.array([2, 4, 6, 8, 10, 12, 14, 16])\n", 611 | "c = np.dot(a, b); c" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 31, 617 | "id": "99673b42", 618 | "metadata": {}, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/plain": [ 623 | "array([[19, 22],\n", 624 | " [43, 50]])" 625 | ] 626 | }, 627 | "execution_count": 31, 628 | "metadata": {}, 629 | "output_type": "execute_result" 630 | } 631 | ], 632 | "source": [ 633 | "# 如果是二維,計算結果等同於 matrix multiplication\n", 634 | "a = np.array([[1, 2], [3, 4]])\n", 635 | "b = np.array([[5, 6], [7, 8]])\n", 636 | "c = np.dot(a, b); c\n" 637 | ] 638 | }, 639 | { 640 | "cell_type": "markdown", 641 | "id": "3d3e3504", 642 | "metadata": {}, 643 | "source": [ 644 | "# 矩陣相乘 matmul()" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 29, 650 | "id": "a12421e2", 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "array([[ 76, 82],\n", 657 | " [184, 199],\n", 658 | " [292, 316]])" 659 | ] 660 | }, 661 | "execution_count": 29, 662 | "metadata": {}, 663 | "output_type": "execute_result" 664 | } 665 | ], 666 | "source": [ 667 | "# 典型的矩陣乘法\n", 668 | "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n", 669 | "b = np.array([[10, 11], [12, 13], [14, 15]])\n", 670 | "c = np.matmul(a, b); c" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "id": "1e65463d", 676 | "metadata": {}, 677 | "source": [ 678 | "# 對應矩陣相乘 multiply()" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | 
"execution_count": 33, 684 | "id": "13650d19", 685 | "metadata": {}, 686 | "outputs": [ 687 | { 688 | "data": { 689 | "text/plain": [ 690 | "array([ 4, 10, 18])" 691 | ] 692 | }, 693 | "execution_count": 33, 694 | "metadata": {}, 695 | "output_type": "execute_result" 696 | } 697 | ], 698 | "source": [ 699 | "# 如果是一維,計算結果等同於 element-wise product\n", 700 | "a = np.array([1, 2, 3])\n", 701 | "b = np.array([4, 5, 6])\n", 702 | "c = np.multiply(a, b); c" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": 34, 708 | "id": "0835e290", 709 | "metadata": {}, 710 | "outputs": [ 711 | { 712 | "data": { 713 | "text/plain": [ 714 | "array([[ 5, 12],\n", 715 | " [21, 32]])" 716 | ] 717 | }, 718 | "execution_count": 34, 719 | "metadata": {}, 720 | "output_type": "execute_result" 721 | } 722 | ], 723 | "source": [ 724 | "# 如果是二維,計算結果等同於 element-wise product\n", 725 | "a = np.array([[1, 2], [3, 4]])\n", 726 | "b = np.array([[5, 6], [7, 8]])\n", 727 | "c = np.multiply(a, b); c" 728 | ] 729 | }, 730 | { 731 | "cell_type": "markdown", 732 | "id": "32b5b415", 733 | "metadata": {}, 734 | "source": [ 735 | "[【一些遇到就懵的概念】point-wise, pair-wise, element-wise 是什么](https://blog.csdn.net/weixin_43900869/article/details/131490669)" 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "id": "8f04c483", 741 | "metadata": {}, 742 | "source": [ 743 | "# 廣播 (broadcast)" 744 | ] 745 | }, 746 | { 747 | "cell_type": "markdown", 748 | "id": "54656c16", 749 | "metadata": {}, 750 | "source": [ 751 | "執行 2 個陣列運算時,原則上必須外形相同才能運算,如果不同,可以使用廣播 (broadcasting) 機制,將小陣列「擴大」到兩個陣列都相同,之後再進行運算。\n", 752 | "\n", 753 | "![Computation on Arrays: Broadcasting](https://i.imgur.com/CuB6Bsh.png)\n", 754 | "參考資料: [Computation on Arrays: Broadcasting](https://jakevdp.github.io/PythonDataScienceHandbook/02.05-computation-on-arrays-broadcasting.html)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": 25, 760 | "id": "b6e059bd", 761 | "metadata": {}, 762 | "outputs": [ 763 
| { 764 | "data": { 765 | "text/plain": [ 766 | "array([5, 6, 7])" 767 | ] 768 | }, 769 | "execution_count": 25, 770 | "metadata": {}, 771 | "output_type": "execute_result" 772 | } 773 | ], 774 | "source": [ 775 | "# np.arange(3) + 5 是否等於 [5,6,7] ?\n", 776 | "a = np.arange(3) + 5; a" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": 26, 782 | "id": "3b7fa0ae", 783 | "metadata": {}, 784 | "outputs": [ 785 | { 786 | "data": { 787 | "text/plain": [ 788 | "array([[1., 2., 3.],\n", 789 | " [1., 2., 3.],\n", 790 | " [1., 2., 3.]])" 791 | ] 792 | }, 793 | "execution_count": 26, 794 | "metadata": {}, 795 | "output_type": "execute_result" 796 | } 797 | ], 798 | "source": [ 799 | "# np.ones((3, 3)) + np.arange(3) 是否等於 [[1,2,3],[1,2,3],[1,2,3]] ?\n", 800 | "a = np.ones((3, 3)) + np.arange(3); a" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": 27, 806 | "id": "8fcdd327", 807 | "metadata": {}, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "array([[0, 1, 2],\n", 813 | " [1, 2, 3],\n", 814 | " [2, 3, 4]])" 815 | ] 816 | }, 817 | "execution_count": 27, 818 | "metadata": {}, 819 | "output_type": "execute_result" 820 | } 821 | ], 822 | "source": [ 823 | "# np.arange(3).reshape((3,1)) + np.arange(3) 是否等於 [[0,1,2],[1,2,3],[2,3,4]] ?\n", 824 | "a = np.arange(3).reshape((3,1)) + np.arange(3); a" 825 | ] 826 | } 827 | ], 828 | "metadata": { 829 | "kernelspec": { 830 | "display_name": "test", 831 | "language": "python", 832 | "name": "python3" 833 | }, 834 | "language_info": { 835 | "codemirror_mode": { 836 | "name": "ipython", 837 | "version": 3 838 | }, 839 | "file_extension": ".py", 840 | "mimetype": "text/x-python", 841 | "name": "python", 842 | "nbconvert_exporter": "python", 843 | "pygments_lexer": "ipython3", 844 | "version": "3.11.11" 845 | } 846 | }, 847 | "nbformat": 4, 848 | "nbformat_minor": 5 849 | } 850 | -------------------------------------------------------------------------------- 
/cases/mysql.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "6f9c10d9", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install pymysql SQLAlchemy" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "e5b252d8", 16 | "metadata": {}, 17 | "source": [ 18 | "## 在資料庫的交易中,為確保交易(Transaction)是正確可靠的,所以必須具備四個特性:\n", 19 | "- **Atomicity 原子性**\n", 20 | " - 在資料庫的每一筆交易中只有兩種可能發生,第一種是全部完成(commit),第二種是全部不完成(rollback),不會因為某個環節出錯,而終止在那個環節,在出錯之後會恢復至交易之前的狀態,如同還沒執行此筆交易。\n", 21 | "- **Consistency 一致性**\n", 22 | " - 在交易中會產生資料或者驗證狀態,然而當錯誤發生,所有已更改的資料或狀態將會恢復至交易之前。\n", 23 | "- **Isolation 隔離性**\n", 24 | " - 資料庫允許多筆交易同時進行,交易進行時未完成的交易資料並不會被其他交易使用,直到此筆交易完成。\n", 25 | "- **Durability 永續性**\n", 26 | " - 交易完成後對資料的修改是永久性的,資料不會因為系統重啟或錯誤而改變。" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "e18723b0", 32 | "metadata": {}, 33 | "source": [ 34 | "![交易過程](https://i.imgur.com/r29XFgO.png \"交易過程\")" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "ae333e11", 40 | "metadata": {}, 41 | "source": [ 42 | "# PyMySQL 使用方式" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 1, 48 | "id": "82561ad3", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "{'sId': '003', 'sName': '王○○', 'sGender': '男', 'sNickname': '小王'}\n", 56 | "{'sId': '004', 'sName': '江○○', 'sGender': '女', 'sNickname': '小江'}\n", 57 | "{'sId': '005', 'sName': '周○○', 'sGender': '女', 'sNickname': '小周'}\n", 58 | "{'sId': '006', 'sName': '黃○○', 'sGender': '男', 'sNickname': '小黃'}\n", 59 | "{'sId': '007', 'sName': '丁○○', 'sGender': '男', 'sNickname': '小丁'}\n", 60 | "{'sId': '008', 'sName': '鄭○○', 'sGender': '男', 'sNickname': '小鄭'}\n", 61 | "{'sId': '087', 'sName': '楊○○', 'sGender': '男', 'sNickname': '好人'}\n", 62 | "{'sId': '088', 'sName': '陳○○', 'sGender': '女', 'sNickname': 
'小白'}\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "'''\n", 68 | "參考頁面:\n", 69 | "[1] PyMySQL Examples\n", 70 | "https://pymysql.readthedocs.io/en/latest/user/examples.html\n", 71 | "[2] Python+MySQL資料庫操作(PyMySQL)\n", 72 | "https://www.tw511.com/3/39/1388.html\n", 73 | "[3] Python資料庫學習筆記(四):使用PyMySQL模組\n", 74 | "https://reurl.cc/Q78eD2\n", 75 | "'''\n", 76 | "\n", 77 | "import pymysql\n", 78 | "\n", 79 | "# 資料庫連線\n", 80 | "connection = pymysql.connect(\n", 81 | " host = 'localhost',\n", 82 | " user = 'root',\n", 83 | " password = 'root',\n", 84 | " database = 'my_db',\n", 85 | " charset = 'utf8mb4',\n", 86 | " cursorclass=pymysql.cursors.DictCursor # 不加這個,預設以 tuple 回傳\n", 87 | ")\n", 88 | "\n", 89 | "# 取得 cursor 物件,進行 CRUD\n", 90 | "cursor = connection.cursor()\n", 91 | "\n", 92 | "try:\n", 93 | " # 寫入資料\n", 94 | " # sql = \"INSERT INTO `users` (`email`, `password`) VALUES (%s, %s)\"\n", 95 | " # cursor.execute(sql, ('你的email', '你的密碼'))\n", 96 | "\n", 97 | " # 查詢資料\n", 98 | " sql = \"SELECT * FROM `students`\"\n", 99 | " cursor.execute(sql)\n", 100 | "\n", 101 | " # 查詢結果列數大於0 ,代表有資料\n", 102 | " if cursor.rowcount > 0:\n", 103 | " # 將查詢結果轉成 list 型態 (裡頭元素都是 dict)\n", 104 | " results = cursor.fetchall() # 如果 sql 語法明顯只取得一筆,則使用 fetchone()\n", 105 | " '''\n", 106 | " [\n", 107 | " {'sId': '003', 'sName': '王○○', 'sGender': '男', 'sNickname': '小王'},\n", 108 | " {'sId': '004', 'sName': '江○○', 'sGender': '女', 'sNickname': '小江'},\n", 109 | " {'sId': '005', 'sName': '周○○', 'sGender': '女', 'sNickname': '小周'},\n", 110 | " {'sId': '006', 'sName': '黃○○', 'sGender': '男', 'sNickname': '小黃'},\n", 111 | " {'sId': '007', 'sName': '丁○○', 'sGender': '男', 'sNickname': '小丁'},\n", 112 | " {'sId': '008', 'sName': '鄭○○', 'sGender': '男', 'sNickname': '小鄭'},\n", 113 | " {'sId': '087', 'sName': '楊○○', 'sGender': '男', 'sNickname': '好人'},\n", 114 | " {'sId': '088', 'sName': '陳○○', 'sGender': '女', 'sNickname': '小白'}\n", 115 | " ]\n", 116 | " '''\n", 117 | " \n", 118 | " # 迭代取得資料 (dict 型態)\n", 119 
| " for result in results:\n", 120 | " print(result)\n", 121 | " else:\n", 122 | " print(\"rowcount: 0\")\n", 123 | "\n", 124 | " # 提交 SQL 執行結果\n", 125 | " connection.commit()\n", 126 | "except Exception as e:\n", 127 | " # 回滾\n", 128 | " connection.rollback()\n", 129 | " print(\"SQL 執行失敗\")\n", 130 | " print(e)\n", 131 | "\n", 132 | "# 釋放 cursor\n", 133 | "cursor.close()\n", 134 | "\n", 135 | "# 關閉資料庫連線\n", 136 | "connection.close()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "05e7cc13", 142 | "metadata": {}, 143 | "source": [ 144 | "# MySQL 整合 Pandas" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 2, 150 | "id": "d7d896a5", 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/html": [ 156 | "
\n", 157 | "\n", 170 | "\n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | "
sIdsNamesGendersNickname
0003王○○小王
1004江○○小江
2005周○○小周
3006黃○○小黃
4007丁○○小丁
5008鄭○○小鄭
6087楊○○好人
7088陳○○小白
\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " sId sName sGender sNickname\n", 243 | "0 003 王○○ 男 小王\n", 244 | "1 004 江○○ 女 小江\n", 245 | "2 005 周○○ 女 小周\n", 246 | "3 006 黃○○ 男 小黃\n", 247 | "4 007 丁○○ 男 小丁\n", 248 | "5 008 鄭○○ 男 小鄭\n", 249 | "6 087 楊○○ 男 好人\n", 250 | "7 088 陳○○ 女 小白" 251 | ] 252 | }, 253 | "execution_count": 2, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "# 在這裡會同時使用到 pymysql 和 sqlalchemy\n", 260 | "import pymysql\n", 261 | "import pandas as pd\n", 262 | "from sqlalchemy import create_engine\n", 263 | "\n", 264 | "# 資料庫基本設定\n", 265 | "host = 'localhost'\n", 266 | "user = 'root'\n", 267 | "password = 'root'\n", 268 | "database = 'my_db'\n", 269 | "charset = 'utf8mb4'\n", 270 | "\n", 271 | "# 建立 sqlalchemy engine\n", 272 | "engine = create_engine(f\"mysql+pymysql://{user}:{password}@{host}/{database}?charset={charset}\")\n", 273 | "\n", 274 | "# SQL 語法 (可以撰寫多行的 SQL)\n", 275 | "sql = '''SELECT * FROM `my_db`.`students`;'''\n", 276 | "\n", 277 | "# 整合 PyMySQL 連線,透過 Pandas 取得 SQL 查詢後的資料,變成 dataframe\n", 278 | "df = pd.read_sql(sql, con=engine); df" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 3, 284 | "id": "b92f5afe", 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "image/png": "", 290 | "text/plain": [ 291 | "
" 292 | ] 293 | }, 294 | "metadata": {}, 295 | "output_type": "display_data" 296 | } 297 | ], 298 | "source": [ 299 | "# 匯入套件(模組)\n", 300 | "import matplotlib.pyplot as plt\n", 301 | "import numpy as np\n", 302 | "\n", 303 | "# group by 以後,進行查詢,並與 matplotlib 整合\n", 304 | "sql = '''\n", 305 | "SELECT `cId`, AVG(`score`) AS `avg_score`\n", 306 | "FROM `scores`\n", 307 | "GROUP BY `cId`\n", 308 | "'''\n", 309 | "df = pd.read_sql(sql, con=engine); df\n", 310 | "\n", 311 | "# 繪製長條圖\n", 312 | "plt.bar(df['cId'], df['avg_score'], width=0.5)\n", 313 | "\n", 314 | "# 設定數值文字在圖表上\n", 315 | "for i in df.index:\n", 316 | " plt.text(i, df['avg_score'][i], df['avg_score'][i])\n", 317 | "\n", 318 | "# 顯示圖表\n", 319 | "plt.show()" 320 | ] 321 | } 322 | ], 323 | "metadata": { 324 | "kernelspec": { 325 | "display_name": "Python 3 (ipykernel)", 326 | "language": "python", 327 | "name": "python3" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.10.10" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 5 344 | } 345 | -------------------------------------------------------------------------------- /3-1 Pandas 檔案輸入與輸出.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "868197df", 6 | "metadata": {}, 7 | "source": [ 8 | "# 有關檔案編碼的部分,請參考這個連結\n", 9 | "[standard-encodings](https://docs.python.org/3/library/codecs.html#standard-encodings)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "id": "da4e9e76", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# 安裝套件\n", 20 | "!pip install pandas openpyxl" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "id": 
"0aa8cc86", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# 匯入套件\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "ba1bde58", 37 | "metadata": {}, 38 | "source": [ 39 | "# 讀寫 csv 檔\n", 40 | "- CSV: Comma-Separated Values\n", 41 | " - pd.read_csv(): 讀取 csv 檔\n", 42 | " - df.to_csv(): 寫入 csv 檔\n", 43 | "- 請另存下載 [csv檔: 垃圾車點位路線資訊](https://data.taipei/api/frontstage/tpeod/dataset/resource.download?rid=a6e90031-7ec4-4089-afb5-361a4efe7202) 到專案目錄當中。" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "id": "773ed123", 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 
224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | "
行政區里別分隊局編車號路線車次抵達時間離開時間地點緯度經度
0中山區力行里長安分隊100-021119-BQ長安-3第1車16301638臺北市中山區建國北路一段69號前121.53694425.051111
1中山區力行里長安分隊100-021119-BQ長安-3第1車16401649臺北市中山區南京東路三段176號前(遼寧街口)121.54222225.051944
2中山區力行里長安分隊100-021119-BQ長安-3第1車16501658臺北市中山區南京東路三段214號前121.54338525.051725
3中山區力行里長安分隊100-021119-BQ長安-3第1車17001709臺北市中山區復興北路66號121.54385325.050820
4中山區力行里長安分隊100-021119-BQ長安-3第1車17101716臺北市中山區復興北路28號前121.54381325.048861
.......................................
4043萬華區新起里漢中分隊110-G01KEJ-0038漢中-1第3車21412145臺北市萬華區內江街110號前121.50343025.041577
4044萬華區新起里漢中分隊110-G01KEJ-0038漢中-1第3車21462150臺北市萬華區內江街74之1號前121.50418525.041492
4045萬華區西門里漢中分隊110-G01KEJ-0038漢中-1第3車21512155臺北市萬華區昆明街160號前121.50460725.042125
4046萬華區西門里漢中分隊110-G01KEJ-0038漢中-1第3車21562200臺北市萬華區昆明街134之2號前121.50472425.042720
4047萬華區西門里漢中分隊110-G01KEJ-0038漢中-1第3車22012205臺北市萬華區成都路105號前121.50420725.043244
\n", 255 | "

4048 rows × 12 columns

\n", 256 | "
" 257 | ], 258 | "text/plain": [ 259 | " 行政區 里別 分隊 局編 車號 路線 車次 抵達時間 離開時間 \n", 260 | "0 中山區 力行里 長安分隊 100-021 119-BQ 長安-3 第1車 1630 1638 \\\n", 261 | "1 中山區 力行里 長安分隊 100-021 119-BQ 長安-3 第1車 1640 1649 \n", 262 | "2 中山區 力行里 長安分隊 100-021 119-BQ 長安-3 第1車 1650 1658 \n", 263 | "3 中山區 力行里 長安分隊 100-021 119-BQ 長安-3 第1車 1700 1709 \n", 264 | "4 中山區 力行里 長安分隊 100-021 119-BQ 長安-3 第1車 1710 1716 \n", 265 | "... ... ... ... ... ... ... ... ... ... \n", 266 | "4043 萬華區 新起里 漢中分隊 110-G01 KEJ-0038 漢中-1 第3車 2141 2145 \n", 267 | "4044 萬華區 新起里 漢中分隊 110-G01 KEJ-0038 漢中-1 第3車 2146 2150 \n", 268 | "4045 萬華區 西門里 漢中分隊 110-G01 KEJ-0038 漢中-1 第3車 2151 2155 \n", 269 | "4046 萬華區 西門里 漢中分隊 110-G01 KEJ-0038 漢中-1 第3車 2156 2200 \n", 270 | "4047 萬華區 西門里 漢中分隊 110-G01 KEJ-0038 漢中-1 第3車 2201 2205 \n", 271 | "\n", 272 | " 地點 緯度 經度 \n", 273 | "0 臺北市中山區建國北路一段69號前 121.536944 25.051111 \n", 274 | "1 臺北市中山區南京東路三段176號前(遼寧街口) 121.542222 25.051944 \n", 275 | "2 臺北市中山區南京東路三段214號前 121.543385 25.051725 \n", 276 | "3 臺北市中山區復興北路66號 121.543853 25.050820 \n", 277 | "4 臺北市中山區復興北路28號前 121.543813 25.048861 \n", 278 | "... ... ... ... 
\n", 279 | "4043 臺北市萬華區內江街110號前 121.503430 25.041577 \n", 280 | "4044 臺北市萬華區內江街74之1號前 121.504185 25.041492 \n", 281 | "4045 臺北市萬華區昆明街160號前 121.504607 25.042125 \n", 282 | "4046 臺北市萬華區昆明街134之2號前 121.504724 25.042720 \n", 283 | "4047 臺北市萬華區成都路105號前 121.504207 25.043244 \n", 284 | "\n", 285 | "[4048 rows x 12 columns]" 286 | ] 287 | }, 288 | "execution_count": 2, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "# 讀取 csv 格式檔案\n", 295 | "'''\n", 296 | "pd.read_csv(\n", 297 | " filepath_or_buffer=\"你的檔案路徑\", \n", 298 | " sep=\"分隔符號\", \n", 299 | " header=\"infer\"、列號(int)或 None, \n", 300 | " index_col=欄位名稱、欄位編號或 None, \n", 301 | " encoding=None, \n", 302 | " ...\n", 303 | ")\n", 304 | "\n", 305 | "說明:\n", 306 | "- 引數 filepath_or_buffer 不一定要寫: \n", 307 | " - df = pd.read_csv(\"./output.csv\")\n", 308 | "- 近年許多檔案或資料,都使用 UTF-8 進行編碼\n", 309 | " - 一般來說,只要是 utf-8 編碼格式儲存的檔案,讀取成 dataframe 時候,不用特別指定編碼。\n", 310 | " - 然而本例使用的 csv 檔,是以 Big5(繁體中文 Windows 所稱的 ANSI,即 cp950)編碼儲存,因此要指定 encoding=\"big5\" 來開啟。\n", 311 | "'''\n", 312 | "df = pd.read_csv(\"./files/垃圾車點位資訊.csv\", encoding=\"big5\"); df" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 3, 318 | "id": "12b22911", 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# 寫入 csv 檔案\n", 323 | "'''\n", 324 | "預設是 row number 和 column name 都包括進去,不習慣加進 csv 檔的話,可以這樣寫:\n", 325 | "df.to_csv(path_or_buf=\"./output.csv\", index=False, header=False)\n", 326 | "\n", 327 | "另外,引數 path_or_buf 不一定要寫:\n", 328 | "df.to_csv(\"./output.csv\", index=False, header=False)\n", 329 | "'''\n", 330 | "df.to_csv(\"./output.csv\", index=False, encoding=\"big5\")" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "id": "51ac8806", 336 | "metadata": {}, 337 | "source": [ 338 | "# 讀寫 json 檔\n", 339 | "- JSON(JavaScript Object Notation)\n", 340 | " - pd.read_json(): 讀取 json 檔\n", 341 | " - df.to_json(): 寫入 json 檔\n", 342 | "- 使用 [Cafe Nomad](https://cafenomad.tw/developers/docs/v1.2) 的資料來測試。\n", 343 | " - 請另存下載: 
[https://cafenomad.tw/api/v1.2/cafes/taipei](https://cafenomad.tw/api/v1.2/cafes/taipei)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 4, 349 | "id": "e1e7bd3c", 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/html": [ 355 | "
\n", 356 | "\n", 369 | "\n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " 
\n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | "
idnamecitywifiseatquiettastycheapmusicurladdresslatitudelongitudelimited_timesocketstanding_deskmrtopen_time
0000703fe-cf8a-43c8-bd83-c90cfd61915f蜂巢咖啡taipei0.04.53.05.04.04.0https://www.facebook.com/honeycombcafe2016/新北市永和區永貞路214號25.004097121.515287nonoyes永安市場站雙週四公休,有變更另外公告
10022fc3b-598f-4bb5-bb69-1b7d1b9b5202(暫停營業) The Kaffa Loverstaipei5.05.05.05.04.05.0https://www.facebook.com/thekaffalovers/?fref=ts台北市中正區金山北路3號25.044354121.530456nomaybeyes忠孝新生週二至週五 10:00-19:00 六日09:00-19:00
2005a5ee9-7460-4f8a-b965-8b9565ee1e4e一杯咖啡士林店taipei0.00.00.00.00.00.0https://www.facebook.com/acupofcoffeeshilin/台北市士林區中山北路五段505巷36號25.093021121.526678noyesyes
300971781-cad6-4a6f-8a38-b597d8a1488bFull of Beans 充滿豆豆咖啡館taipei0.00.00.00.00.00.0https://www.facebook.com/FOBcafe.gallery/台北市中山區龍江路21巷9-6號25.047838121.541138
400b6ce2b-d92e-44e5-b1cb-daaa925a5135良食究好市集餐廳WONMItaipei5.03.03.05.00.05.0http://www.wonmi.com.tw/home.php台北市八德路四段138號 (京華城 Living Mall 10 F)25.048354121.562300
.........................................................
1652ff53abc4-7a89-43db-8d0b-43e5b00dff5f喜穗咖啡蘭花生活館(永久停業)taipei0.00.00.00.00.00.0242新北市新莊區中原東路206號25.054470121.460515maybeno
1653ff582eb9-3f83-4f65-98b2-c38a4ef511db自然產taipei3.05.04.04.03.03.0https://www.facebook.com/greenday209/新北市板橋區重慶路209號25.001575121.462947maybeyesno
1654ff973c43-9474-46dc-bb24-0d1947143ba1金獅取水taipei0.00.00.00.00.00.0https://www.facebook.com/goldenlionkafe台北市中山區民生東路2段151號25.058156121.535403maybeyesyes
1655ffd780fe-9d33-4a4b-90dd-74d7d9de31f7The Tuckaway Taipeitaipei5.05.05.05.05.04.0IG FB tuckawaytaipei台北市文山區木柵路三段48巷1弄6號1樓24.987979121.565233nomaybeno
1656fff8267f-3235-4eb5-95c1-36015a59b229豆工場精品咖啡館taipei4.04.04.04.04.04.0新北市中和區中和路400巷31號25.002081121.512065maybeyesyes永安市場
\n", 627 | "

1657 rows × 18 columns

\n", 628 | "
" 629 | ], 630 | "text/plain": [ 631 | " id name city \n", 632 | "0 000703fe-cf8a-43c8-bd83-c90cfd61915f 蜂巢咖啡 taipei \\\n", 633 | "1 0022fc3b-598f-4bb5-bb69-1b7d1b9b5202 (暫停營業) The Kaffa Lovers taipei \n", 634 | "2 005a5ee9-7460-4f8a-b965-8b9565ee1e4e 一杯咖啡士林店 taipei \n", 635 | "3 00971781-cad6-4a6f-8a38-b597d8a1488b Full of Beans 充滿豆豆咖啡館 taipei \n", 636 | "4 00b6ce2b-d92e-44e5-b1cb-daaa925a5135 良食究好市集餐廳WONMI taipei \n", 637 | "... ... ... ... \n", 638 | "1652 ff53abc4-7a89-43db-8d0b-43e5b00dff5f 喜穗咖啡蘭花生活館(永久停業) taipei \n", 639 | "1653 ff582eb9-3f83-4f65-98b2-c38a4ef511db 自然產 taipei \n", 640 | "1654 ff973c43-9474-46dc-bb24-0d1947143ba1 金獅取水 taipei \n", 641 | "1655 ffd780fe-9d33-4a4b-90dd-74d7d9de31f7 The Tuckaway Taipei taipei \n", 642 | "1656 fff8267f-3235-4eb5-95c1-36015a59b229 豆工場精品咖啡館 taipei \n", 643 | "\n", 644 | " wifi seat quiet tasty cheap music \n", 645 | "0 0.0 4.5 3.0 5.0 4.0 4.0 \\\n", 646 | "1 5.0 5.0 5.0 5.0 4.0 5.0 \n", 647 | "2 0.0 0.0 0.0 0.0 0.0 0.0 \n", 648 | "3 0.0 0.0 0.0 0.0 0.0 0.0 \n", 649 | "4 5.0 3.0 3.0 5.0 0.0 5.0 \n", 650 | "... ... ... ... ... ... ... \n", 651 | "1652 0.0 0.0 0.0 0.0 0.0 0.0 \n", 652 | "1653 3.0 5.0 4.0 4.0 3.0 3.0 \n", 653 | "1654 0.0 0.0 0.0 0.0 0.0 0.0 \n", 654 | "1655 5.0 5.0 5.0 5.0 5.0 4.0 \n", 655 | "1656 4.0 4.0 4.0 4.0 4.0 4.0 \n", 656 | "\n", 657 | " url \n", 658 | "0 https://www.facebook.com/honeycombcafe2016/ \\\n", 659 | "1 https://www.facebook.com/thekaffalovers/?fref=ts \n", 660 | "2 https://www.facebook.com/acupofcoffeeshilin/ \n", 661 | "3 https://www.facebook.com/FOBcafe.gallery/ \n", 662 | "4 http://www.wonmi.com.tw/home.php \n", 663 | "... ... 
\n", 664 | "1652 \n", 665 | "1653 https://www.facebook.com/greenday209/ \n", 666 | "1654 https://www.facebook.com/goldenlionkafe \n", 667 | "1655 IG FB tuckawaytaipei \n", 668 | "1656 \n", 669 | "\n", 670 | " address latitude longitude \n", 671 | "0 新北市永和區永貞路214號 25.004097 121.515287 \\\n", 672 | "1 台北市中正區金山北路3號 25.044354 121.530456 \n", 673 | "2 台北市士林區中山北路五段505巷36號 25.093021 121.526678 \n", 674 | "3 台北市中山區龍江路21巷9-6號 25.047838 121.541138 \n", 675 | "4 台北市八德路四段138號 (京華城 Living Mall 10 F) 25.048354 121.562300 \n", 676 | "... ... ... ... \n", 677 | "1652 242新北市新莊區中原東路206號 25.054470 121.460515 \n", 678 | "1653 新北市板橋區重慶路209號 25.001575 121.462947 \n", 679 | "1654 台北市中山區民生東路2段151號 25.058156 121.535403 \n", 680 | "1655 台北市文山區木柵路三段48巷1弄6號1樓 24.987979 121.565233 \n", 681 | "1656 新北市中和區中和路400巷31號 25.002081 121.512065 \n", 682 | "\n", 683 | " limited_time socket standing_desk mrt \n", 684 | "0 no no yes 永安市場站 \\\n", 685 | "1 no maybe yes 忠孝新生 \n", 686 | "2 no yes yes \n", 687 | "3 \n", 688 | "4 \n", 689 | "... ... ... ... ... \n", 690 | "1652 maybe no \n", 691 | "1653 maybe yes no \n", 692 | "1654 maybe yes yes \n", 693 | "1655 no maybe no \n", 694 | "1656 maybe yes yes 永安市場 \n", 695 | "\n", 696 | " open_time \n", 697 | "0 雙週四公休,有變更另外公告 \n", 698 | "1 週二至週五 10:00-19:00 六日09:00-19:00 \n", 699 | "2 \n", 700 | "3 \n", 701 | "4 \n", 702 | "... ... 
\n", 703 | "1652 \n", 704 | "1653 \n", 705 | "1654 \n", 706 | "1655 \n", 707 | "1656 \n", 708 | "\n", 709 | "[1657 rows x 18 columns]" 710 | ] 711 | }, 712 | "execution_count": 4, 713 | "metadata": {}, 714 | "output_type": "execute_result" 715 | } 716 | ], 717 | "source": [ 718 | "# 讀取 json 格式檔案\n", 719 | "'''\n", 720 | "參考連結:\n", 721 | "[1] pandas.read_json\n", 722 | "https://pandas.pydata.org/docs/reference/api/pandas.read_json.html\n", 723 | "[2] pandas.json_normalize\n", 724 | "https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html\n", 725 | "'''\n", 726 | "df = pd.read_json('./files/taipei.json'); df" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 5, 732 | "id": "39dc90ce", 733 | "metadata": {}, 734 | "outputs": [], 735 | "source": [ 736 | "# 寫入 json 檔案\n", 737 | "'''\n", 738 | "參考連結:\n", 739 | "[1] pandas.DataFrame.to_json\n", 740 | "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html\n", 741 | "'''\n", 742 | "df.to_json(\n", 743 | " './output.json', \n", 744 | " indent=None, \n", 745 | " orient='records', \n", 746 | " lines=False, \n", 747 | " force_ascii=False\n", 748 | ")" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "id": "7b1cbd38", 754 | "metadata": {}, 755 | "source": [ 756 | "# 讀寫 xml\n", 757 | "- eXtensible Markup Language\n", 758 | " - pd.read_xml(): 讀取 xml 檔\n", 759 | " - df.to_xml(): 寫入 xml 檔\n", 760 | "- 使用 [臺北市政府求職徵才職缺資訊](https://data.taipei/dataset/detail?id=f2f3f0d3-9e84-4fc5-af4d-5814563e17b3) 的資料來測試\n", 761 | " - 請另存下載: [https://dop.blob.core.windows.net/ipsnworkcontainer/jobs.xml](https://dop.blob.core.windows.net/ipsnworkcontainer/jobs.xml)" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 6, 767 | "id": "6c8e2f2c", 768 | "metadata": { 769 | "scrolled": false 770 | }, 771 | "outputs": [ 772 | { 773 | "data": { 774 | "text/html": [ 775 | "
\n", 776 | "\n", 789 | "\n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " 
\n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | "
Nodept_codedept_nameJob_noModify_datetelleveljob_Kind_Nametitlepop_Kind_Name...apply_rulejob_descjob_location_Namejob_addressofficial_ruleis_dutySpecial_rulecontactFile_PathNote
01379590000E臺北市政府文化局62120112/04/06NaN280薪點,月支報酬新臺幣36,316元約僱科員約僱人員...一、中華民國國民並無雙重國籍者。<br>二、大專以上畢業者。<br>三、具土木、建築、營建工...一、古蹟、歷史建築等文化資產修復及文化設施之委託技術服務招標、工程招標、施工監督及驗收等相關...NaN臺北市政府文化局(11008 臺北市信義區市府路一號四樓)無需輪值歡迎身心障礙、歡迎原住民族報名應繳文件:\\n一、個人履歷表。\\n二、最高學歷畢業證書等資料影本(國內外學歷須經教育部認...NaNNaN
12379670000G臺北市政府產業發展局62119112/04/06NaN280薪點約僱辦事員約僱人員...1、國內外之公、私立大專以上學校畢業。<br>2、具中華民國國籍,無公務人員任用法第26、2...1.辦理工廠設立登記、動產擔保交易登記。<br>2.工廠管理輔導法相關法規與專案性質業務與查...NaN臺北市信義區市府路1號北區10樓無需輪值歡迎身心障礙、歡迎原住民族1、本職缺⽉薪280薪點,折合為新臺幣36,316元,尚需扣除勞、健保及勞退個⼈⾃付等費⽤。...NaNNaN
23379040300E臺北市立圖書館62118112/04/06NaN280薪點政風室約僱課員約僱人員...(一)國內外專科以上學校畢業者。<br> (二)高級中等學校畢業,並具有與擬任工作性質相當...(一)廉政宣導。<br>(二)安全維護業務。<br>(三)錄影監視系統管理。<br>(四)採...NaN臺北市立圖書館政風室(106210 臺北市大安區建國南路二段125號7樓)無需輪值歡迎身心障礙、歡迎原住民族(一)意者請檢具履歷、自傳及相關學歷畢業證書影本,如具政風、工程或建築管理及與本職務相關之實...NaNNaN
34379490000A臺北市文山區公所62117112/04/06NaN約僱5等280薪點(約新台幣36,316元)約僱人員約僱人員...1.國內外大專以上學校畢業,或高級中等學校畢業,並具有與擬任工作性質相當之訓練6個月以上或2...1.總收文及相關公文處理等業務。<br>2.協助檔案管理相關作業。<br>3.每日郵局寄信及...NaN116008臺北市文山區木柵路3段220號8、9樓無需輪值NaN(一)網路報名者:請於公告日期截止前至行政院人事行政總處「事求人」網頁-本職缺公告點選「我要...NaNNaN
45379064100X臺北市立北政國民中學62116112/04/06NaN委任第5職等至薦任第7職等綜合行政事務組長一般人員...一、經銓敘合格實授委任第五職等(含)以上,並具綜合行政職系任用資格之<br>公務人員,且無特...一、大額工程發包、大額財物招標、大額勞物招標。<br>二、各招標合約製作及採購案之擴充續約等...NaN臺北市立北政國民中學總務處(臺北市文山區指南路三段 2 巷 14 號)。無需輪值NaN(02)29393651轉12,聯絡人:劉主任。NaNNaN
..................................................................
144145379700300A臺北市建成地政事務所61824112/04/06NaN委任第5職等或薦任第6職等至第7職等地政課員一般人員...1.教育部認可之國內外大專以上學歷,且具地政職系任用資格者。<br>2.熟稔地政業務,具審查...1.辦理土地登記案件審查。<br>2.承辦相關公文業務。<br>3.土地登記法令諮詢服務。<...NaN臺北市建成地政事務所(臺北市萬華區和平西路3段120號7~9樓)無需輪值歡迎身心障礙、歡迎原住民族(一)\\t本職缺採線上作業方式,意者請於112年4月6日前進入行政院人事行政總處「事求人」徵...NaNNaN
145146379145700I臺北市中山區健康服務中心61806112/04/06NaN技工技工...1.需為臺北市政府所屬各機關現職技工、工友或駕駛,並符合下列資格條件:<br>(1)國民小學...1.公文交換。<br>2.辦公機具簡易維護。<br>3.環境清潔及維護。<br>4.協助庶務...NaN臺北市中山區健康服務中心(臺北市中山區松江路367號7樓)無需輪值歡迎身心障礙、歡迎原住民族1.請將「調僱意願表」親自簽章後,併同履歷表(亦可採用公務人員簡式履歷表)、畢業證書影本、身...NaNNaN
146147379023300A臺北市中山區戶政事務所61770112/04/06NaN工友工友...需為臺北市政府所屬各機關學校現職職工,並符合下列資格條件:<br>1.國小以上畢業或具有同等...1.行政庶務(市府公文交換及繳款外勤工作等)。<br>2.勞務工作(辦公廳舍打掃清潔等)。<...NaN臺北市中山區戶政事務所(臺北市中山區松江路367號2樓)無需輪值歡迎身心障礙、歡迎原住民族一、符合資格條件且有意願者,請填妥事務工友甄選履歷表及職工調僱意願表後由當事人簽章,並檢附(...NaNNaN
147148379091000Y臺北市中山區濱江國民小學61758112/04/06NaN代理教師代理教師...凡中華民國國民,且身心健康、品德優良,無現行教師法第14條、109年6月30日施行之教師法第...幼兒園教學工作(詳見簡章)NaN臺北市中山區濱江國民小學(臺北市中山區樂群二路266巷99號)無需輪值歡迎身心障礙、歡迎原住民族一、甄選類別與員額:\\n代理教師職缺(幼兒園差假代理教師,詳見簡章11-20招)\\n二、報考...NaNNaN
148149379110300G臺北市政府工務局公園路燈工程管理處61726112/04/06NaN聘用7等-支328薪點(約新台幣42,541元/每月)職務代理約僱人員(A620087)聘用人員...1.具中華民國國籍,且未具雙重國籍者。<br>2.無公務人員任用法第26條、28條所列各款情...1.綜理大安區所轄公園維護相關業務。<br>2.綜理大安區公文彙辦、會勘、陳情業務。<br>...NaN臺北市政府工務局公園路燈工程管理處\\n青年公園管理所無需輪值歡迎身心障礙、歡迎原住民族1.配合行政院人事行政總處(下稱人事總處)推動線上徵才作業,本職缺應徵作業一律採線上方式辦理...NaNNaN
\n", 1083 | "

149 rows × 24 columns

\n", 1084 | "
" 1085 | ], 1086 | "text/plain": [ 1087 | " No dept_code dept_name Job_no Modify_date tel \n", 1088 | "0 1 379590000E 臺北市政府文化局 62120 112/04/06 NaN \\\n", 1089 | "1 2 379670000G 臺北市政府產業發展局 62119 112/04/06 NaN \n", 1090 | "2 3 379040300E 臺北市立圖書館 62118 112/04/06 NaN \n", 1091 | "3 4 379490000A 臺北市文山區公所 62117 112/04/06 NaN \n", 1092 | "4 5 379064100X 臺北市立北政國民中學 62116 112/04/06 NaN \n", 1093 | ".. ... ... ... ... ... ... \n", 1094 | "144 145 379700300A 臺北市建成地政事務所 61824 112/04/06 NaN \n", 1095 | "145 146 379145700I 臺北市中山區健康服務中心 61806 112/04/06 NaN \n", 1096 | "146 147 379023300A 臺北市中山區戶政事務所 61770 112/04/06 NaN \n", 1097 | "147 148 379091000Y 臺北市中山區濱江國民小學 61758 112/04/06 NaN \n", 1098 | "148 149 379110300G 臺北市政府工務局公園路燈工程管理處 61726 112/04/06 NaN \n", 1099 | "\n", 1100 | " level job_Kind_Name title \n", 1101 | "0 280薪點,月支報酬新臺幣36,316元 無 約僱科員 \\\n", 1102 | "1 280薪點 無 約僱辦事員 \n", 1103 | "2 280薪點 無 政風室約僱課員 \n", 1104 | "3 約僱5等280薪點(約新台幣36,316元) 無 約僱人員 \n", 1105 | "4 委任第5職等至薦任第7職等 綜合行政 事務組長 \n", 1106 | ".. ... ... ... \n", 1107 | "144 委任第5職等或薦任第6職等至第7職等 地政 課員 \n", 1108 | "145 無 無 技工 \n", 1109 | "146 無 無 工友 \n", 1110 | "147 無 無 代理教師 \n", 1111 | "148 聘用7等-支328薪點(約新台幣42,541元/每月) 無 職務代理約僱人員(A620087) \n", 1112 | "\n", 1113 | " pop_Kind_Name ... apply_rule \n", 1114 | "0 約僱人員 ... 一、中華民國國民並無雙重國籍者。
二、大專以上畢業者。
三、具土木、建築、營建工... \\\n", 1115 | "1 約僱人員 ... 1、國內外之公、私立大專以上學校畢業。
2、具中華民國國籍,無公務人員任用法第26、2... \n", 1116 | "2 約僱人員 ... (一)國內外專科以上學校畢業者。
(二)高級中等學校畢業,並具有與擬任工作性質相當... \n", 1117 | "3 約僱人員 ... 1.國內外大專以上學校畢業,或高級中等學校畢業,並具有與擬任工作性質相當之訓練6個月以上或2... \n", 1118 | "4 一般人員 ... 一、經銓敘合格實授委任第五職等(含)以上,並具綜合行政職系任用資格之
公務人員,且無特... \n", 1119 | ".. ... ... ... \n", 1120 | "144 一般人員 ... 1.教育部認可之國內外大專以上學歷,且具地政職系任用資格者。
2.熟稔地政業務,具審查... \n", 1121 | "145 技工 ... 1.需為臺北市政府所屬各機關現職技工、工友或駕駛,並符合下列資格條件:
(1)國民小學... \n", 1122 | "146 工友 ... 需為臺北市政府所屬各機關學校現職職工,並符合下列資格條件:
1.國小以上畢業或具有同等... \n", 1123 | "147 代理教師 ... 凡中華民國國民,且身心健康、品德優良,無現行教師法第14條、109年6月30日施行之教師法第... \n", 1124 | "148 聘用人員 ... 1.具中華民國國籍,且未具雙重國籍者。
2.無公務人員任用法第26條、28條所列各款情... \n", 1125 | "\n", 1126 | " job_desc job_location_Name \n", 1127 | "0 一、古蹟、歷史建築等文化資產修復及文化設施之委託技術服務招標、工程招標、施工監督及驗收等相關... NaN \\\n", 1128 | "1 1.辦理工廠設立登記、動產擔保交易登記。
2.工廠管理輔導法相關法規與專案性質業務與查... NaN \n", 1129 | "2 (一)廉政宣導。
(二)安全維護業務。
(三)錄影監視系統管理。
(四)採... NaN \n", 1130 | "3 1.總收文及相關公文處理等業務。
2.協助檔案管理相關作業。
3.每日郵局寄信及... NaN \n", 1131 | "4 一、大額工程發包、大額財物招標、大額勞物招標。
二、各招標合約製作及採購案之擴充續約等... NaN \n", 1132 | ".. ... ... \n", 1133 | "144 1.辦理土地登記案件審查。
2.承辦相關公文業務。
3.土地登記法令諮詢服務。<... NaN \n", 1134 | "145 1.公文交換。
2.辦公機具簡易維護。
3.環境清潔及維護。
4.協助庶務... NaN \n", 1135 | "146 1.行政庶務(市府公文交換及繳款外勤工作等)。
2.勞務工作(辦公廳舍打掃清潔等)。<... NaN \n", 1136 | "147 幼兒園教學工作(詳見簡章) NaN \n", 1137 | "148 1.綜理大安區所轄公園維護相關業務。
2.綜理大安區公文彙辦、會勘、陳情業務。
... NaN \n", 1138 | "\n", 1139 | " job_address official_rule is_duty \n", 1140 | "0 臺北市政府文化局(11008 臺北市信義區市府路一號四樓) 否 無需輪值 \\\n", 1141 | "1 臺北市信義區市府路1號北區10樓 否 無需輪值 \n", 1142 | "2 臺北市立圖書館政風室(106210 臺北市大安區建國南路二段125號7樓) 否 無需輪值 \n", 1143 | "3 116008臺北市文山區木柵路3段220號8、9樓 否 無需輪值 \n", 1144 | "4 臺北市立北政國民中學總務處(臺北市文山區指南路三段 2 巷 14 號)。 是 無需輪值 \n", 1145 | ".. ... ... ... \n", 1146 | "144 臺北市建成地政事務所(臺北市萬華區和平西路3段120號7~9樓) 是 無需輪值 \n", 1147 | "145 臺北市中山區健康服務中心(臺北市中山區松江路367號7樓) 否 無需輪值 \n", 1148 | "146 臺北市中山區戶政事務所(臺北市中山區松江路367號2樓) 否 無需輪值 \n", 1149 | "147 臺北市中山區濱江國民小學(臺北市中山區樂群二路266巷99號) 否 無需輪值 \n", 1150 | "148 臺北市政府工務局公園路燈工程管理處\\n青年公園管理所 否 無需輪值 \n", 1151 | "\n", 1152 | " Special_rule contact \n", 1153 | "0 歡迎身心障礙、歡迎原住民族 報名應繳文件:\\n一、個人履歷表。\\n二、最高學歷畢業證書等資料影本(國內外學歷須經教育部認... \\\n", 1154 | "1 歡迎身心障礙、歡迎原住民族 1、本職缺⽉薪280薪點,折合為新臺幣36,316元,尚需扣除勞、健保及勞退個⼈⾃付等費⽤。... \n", 1155 | "2 歡迎身心障礙、歡迎原住民族 (一)意者請檢具履歷、自傳及相關學歷畢業證書影本,如具政風、工程或建築管理及與本職務相關之實... \n", 1156 | "3 NaN (一)網路報名者:請於公告日期截止前至行政院人事行政總處「事求人」網頁-本職缺公告點選「我要... \n", 1157 | "4 NaN (02)29393651轉12,聯絡人:劉主任。 \n", 1158 | ".. ... ... \n", 1159 | "144 歡迎身心障礙、歡迎原住民族 (一)\\t本職缺採線上作業方式,意者請於112年4月6日前進入行政院人事行政總處「事求人」徵... \n", 1160 | "145 歡迎身心障礙、歡迎原住民族 1.請將「調僱意願表」親自簽章後,併同履歷表(亦可採用公務人員簡式履歷表)、畢業證書影本、身... \n", 1161 | "146 歡迎身心障礙、歡迎原住民族 一、符合資格條件且有意願者,請填妥事務工友甄選履歷表及職工調僱意願表後由當事人簽章,並檢附(... \n", 1162 | "147 歡迎身心障礙、歡迎原住民族 一、甄選類別與員額:\\n代理教師職缺(幼兒園差假代理教師,詳見簡章11-20招)\\n二、報考... \n", 1163 | "148 歡迎身心障礙、歡迎原住民族 1.配合行政院人事行政總處(下稱人事總處)推動線上徵才作業,本職缺應徵作業一律採線上方式辦理... \n", 1164 | "\n", 1165 | " File_Path Note \n", 1166 | "0 NaN NaN \n", 1167 | "1 NaN NaN \n", 1168 | "2 NaN NaN \n", 1169 | "3 NaN NaN \n", 1170 | "4 NaN NaN \n", 1171 | ".. ... ... 
\n", 1172 | "144 NaN NaN \n", 1173 | "145 NaN NaN \n", 1174 | "146 NaN NaN \n", 1175 | "147 NaN NaN \n", 1176 | "148 NaN NaN \n", 1177 | "\n", 1178 | "[149 rows x 24 columns]" 1179 | ] 1180 | }, 1181 | "execution_count": 6, 1182 | "metadata": {}, 1183 | "output_type": "execute_result" 1184 | } 1185 | ], 1186 | "source": [ 1187 | "# 讀取 xml 格式檔案\n", 1188 | "'''\n", 1189 | "參考連結:\n", 1190 | "[1] pandas.read_xml\n", 1191 | "https://pandas.pydata.org/docs/reference/api/pandas.read_xml.html\n", 1192 | "'''\n", 1193 | "df = pd.read_xml(\"./files/jobs.xml\"); df" 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": 7, 1199 | "id": "9f0b5649", 1200 | "metadata": {}, 1201 | "outputs": [], 1202 | "source": [ 1203 | "# 寫入 xml 檔案\n", 1204 | "'''\n", 1205 | "參考連結:\n", 1206 | "[1] pandas.DataFrame.to_xml\n", 1207 | "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_xml.html\n", 1208 | "'''\n", 1209 | "df.to_xml(\"./output.xml\")" 1210 | ] 1211 | }, 1212 | { 1213 | "cell_type": "markdown", 1214 | "id": "5fe3cbe7", 1215 | "metadata": {}, 1216 | "source": [ 1217 | "# 讀寫 excel\n", 1218 | "- 語法: \n", 1219 | " - pd.read_excel(): 讀取 excel 檔\n", 1220 | " - df.to_excel(): 寫入 excel 檔\n", 1221 | "- 記得先安裝套件: `pip install openpyxl`" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "code", 1226 | "execution_count": 8, 1227 | "id": "f16823c7", 1228 | "metadata": {}, 1229 | "outputs": [ 1230 | { 1231 | "data": { 1232 | "text/html": [ 1233 | "
\n", 1234 | "\n", 1247 | "\n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | "
中文姓名電話英文名稱
0楊○○978000000Darren
1林○○910000000Lin
2江○○939000000Jiang
\n", 1277 | "
" 1278 | ], 1279 | "text/plain": [ 1280 | " 中文姓名 電話 英文名稱\n", 1281 | "0 楊○○ 978000000 Darren\n", 1282 | "1 林○○ 910000000 Lin\n", 1283 | "2 江○○ 939000000 Jiang" 1284 | ] 1285 | }, 1286 | "execution_count": 8, 1287 | "metadata": {}, 1288 | "output_type": "execute_result" 1289 | } 1290 | ], 1291 | "source": [ 1292 | "# 讀取 excel 格式檔案\n", 1293 | "'''\n", 1294 | "參考連結:\n", 1295 | "[1] openpyxl - A Python library to read/write Excel 2010 xlsx/xlsm files\n", 1296 | "https://openpyxl.readthedocs.io/en/stable/\n", 1297 | "[2] pandas.read_excel\n", 1298 | "https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html\n", 1299 | "'''\n", 1300 | "\n", 1301 | "df = pd.read_excel(\"./files/通訊錄.xlsx\"); df" 1302 | ] 1303 | }, 1304 | { 1305 | "cell_type": "code", 1306 | "execution_count": 9, 1307 | "id": "82d25004", 1308 | "metadata": {}, 1309 | "outputs": [], 1310 | "source": [ 1311 | "# 寫入 excel 檔案\n", 1312 | "'''\n", 1313 | "參考連結:\n", 1314 | "[1] pandas.DataFrame.to_excel\n", 1315 | "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html\n", 1316 | "'''\n", 1317 | "df.to_excel(\"./output.xlsx\", index=False)" 1318 | ] 1319 | } 1320 | ], 1321 | "metadata": { 1322 | "kernelspec": { 1323 | "display_name": "python3@nlp", 1324 | "language": "python", 1325 | "name": "k_nlp" 1326 | }, 1327 | "language_info": { 1328 | "codemirror_mode": { 1329 | "name": "ipython", 1330 | "version": 3 1331 | }, 1332 | "file_extension": ".py", 1333 | "mimetype": "text/x-python", 1334 | "name": "python", 1335 | "nbconvert_exporter": "python", 1336 | "pygments_lexer": "ipython3", 1337 | "version": "3.10.11" 1338 | } 1339 | }, 1340 | "nbformat": 4, 1341 | "nbformat_minor": 5 1342 | } 1343 | --------------------------------------------------------------------------------