├── .gitignore
├── 01_OpenAI_getting_started.ipynb
├── 02_completion_api.ipynb
├── 03_code_generation.ipynb
├── 04_OpenAI_parameters.ipynb
├── 05_tokens_and_usage.ipynb
├── 06_best_practice.ipynb
├── 07_prompt_engineering.ipynb
├── 08_langchain_getting_started.ipynb
├── LICENSE
├── README.md
├── assets
    ├── azure-ai-studio.png
    ├── azure-portal.png
    └── chain-concept.png
└── data
    └── 2023-taipei-hot pot-restaurant.csv


/.gitignore:
--------------------------------------------------------------------------------
  1 | *.bundle.*
  2 | lib/
  3 | node_modules/
  4 | *.egg-info/
  5 | .ipynb_checkpoints
  6 | *.tsbuildinfo
  7 | 
  8 | # Created by https://www.gitignore.io/api/python
  9 | # Edit at https://www.gitignore.io/?templates=python
 10 | 
 11 | ### Python ###
 12 | # Byte-compiled / optimized / DLL files
 13 | __pycache__/
 14 | *.py[cod]
 15 | *$py.class
 16 | 
 17 | # C extensions
 18 | *.so
 19 | 
 20 | # Distribution / packaging
 21 | .Python
 22 | build/
 23 | develop-eggs/
 24 | dist/
 25 | downloads/
 26 | eggs/
 27 | .eggs/
 28 | lib/
 29 | lib64/
 30 | parts/
 31 | sdist/
 32 | var/
 33 | wheels/
 34 | pip-wheel-metadata/
 35 | share/python-wheels/
 36 | .installed.cfg
 37 | *.egg
 38 | MANIFEST
 39 | 
 40 | # PyInstaller
 41 | #  Usually these files are written by a python script from a template
 42 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 43 | *.manifest
 44 | *.spec
 45 | 
 46 | # Installer logs
 47 | pip-log.txt
 48 | pip-delete-this-directory.txt
 49 | 
 50 | # Unit test / coverage reports
 51 | htmlcov/
 52 | .tox/
 53 | .nox/
 54 | .coverage
 55 | .coverage.*
 56 | .cache
 57 | nosetests.xml
 58 | coverage.xml
 59 | *.cover
 60 | .hypothesis/
 61 | .pytest_cache/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # dotenv
 80 | .env
 81 | 
 82 | # FAISS Vector Store
 83 | *.faiss
 84 | *.pkl
 85 | 
 86 | # celery beat schedule file
 87 | celerybeat-schedule
 88 | 
 89 | # SageMath parsed files
 90 | *.sage.py
 91 | 
 92 | # Spyder project settings
 93 | .spyderproject
 94 | .spyproject
 95 | 
 96 | # Rope project settings
 97 | .ropeproject
 98 | 
 99 | # Mr Developer
100 | .mr.developer.cfg
101 | .project
102 | .pydevproject
103 | 
104 | # mkdocs documentation
105 | /site
106 | 
107 | # mypy
108 | .mypy_cache/
109 | .dmypy.json
110 | dmypy.json
111 | 
112 | # Pyre type checker
113 | .pyre/
114 | 
115 | # OS X stuff
116 | *.DS_Store
117 | 
118 | # End of https://www.gitignore.io/api/python
119 | 
120 | _temp_extension
121 | junit.xml
122 | [uU]ntitled*
123 | notebook/static/*
124 | !notebook/static/favicons
125 | notebook/labextension
126 | notebook/schemas
127 | docs/source/changelog.md
128 | docs/source/contributing.md
129 | 
130 | # playwright
131 | ui-tests/test-results
132 | ui-tests/playwright-report
133 | 
134 | # VSCode
135 | .vscode
136 | 
137 | # RTC
138 | .jupyter_ystore.db
139 | 
140 | # yarn >=2.x local files
141 | .yarn/*
142 | .pnp.*
143 | ui-tests/.yarn/*
144 | ui-tests/.pnp.*


--------------------------------------------------------------------------------
/02_completion_api.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 自動完成 (Completion) API 其他應用展示"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 21,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stdout",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Requirement already satisfied: openai in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.51.0)\n",
 20 |       "Requirement already satisfied: python-dotenv in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.0.0)\n",
 21 |       "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (3.7.1)\n",
 22 |       "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (1.9.0)\n",
 23 |       "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (0.24.1)\n",
 24 |       "Requirement already satisfied: jiter<1,>=0.4.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (0.5.0)\n",
 25 |       "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (2.9.2)\n",
 26 |       "Requirement already satisfied: sniffio in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (1.3.0)\n",
 27 |       "Requirement already satisfied: tqdm>4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (4.65.0)\n",
 28 |       "Requirement already satisfied: typing-extensions<5,>=4.11 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (4.12.2)\n",
 29 |       "Requirement already satisfied: idna>=2.8 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n",
 30 |       "Requirement already satisfied: certifi in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2022.12.7)\n",
 31 |       "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (0.17.3)\n",
 32 |       "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
 33 |       "Requirement already satisfied: pydantic-core==2.23.4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.23.4)\n",
 34 |       "Requirement already satisfied: colorama in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n",
 35 |       "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpcore<0.18.0,>=0.15.0->httpx<1,>=0.23.0->openai) (0.14.0)\n",
 36 |       "Note: you may need to restart the kernel to use updated packages.\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "%pip install openai python-dotenv"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 1,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "import os\n",
 51 |     "from dotenv import load_dotenv\n",
 52 |     "from openai import AzureOpenAI\n",
 53 |     "\n",
 54 |     "# 載入環境變數\n",
 55 |     "load_dotenv()\n",
 56 |     "\n",
 57 |     "# 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
 58 |     "azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
 59 |     "api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
 60 |     "api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
 61 |     "\n",
 62 |     "# 最簡之 API 呼叫\n",
 63 |     "client = AzureOpenAI(\n",
 64 |     "  azure_endpoint=azure_endpoint, \n",
 65 |     "  api_key=api_key,  \n",
 66 |     "  api_version=api_version\n",
 67 |     ")\n"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "## 降低幻覺發生的機會的提示\n",
 75 |     "以目前唯一支援 GPT-3 Completions API 的 gpt-35-turbo-instruct 模型示範"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 23,
 81 |    "metadata": {},
 82 |    "outputs": [
 83 |     {
 84 |      "name": "stdout",
 85 |      "output_type": "stream",
 86 |      "text": [
 87 |       " 抱歉,我不知道.\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "# 透過環境變數取得所使用的模型部署名稱\n",
 93 |     "model = os.getenv('COMPLETIONS_DEPLOYMENT_NAME')\n",
 94 |     "\n",
 95 |     "# 設定要輸入的問題\n",
 96 |     "prompt = \"\"\"回答我的問題，如果不知道就直接說 \"抱歉, 我不知道\".\\n\n",
 97 |     "Q: 請問誰是 2020 年夏季奧運女子跳高冠軍?\\n\n",
 98 |     "A:\"\"\"\n",
 99 |     "\n",
100 |     "response  = client.completions.create(\n",
101 |     "    model=model,\n",
102 |     "    prompt=prompt,\n",
103 |     "    temperature=0,\n",
104 |     "    max_tokens=50,\n",
105 |     "    top_p=1,\n",
106 |     "    frequency_penalty=0,\n",
107 |     "    presence_penalty=0,\n",
108 |     "    stop=\"\\n\"\n",
109 |     ")\n",
110 |     "\n",
111 |     "print(response.choices[0].text)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "## 文字情緒判斷 \n",
119 |     "嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成文字的情緒判斷。"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 5,
125 |    "metadata": {},
126 |    "outputs": [
127 |     {
128 |      "name": "stdout",
129 |      "output_type": "stream",
130 |      "text": [
131 |       "1. 負面  \n",
132 |       "2. 正面（但有些許保留意見）  \n",
133 |       "3. 負面  \n",
134 |       "4. 負面  \n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "# 透過環境變數取得所使用的模型部署名稱\n",
140 |     "model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
141 |     "\n",
142 |     "response = client.chat.completions.create(\n",
143 |     "  model= model,\n",
144 |     "  messages = [\n",
145 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，以單純文字格式回覆。\"}, \n",
146 |     "    {'role': 'user', 'content':  \"告訴我以下客戶反應是正面還是負面. \\n \" \\\n",
147 |     "        \"Q: 這樣爛的產品都可以賣的出去. \\n\" \\\n",
148 |     "        \"Q: 這玩意真的棒，只是跟廣告有點差距. \\n\" \\\n",
149 |     "        \"Q: 妳家產品用了之後讓我感覺自己像是豬八戒照鏡子. \\n\"\n",
150 |     "        \"Q: 真是好棒棒，你們家都沒有活人了嗎? \\n\"\n",
151 |     "    }\n",
152 |     "  ],\n",
153 |     "  temperature=0,\n",
154 |     "  max_tokens=650\n",
155 |     ")\n",
156 |     "\n",
157 |     "print(response.choices[0].message.content) "
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "## 檢查文句中是否包含個人隱私資訊 (PII) \n",
165 |     "嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成，判斷一段文字中是否有包含個人隱私資訊 (PII) 的風險。"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 6,
171 |    "metadata": {},
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "1. 姓名：John Doe  \n",
178 |       "2. 年齡：35歲  \n",
179 |       "3. 地址：21 Main Street, New York, NY  \n",
180 |       "4. 職業：軟體工程師  \n",
181 |       "5. 工作地點：Google  \n",
182 |       "6. 配偶姓名：Jane Doe  \n",
183 |       "7. 家庭成員：兩個孩子  \n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "# 透過環境變數取得所使用的模型部署名稱\n",
189 |     "model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
190 |     "\n",
191 |     "response = client.chat.completions.create(\n",
192 |     "  model= model,\n",
193 |     "  messages = [ \n",
194 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，以單純文字格式回覆。\"}, \n",
195 |     "    {'role': 'user', 'content': '以條列方式找出以下跟個人隱私資訊相關的內容\\n\\n' \\\n",
196 |     "     'John Doe is a 35-year old man and he lives at 21 Main Street, New York, NY.' \\\n",
197 |     "     ' He is a software engineer and he works at Google.' \\\n",
198 |     "     ' He has a wife named Jane Doe and they have two children'\n",
199 |     "    \n",
200 |     "    }\n",
201 |     "  ],\n",
202 |     "  temperature = 0,\n",
203 |     "  max_tokens=400\n",
204 |     ")\n",
205 |     "\n",
206 |     "print(response.choices[0].message.content) "
207 |    ]
208 |   }
209 |  ],
210 |  "metadata": {
211 |   "kernelspec": {
212 |    "display_name": "Python 3",
213 |    "language": "python",
214 |    "name": "python3"
215 |   },
216 |   "language_info": {
217 |    "codemirror_mode": {
218 |     "name": "ipython",
219 |     "version": 3
220 |    },
221 |    "file_extension": ".py",
222 |    "mimetype": "text/x-python",
223 |    "name": "python",
224 |    "nbconvert_exporter": "python",
225 |    "pygments_lexer": "ipython3",
226 |    "version": "3.11.9"
227 |   },
228 |   "orig_nbformat": 4
229 |  },
230 |  "nbformat": 4,
231 |  "nbformat_minor": 2
232 | }
233 | 


--------------------------------------------------------------------------------
/03_code_generation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 理解程式碼內容與生成程式碼"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "以目前唯一支援 GPT-3 Completions API 的 gpt-35-turbo-instruct 模型示範"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       " Department.name\n",
 27 |       "FROM Department\n",
 28 |       "INNER JOIN Employee ON Department.id = Employee.department_id\n",
 29 |       "INNER JOIN Salary_Payments ON Employee.id = Salary_Payments.employee_id\n",
 30 |       "WHERE Salary_Payments.date >= CURRENT_DATE - INTERVAL '3 months'\n",
 31 |       "GROUP BY Department.name\n",
 32 |       "HAVING COUNT(Employee.id) > 10\n"
 33 |      ]
 34 |     }
 35 |    ],
 36 |    "source": [
 37 |     "import os\n",
 38 |     "from dotenv import load_dotenv\n",
 39 |     "from openai import AzureOpenAI\n",
 40 |     "\n",
 41 |     "# 載入環境變數\n",
 42 |     "load_dotenv()\n",
 43 |     "\n",
 44 |     "# 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
 45 |     "azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
 46 |     "api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
 47 |     "api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
 48 |     "\n",
 49 |     "# 最簡之 API 呼叫\n",
 50 |     "client = AzureOpenAI(\n",
 51 |     "  azure_endpoint=azure_endpoint, \n",
 52 |     "  api_key=api_key,  \n",
 53 |     "  api_version=api_version\n",
 54 |     ")\n",
 55 |     "\n",
 56 |     "# 透過環境變數取得所使用的模型部署名稱\n",
 57 |     "model = os.getenv('COMPLETIONS_DEPLOYMENT_NAME')\n",
 58 |     "\n",
 59 |     "# 呼叫自動完成 API\n",
 60 |     "response  = client.completions.create(\n",
 61 |     "  model= model,\n",
 62 |     "  prompt=\"### Postgres SQL tables, with their properties:\\n\" \\\n",
 63 |     "         \"#\\n\" \\\n",
 64 |     "         \"# Employee(id, name, department_id) \\n\" \\\n",
 65 |     "         \"# Department(id, name, address)\\n\" \\\n",
 66 |     "         \"# Salary_Payments(id, employee_id, amount, date) \\n\" \\\n",
 67 |     "         \"### \" \\\n",
 68 |     "         \"查詢列出過去 3 個月僱用超過 10 名員工的部門名稱 \\n\"\n",
 69 |     "         \"query: SELECT\",\n",
 70 |     "  temperature=0,\n",
 71 |     "  max_tokens=150,\n",
 72 |     "  top_p=1,\n",
 73 |     "  frequency_penalty=0,\n",
 74 |     "  presence_penalty=0,\n",
 75 |     "  stop=[\"#\",\";\"])\n",
 76 |     "\n",
 77 |     "print(response.choices[0].text)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 3,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "name": "stdout",
 94 |      "output_type": "stream",
 95 |      "text": [
 96 |       "這個 SQL 查詢的意思是：\n",
 97 |       "\n",
 98 |       "從 Department 表中選取部門名稱，這些部門必須符合以下條件：\n",
 99 |       "1. 與 Employee 表進行連接，連接條件是 Department 表的 id 與 Employee 表的 department_id 相等。\n",
100 |       "2. Employee 表中的員工 id 必須在 Salary_Payments 表中，且這些員工在過去三個月內有薪資記錄。\n",
101 |       "3. 按部門名稱分組，並且只選取員工數量超過 10 的部門。\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
107 |     "\n",
108 |     "response = client.chat.completions.create(\n",
109 |     "  model= model,\n",
110 |     "  messages = [ \n",
111 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"}, \n",
112 |     "    {'role': 'user', 'content': \"精簡的告訴我以下這個 SQL 是什麼意思??\\n\"\n",
113 |     "      'Code:' \\\n",
114 |     "      \"SELECT d.name FROM Department d JOIN Employee e ON d.id = e.department_id WHERE e.id IN (SELECT employee_id FROM Salary_Payments WHERE date > now() - interval '3 months') GROUP BY d.name HAVING COUNT(*) > 10\\n\" \\\n",
115 |     "      'Answer:'    \n",
116 |     "    }\n",
117 |     "  ],\n",
118 |     "  temperature=0,\n",
119 |     "  max_tokens=250\n",
120 |     ")\n",
121 |     "\n",
122 |     "print(response.choices[0].message.content) "
123 |    ]
124 |   }
125 |  ],
126 |  "metadata": {
127 |   "kernelspec": {
128 |    "display_name": "Python 3",
129 |    "language": "python",
130 |    "name": "python3"
131 |   },
132 |   "language_info": {
133 |    "codemirror_mode": {
134 |     "name": "ipython",
135 |     "version": 3
136 |    },
137 |    "file_extension": ".py",
138 |    "mimetype": "text/x-python",
139 |    "name": "python",
140 |    "nbconvert_exporter": "python",
141 |    "pygments_lexer": "ipython3",
142 |    "version": "3.11.9"
143 |   },
144 |   "orig_nbformat": 4
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 2
148 | }
149 | 


--------------------------------------------------------------------------------
/04_OpenAI_parameters.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# OpenAI 參數設定"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 3,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "from dotenv import load_dotenv\n",
 18 |     "from openai import AzureOpenAI\n",
 19 |     "\n",
 20 |     "# 載入環境變數\n",
 21 |     "load_dotenv()\n",
 22 |     "\n",
 23 |     "# 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
 24 |     "azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
 25 |     "api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
 26 |     "api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
 27 |     "\n",
 28 |     "# 最簡之 API 呼叫\n",
 29 |     "client = AzureOpenAI(\n",
 30 |     "  azure_endpoint=azure_endpoint, \n",
 31 |     "  api_key=api_key,  \n",
 32 |     "  api_version=api_version\n",
 33 |     ")"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "attachments": {},
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "# temperature\n",
 42 |     "\n",
 43 |     "預設值 1\n",
 44 |     "\n",
 45 |     "決定採樣時的溫度 (sampling temperature)，參數值介於 0 與 2 之間。 數值越高意味著模型產生的文字內容越多樣化，對於更需要產生具備創意之文案的相關應用，可以嘗試使用 0.9，對於具有明確答案應用情境，建議嘗試使用 0 ( 使用 argmax 函數來採樣)。\n",
 46 |     "\n",
 47 |     "temperature 參數或 top_p 參數有著類似效果，但不要同時調整這兩個參數以避免無法確認內容產出的變化是因為哪一個參數調整所造成的。本範例嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 4,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "# 透過環境變數取得所使用的模型部署名稱\n",
 57 |     "model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
 58 |     "\n",
 59 |     "def call_openai(num_times, prompt, temperature):\n",
 60 |     "    for i in range(num_times):       \n",
 61 |     "        response = client.chat.completions.create(\n",
 62 |     "            model= model,\n",
 63 |     "            messages = [{'role': 'user', 'content':prompt}],\n",
 64 |     "            max_tokens=60,\n",
 65 |     "            temperature = temperature,\n",
 66 |     "        )\n",
 67 |     "        print(response.choices[0].message.content)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 5,
 73 |    "metadata": {},
 74 |    "outputs": [
 75 |     {
 76 |      "name": "stdout",
 77 |      "output_type": "stream",
 78 |      "text": [
 79 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 80 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 81 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 82 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 83 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 84 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 85 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 86 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 87 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
 88 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n"
 89 |      ]
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "call_openai(10, '用正體中文一句話精簡回覆\\nQ:最佳的寵物是什麼?\\nA:', temperature = 0)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 6,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "name": "stdout",
103 |      "output_type": "stream",
104 |      "text": [
105 |       "最佳的寵物因人而異，取決於你的生活方式和偏好。\n",
106 |       "最佳的寵物取決於個人喜好和生活方式。\n",
107 |       "最佳的寵物因人而異，視個人需求和喜好而定。\n",
108 |       "最佳的寵物因人而異，取決於個人需求和生活方式。\n",
109 |       "因人而異，每個人心目中的最佳寵物都不同。\n",
110 |       "最佳的寵物因人而異，依喜好和生活方式而定。\n",
111 |       "最適合你的生活方式和需求的寵物就是最佳的寵物。\n",
112 |       "最佳的寵物因人而異，主要看個人的喜好和生活方式。\n",
113 |       "最佳的寵物取決於主人的生活方式和需求。\n",
114 |       "因人而異，視個人喜好與生活方式決定。\n"
115 |      ]
116 |     }
117 |    ],
118 |    "source": [
119 |     "call_openai(10, '用正體中文一句話精簡回覆\\nQ:最佳的寵物是什麼?\\nA:', temperature = 1)"
120 |    ]
121 |   },
122 |   {
123 |    "attachments": {},
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "# top_p\n",
128 |     "\n",
129 |     "預設值 1\n",
130 |     "\n",
131 |     "控制採樣溫度 (sampling temperature) 之外的另一種控制產生內容多樣性的參數，temperature 參數控制產生的內容之隨機性，而 top_p  則決定可供選擇的 Token 範圍有多大，top_p 參數使用 nucleus sampling 採樣方式，一般而言候選的 Token 機率依據高低會以長尾的方式分布，top_p 決定了只有多少百分比之候選 Token 應該被納入考慮，例如 top_p 參數設為 0.1 代表只有前 10% 的候選 Token 有機會被隨機選用。對於 temperature 與 top_p 參數背後所代表的意義可以參考 https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277\n",
132 |     "\n",
133 |     "temperature 參數或 top_p 參數有著類似效果，但不要同時調整這兩個參數以避免無法確認內容產出的變化是因為哪一個參數調整所造成的。本範例嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成。"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 10,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "def call_openai(num_times, prompt, top_p):\n",
143 |     "    for i in range(num_times):       \n",
144 |     "        response = client.chat.completions.create(\n",
145 |     "            model= model, \n",
146 |     "            messages = [{'role': 'user', 'content':prompt}],\n",
147 |     "            max_tokens=60,        \n",
148 |     "            top_p = top_p\n",
149 |     "        )\n",
150 |     "        print(response.choices[0].message.content)\n"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 11,
156 |    "metadata": {},
157 |    "outputs": [
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "最佳的寵物因人而異，但通常是與主人性格和生活方式最契合的動物。\n",
163 |       "最佳的寵物因人而異，取決於個人喜好和生活習慣。\n",
164 |       "最佳的寵物因人而異，依個人喜好決定。\n",
165 |       "最適合你的生活方式與需求的寵物。\n",
166 |       "最佳的寵物取決於個人喜好和生活方式。\n",
167 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
168 |       "最佳的寵物因人而異，但某些人喜愛忠誠的狗。\n",
169 |       "最佳的寵物取決於個人喜好與生活方式。\n",
170 |       "最佳的寵物因人而異，視個人喜好和生活方式而定。\n",
171 |       "A: 最佳的寵物因人而異，取決於個人喜好和生活方式。\n"
172 |      ]
173 |     }
174 |    ],
175 |    "source": [
176 |     "call_openai(10, '用正體中文一句話精簡回覆\\nQ:最佳的寵物是什麼?\\nA:', top_p = 1)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 12,
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "name": "stdout",
186 |      "output_type": "stream",
187 |      "text": [
188 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
189 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
190 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
191 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
192 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
193 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
194 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
195 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
196 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
197 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n"
198 |      ]
199 |     }
200 |    ],
201 |    "source": [
202 |     "call_openai(10, '用正體中文一句話精簡回覆\\nQ:最佳的寵物是什麼?\\nA:', top_p = 0.1)"
203 |    ]
204 |   },
205 |   {
206 |    "attachments": {},
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "# n\n",
211 |     "\n",
212 |     "預設值 1\n",
213 |     "\n",
214 |     "為每個提示 (prompt) 產生多少個自動完成的回應內容。\n",
215 |     "\n",
216 |     "請注意：由於此參數會生成多個回應內容，因此它會快速消耗 Token 配額。 請謹慎使用並確認有設定妥 max_tokens 參數與停止產生內容的 stop 字串。本範例嘗試使用 gpt-35-turbo 與 gpt-4 開始支援的 ChatCompletion API 來進行自動完成\n"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 13,
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n",
229 |       "最佳的寵物取決於個人喜好和生活方式。\n",
230 |       "最佳的寵物因人而異，取決於個人喜好和生活方式。\n"
231 |      ]
232 |     }
233 |    ],
234 |    "source": [
235 |     "response = client.chat.completions.create(\n",
236 |     "  model = model,  \n",
237 |     "  messages = [ \n",
238 |     "    {'role': 'user', 'content': \"用正體中文一句話精簡回覆\\nQ:最佳的寵物是什麼?\\nA:\"      \n",
239 |     "    }\n",
240 |     "  ],\n",
241 |     "  n=3, # 產生三個自動完成的回覆\n",
242 |     "  temperature=0.6,\n",
243 |     "  max_tokens=200\n",
244 |     ")\n",
245 |     "\n",
246 |     "print(response.choices[0].message.content)\n",
247 |     "print(response.choices[1].message.content)\n",
248 |     "print(response.choices[2].message.content)"
249 |    ]
250 |   },
251 |   {
252 |    "attachments": {},
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "# logprobs\n",
257 |     "\n",
258 |     "預設值 null\n",
259 |     "\n",
260 |     "logprobs 會列出最有可能的 Token 以及其他候選之 Token 的機率。 例如如果 logprobs 設為 5，則 API 呼叫後將會回傳 5 個最可能 Token 的清單。 API 仍將傳會機率最高的 Token。 logprobs 的最大值為 5。如果您需要更高的參數值，請聯繫微軟技術支援中心以便讓微軟了解您的需求。由於 OpenAI 許多新模型已經不再支援 logprobs 參數，本範例使用與過去 GPT-3 相容性最高的 gpt-35-turbo-instruct 模型展示如何使用 logprobs 參數。"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 21,
266 |    "metadata": {},
267 |    "outputs": [
268 |     {
269 |      "name": "stdout",
270 |      "output_type": "stream",
271 |      "text": [
272 |       " The best pet name is subjective and can vary depending\n",
273 |       "Logprobs(text_offset=[31, 35, 40, 44, 49, 52, 63, 67, 71, 76], token_logprobs=[-0.8211408, -0.0008969317, -0.0008701292, -0.001026497, -0.03300924, -0.050233137, -0.01647465, -0.77131057, -0.02149361, -0.5570419], tokens=[' The', ' best', ' pet', ' name', ' is', ' subjective', ' and', ' can', ' vary', ' depending'], top_logprobs=[{' The': -0.8211408, 'The': -1.3426118}, {' best': -0.0008969317, ' answer': -7.8318944}, {' pet': -0.0008701292, ' name': -7.120272}, {' name': -0.001026497, ' names': -7.0283875}, {' is': -0.03300924, ' varies': -4.5251756}, {' subjective': -0.050233137, ' a': -3.6106942}, {' and': -0.01647465, ',': -4.3942823}, {' can': -0.77131057, ' varies': -1.148298}, {' vary': -0.02149361, ' differ': -4.322843}, {' depending': -0.5570419, ' based': -1.4554832}])\n",
274 |       "[{' The': -0.8211408, 'The': -1.3426118}, {' best': -0.0008969317, ' answer': -7.8318944}, {' pet': -0.0008701292, ' name': -7.120272}, {' name': -0.001026497, ' names': -7.0283875}, {' is': -0.03300924, ' varies': -4.5251756}, {' subjective': -0.050233137, ' a': -3.6106942}, {' and': -0.01647465, ',': -4.3942823}, {' can': -0.77131057, ' varies': -1.148298}, {' vary': -0.02149361, ' differ': -4.322843}, {' depending': -0.5570419, ' based': -1.4554832}]\n"
275 |      ]
276 |     }
277 |    ],
278 |    "source": [
279 |     "# 透過環境變數取得所使用的模型部署名稱\n",
280 |     "model = os.getenv('COMPLETIONS_DEPLOYMENT_NAME')\n",
281 |     "\n",
282 |     "response  = client.completions.create(\n",
283 |     "            model=model,\n",
284 |     "            prompt='Q:What is the best pet name?\\nA:',\n",
285 |     "            max_tokens=10,\n",
286 |     "            temperature = 0,\n",
287 |     "            logprobs=2,\n",
288 |     "            stop='\\n'\n",
289 |     "        )\n",
290 |     "# 顯示完整回應內容\n",
291 |     "print(response.choices[0].text)\n",
292 |     "# 顯示 logprobs 所有內容\n",
293 |     "print(response.choices[0].logprobs)\n",
294 |     "# 顯示機率最高的前兩個英文字清單\n",
295 |     "print(response.choices[0].logprobs.top_logprobs)\n"
296 |    ]
297 |   },
298 |   {
299 |    "attachments": {},
300 |    "cell_type": "markdown",
301 |    "metadata": {},
302 |    "source": [
303 |     "# max_tokens\n",
304 |     "\n",
305 |     "預設值 無上限\n",
306 |     "\n",
307 |     "生成內容允許之 token 數上限。此外包含提示 (prompt) 耗用的 token 數與生成內容耗用的 token 之數總和不能超過所選用模型允許之 token 數上限。\n",
308 |     "\n",
309 |     "# stop\n",
310 |     "\n",
311 |     "預設值 null\n",
312 |     "\n",
313 |     "讓自動完成 API 停止產生語句的特定字串，只要遇到此參數指定的字串就會結束內容生成，最多可以設定 4 組 Stop 字串。\n",
314 |     "\n",
315 |     "# presence_penalty\n",
316 |     "\n",
317 |     "預設值 0\n",
318 |     "\n",
319 |     "參數值可介於 -2.0 和 2.0 之間的數字，數值大於 0 的設定值會開始在取樣時懲罰使用過的 token，也就是增加產生新的語句或新的主題之可能性。\n",
320 |     "\n",
321 |     "# frequency_penalty\n",
322 |     "\n",
323 |     "預設值 0\n",
324 |     "\n",
325 |     "參數值可介於 -2.0 和 2.0 之間的數字。數值大於 0 的設定值會開始在產生的文本中已經出現過的詞彙時在取樣時進行懲罰，進而降低模型產生重複詞彙的可能性。presence_penalty 參數與 frequency_penalty 參數之間的區別很微妙，frequency_penalty 可以視為控制單詞重複的方法，而將 presence_penalty 視為避免模型產生重複的主題。\n",
326 |     "\n",
327 |     "# stream\n",
328 |     "\n",
329 |     "預設值 false\n",
330 |     "\n",
331 |     "OpenAI 自動完成 API 預設是整個生成內容完成後才會回傳結果，如果您生成內容的時間很長，用戶等待時間就會很久。當 stream 參數設為 true 時，可以在內容生成時以串流 (stream) 方式將目前生成的最新字句立即傳送回來，相關範例可參閱 [OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb)\n",
332 |     "\n",
333 |     "\n",
334 |     "# best_of\n",
335 |     "\n",
336 |     "預設值 1\n",
337 |     "\n",
338 |     "依據參數值在伺服器端生成多個自動完成的結果，將 \"最佳\" 的結果回傳，當設定值大於 1 的就無法啟用與使用 streame 功能。若 best_of 參數與 n 參數一起使用時，best_of 決定了有多少個候選生成出來的內容數量，n 則決定了最終回傳幾個生成的內容數量，也因 best_of 參數值必須大於 n 的參數值。 此外 gpt-35-turbo 與 gpt-4 模型已經不再支援 best_of 參數。\n",
339 |     "請注意：由於此參數會生成多個回應內容，因此它會快速消耗 Token 配額。 請謹慎使用並確認有設定妥 max_tokens 參數與停止產生內容的 stop 字串。\n",
340 |     "\n",
341 |     "\n",
342 |     "# logit_bias\n",
343 |     "\n",
344 |     "預設值 null\n",
345 |     "\n",
346 |     "修改特定 tokens 出現在自動生成 (completion) 產生之文句中的可能性。OpenAI GPT 模型每個字代表之絕對為一之 token ID 是多少? 則可利用　https://platform.openai.com/tokenizer　查詢查詢得知\n",
347 |     "\n",
348 |     "利用 JSON 格式來設定特定 token ID 與配套之 -100 之間 100 數值，數值 -100 表示絕對不會產生出該 token ID 所代表的字。\n",
349 |     "\n",
350 |     "例如您可以設定參數值 {\"16108\": -100} 來避免 token ID 為 50256 所代表的字 \"Apple\" 被模型產生出來。\n",
351 |     "\n",
352 |     "參考資料 : [OpenAI API Reference](https://platform.openai.com/docs/api-reference/completions)"
353 |    ]
354 |   }
355 |  ],
356 |  "metadata": {
357 |   "kernelspec": {
358 |    "display_name": "Python 3",
359 |    "language": "python",
360 |    "name": "python3"
361 |   },
362 |   "language_info": {
363 |    "codemirror_mode": {
364 |     "name": "ipython",
365 |     "version": 3
366 |    },
367 |    "file_extension": ".py",
368 |    "mimetype": "text/x-python",
369 |    "name": "python",
370 |    "nbconvert_exporter": "python",
371 |    "pygments_lexer": "ipython3",
372 |    "version": "3.11.9"
373 |   },
374 |   "orig_nbformat": 4
375 |  },
376 |  "nbformat": 4,
377 |  "nbformat_minor": 2
378 | }
379 | 


--------------------------------------------------------------------------------
/05_tokens_and_usage.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Tokens 計算"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Huggingface 所提供之 GPT Token 計算工具 (https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/tokenization_gpt2.py) "
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "Requirement already satisfied: openai in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.51.0)\n",
 27 |       "Requirement already satisfied: python-dotenv in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.0.0)\n",
 28 |       "Requirement already satisfied: transformers in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (4.31.0)\n",
 29 |       "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (3.7.1)\n",
 30 |       "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (1.9.0)\n",
 31 |       "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (0.24.1)\n",
 32 |       "Requirement already satisfied: jiter<1,>=0.4.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (0.5.0)\n",
 33 |       "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (2.9.2)\n",
 34 |       "Requirement already satisfied: sniffio in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (1.3.0)\n",
 35 |       "Requirement already satisfied: tqdm>4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (4.65.0)\n",
 36 |       "Requirement already satisfied: typing-extensions<5,>=4.11 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai) (4.12.2)\n",
 37 |       "Requirement already satisfied: filelock in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (3.12.2)\n",
 38 |       "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (0.16.4)\n",
 39 |       "Requirement already satisfied: numpy>=1.17 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (1.24.2)\n",
 40 |       "Requirement already satisfied: packaging>=20.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (24.1)\n",
 41 |       "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (6.0.1)\n",
 42 |       "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (2023.6.3)\n",
 43 |       "Requirement already satisfied: requests in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (2.28.2)\n",
 44 |       "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (0.13.3)\n",
 45 |       "Requirement already satisfied: safetensors>=0.3.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from transformers) (0.3.1)\n",
 46 |       "Requirement already satisfied: idna>=2.8 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n",
 47 |       "Requirement already satisfied: certifi in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2022.12.7)\n",
 48 |       "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (0.17.3)\n",
 49 |       "Requirement already satisfied: fsspec in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
 50 |       "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n",
 51 |       "Requirement already satisfied: pydantic-core==2.23.4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.23.4)\n",
 52 |       "Requirement already satisfied: colorama in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n",
 53 |       "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->transformers) (3.1.0)\n",
 54 |       "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->transformers) (1.26.15)\n",
 55 |       "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpcore<0.18.0,>=0.15.0->httpx<1,>=0.23.0->openai) (0.14.0)\n",
 56 |       "Note: you may need to restart the kernel to use updated packages.\n"
 57 |      ]
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "%pip install openai python-dotenv transformers"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 2,
 67 |    "metadata": {},
 68 |    "outputs": [
 69 |     {
 70 |      "name": "stderr",
 71 |      "output_type": "stream",
 72 |      "text": [
 73 |       "c:\\Users\\tomlee\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
 74 |       "  from .autonotebook import tqdm as notebook_tqdm\n",
 75 |       "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n",
 76 |       "Downloading tokenizer_config.json: 100%|██████████| 25.0/25.0 [00:00<?, ?B/s]\n",
 77 |       "c:\\Users\\tomlee\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\tomlee\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
 78 |       "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
 79 |       "  warnings.warn(message)\n",
 80 |       "Downloading vocab.json: 100%|██████████| 816k/816k [00:00<00:00, 1.48MB/s]\n",
 81 |       "Downloading merges.txt: 100%|██████████| 458k/458k [00:00<00:00, 867kB/s]\n",
 82 |       "Downloading tokenizer.json: 100%|██████████| 1.27M/1.27M [00:00<00:00, 6.15MB/s]\n",
 83 |       "Downloading config.json: 100%|██████████| 656/656 [00:00<?, ?B/s] \n",
 84 |       "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n",
 85 |       "The tokenizer class you load from this checkpoint is 'OpenAIGPTTokenizer'. \n",
 86 |       "The class this function is called from is 'GPT2TokenizerFast'.\n",
 87 |       "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
 88 |      ]
 89 |     },
 90 |     {
 91 |      "name": "stdout",
 92 |      "output_type": "stream",
 93 |      "text": [
 94 |       "tokens 總數: 9\n",
 95 |       "Tokens :  ['azure', 'open', 'ai', 'service', 'is', 'general', 'available', 'now', '!']\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "import os\n",
101 |     "from openai import AzureOpenAI\n",
102 |     "from transformers import GPT2TokenizerFast\n",
103 |     "from dotenv import load_dotenv\n",
104 |     "\n",
105 |     "# 載入環境變數\n",
106 |     "load_dotenv()\n",
107 |     "\n",
108 |     "# 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
109 |     "azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
110 |     "api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
111 |     "api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
112 |     "\n",
113 |     "# 最簡之 API 呼叫\n",
114 |     "client = AzureOpenAI(\n",
115 |     "  azure_endpoint=azure_endpoint, \n",
116 |     "  api_key=api_key,  \n",
117 |     "  api_version=api_version\n",
118 |     ")\n",
119 |     "\n",
120 |     "# 透過環境變數取得所使用的模型部署名稱\n",
121 |     "model = os.getenv('COMPLETIONS_DEPLOYMENT_NAME')\n",
122 |     "\n",
123 |     "# 建立 GPT tokenizer 來了解每個 Token 內容與 Token 總數\n",
124 |     "tokenizer = GPT2TokenizerFast.from_pretrained(\"openai-gpt\")\n",
125 |     "prompt = \"Azure OpenAI service is General Available now!\"\n",
126 |     "tokens = tokenizer(prompt)\n",
127 |     "print('tokens 總數:', len(tokens['input_ids']))\n",
128 |     "print('Tokens : ', [tokenizer.decode(t) for t in tokens['input_ids']])"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 5,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "response  = client.completions.create(\n",
138 |     "    model=model,\n",
139 |     "    prompt=prompt,\n",
140 |     "    max_tokens=60,\n",
141 |     "    n=2,\n",
142 |     "    best_of=2,\n",
143 |     ")"
144 |    ]
145 |   },
146 |   {
147 |    "attachments": {},
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "# 使用參數 n 與 best_of 產生兩組生成內容"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 6,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       "============================== ANSWER #1 ==============================\n",
164 |       "\n",
165 |       "That's great news! The Azure OpenAI service is now fully released and ready for use. It allows users to easily access the powerful capabilities of OpenAI, including natural language processing, machine learning, and reinforcement learning tools. This service will make it easier for developers and businesses to integrate AI into\n",
166 |       "============================== ANSWER #2 ==============================\n",
167 |       "\n",
168 |       "\n",
169 |       "Yes, Azure OpenAI service is now generally available as of October 2021. This means that the service is fully released and supported by Microsoft and can be used in production environments. This marks a significant milestone for the partnership between Microsoft and OpenAI, as customers can now access the power of\n"
170 |      ]
171 |     }
172 |    ],
173 |    "source": [
174 |     "print('='*30, 'ANSWER #1', '='*30)\n",
175 |     "print(response.choices[0].text)\n",
176 |     "print('='*30, 'ANSWER #2', '='*30)\n",
177 |     "print(response.choices[1].text)\n"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "# 顯示 Token 耗用量 (Usage) 分析"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 8,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "data": {
194 |       "text/plain": [
195 |        "CompletionUsage(completion_tokens=120, prompt_tokens=9, total_tokens=129, completion_tokens_details=None, prompt_tokens_details=None)"
196 |       ]
197 |      },
198 |      "execution_count": 8,
199 |      "metadata": {},
200 |      "output_type": "execute_result"
201 |     }
202 |    ],
203 |    "source": [
204 |     "response.usage"
205 |    ]
206 |   }
207 |  ],
208 |  "metadata": {
209 |   "kernelspec": {
210 |    "display_name": "Python 3",
211 |    "language": "python",
212 |    "name": "python3"
213 |   },
214 |   "language_info": {
215 |    "codemirror_mode": {
216 |     "name": "ipython",
217 |     "version": 3
218 |    },
219 |    "file_extension": ".py",
220 |    "mimetype": "text/x-python",
221 |    "name": "python",
222 |    "nbconvert_exporter": "python",
223 |    "pygments_lexer": "ipython3",
224 |    "version": "3.11.9"
225 |   },
226 |   "orig_nbformat": 4
227 |  },
228 |  "nbformat": 4,
229 |  "nbformat_minor": 2
230 | }
231 | 


--------------------------------------------------------------------------------
/06_best_practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "attachments": {},
  5 |    "cell_type": "markdown",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# 提示工程 (Prompt Engineering) 最佳實踐\n",
  9 |     "\n",
 10 |     "參考 OpenAI 官網文件 https://help.openai.com/en/articles/6654000-best-practices-for-prompt-engineering-with-openai-api 內容，以實際範例協助開發人員理解最佳實踐，此外微軟文件 https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/prompt-engineering 也提供了一些有用的資訊。\n"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import os\n",
 20 |     "from dotenv import load_dotenv\n",
 21 |     "from openai import AzureOpenAI\n",
 22 |     "\n",
 23 |     "# 載入環境變數\n",
 24 |     "load_dotenv()\n",
 25 |     "\n",
 26 |     "# 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
 27 |     "azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
 28 |     "api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
 29 |     "api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
 30 |     "\n",
 31 |     "# 最簡之 API 呼叫\n",
 32 |     "client = AzureOpenAI(\n",
 33 |     "  azure_endpoint=azure_endpoint, \n",
 34 |     "  api_key=api_key,  \n",
 35 |     "  api_version=api_version\n",
 36 |     ")\n"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "# 1. 使用最新的模型使用越新的模型效果越好\n",
 44 |     "\n",
 45 |     "OpenAI 持續改進模型品質，建議使用最新的模型，以獲得最佳的體驗。此外 Azure OpenAI Service 每個模型都有其支援生命週期。並以 ChatCompletion API 取代 Completion API，目前 Completion API 僅是為了向前相容 GPT-3 而保留。"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "# 2. 對於生成的內容，在提示中盡可能具體明確描述產出的文字長度、格式、風格等\n"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 2,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "在智慧天空展翅高飛，\n",
 65 |       "OpenAI，光芒閃爍如星，\n",
 66 |       "以神奇算法解開世界奧秘，\n",
 67 |       "科技之翼，飛越無垠。\n",
 68 |       "\n",
 69 |       "在數據海洋中航行，\n",
 70 |       "智慧點亮暗夜的前方，\n",
 71 |       "你是未來的導航者，\n",
 72 |       "引領我們走向新時光。\n",
 73 |       "\n",
 74 |       "無數創意在你的火花中閃耀，\n",
 75 |       "創造力無限，前景無窮，\n",
 76 |       "你把夢想變成現實，\n",
 77 |       "在每一個字節中鑄就輝煌。\n",
 78 |       "\n",
 79 |       "OpenAI，知識的源泉，\n",
 80 |       "你是智慧樹上的綠葉，\n",
 81 |       "在你的指引下，\n",
 82 |       "我們走向無限的未來。\n",
 83 |       "\n",
 84 |       "科技之光永不熄滅，\n",
 85 |       "在你之中，我們看見希望，\n",
 86 |       "OpenAI，你是永恆的燈塔，\n",
 87 |       "照亮我們前進的方向。\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
 93 |     "\n",
 94 |     "response = client.chat.completions.create(\n",
 95 |     "  model= model,\n",
 96 |     "  messages = [\n",
 97 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},  \n",
 98 |     "    {'role': 'user', 'content': '寫一首歌頌 OpenAI 的詩.'}\n",
 99 |     "  ],\n",
100 |     "  temperature = 0.9,\n",
101 |     "  max_tokens=400\n",
102 |     ")\n",
103 |     "\n",
104 |     "print(response.choices[0].message.content) "
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 3,
110 |    "metadata": {},
111 |    "outputs": [
112 |     {
113 |      "name": "stdout",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "DALL-E妙手繪千圖，意境無邊夢似虛。\n",
117 |       "智慧融匯傳奇景，風華爭艷勝丹青。\n"
118 |      ]
119 |     }
120 |    ],
121 |    "source": [
122 |     "response = client.chat.completions.create(\n",
123 |     "  model= model,\n",
124 |     "  messages = [\n",
125 |     "      {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},  \n",
126 |     "      {'role': 'user', 'content': '寫一首 50 字以內歌頌 OpenAI 的詩，著眼於 DALL-E 模型，以白居易風格撰寫.'}\n",
127 |     "  ],\n",
128 |     "  temperature = 0.9,\n",
129 |     "  max_tokens=400\n",
130 |     ")\n",
131 |     "\n",
132 |     "print(response.choices[0].message.content) "
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "metadata": {},
138 |    "source": [
139 |     "# 3. 提示中以明確的範例告知產出內容之格式\n"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 4,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "人物:\n",
152 |       "1. 貝佐姆（Mohamed Bazoum） - 尼日總統\n",
153 |       "\n",
154 |       "事件:\n",
155 |       "1. 貝佐姆遭衛隊扣押\n",
156 |       "2. 尼日士兵闖國家電視台宣布發動政變\n",
157 |       "3. 廢除憲法\n",
158 |       "4. 停止所有機關運作\n",
159 |       "5. 封鎖國家邊境\n",
160 |       "\n",
161 |       "時間:\n",
162 |       "1. 26日\n",
163 |       "\n",
164 |       "地點:\n",
165 |       "1. 尼日\n",
166 |       "2. 國家電視台\n"
167 |      ]
168 |     }
169 |    ],
170 |    "source": [
171 |     "response = client.chat.completions.create(\n",
172 |     "  model= model,\n",
173 |     "  messages = [ \n",
174 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},\n",
175 |     "    {'role': 'user', 'content': '解析出本文中的人物,事件,時間,地點 \\n\\n' \\\n",
176 |     "      '本文: 英國廣播公司報導，西非國家尼日總統貝佐姆（Mohamed Bazoum）26日遭衛隊扣押，當天稍晚，尼日士兵闖國家電視台宣' \\\n",
177 |     "      '布發動政變，聲稱已廢除憲法、停止所有機關運作且封鎖國家邊境.'  \n",
178 |     "      }\n",
179 |     "  ],\n",
180 |     "  temperature = 0.3,\n",
181 |     "  max_tokens=400\n",
182 |     ")\n",
183 |     "\n",
184 |     "print(response.choices[0].message.content) "
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 5,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "name": "stdout",
194 |      "output_type": "stream",
195 |      "text": [
196 |       "{\n",
197 |       "  \"people\": \"貝佐姆（Mohamed Bazoum）\",\n",
198 |       "  \"location\": \"尼日\",\n",
199 |       "  \"time\": \"26日\",\n",
200 |       "  \"activity\": \"衛隊扣押總統，士兵闖國家電視台宣布發動政變，廢除憲法、停止所有機關運作且封鎖國家邊境\"\n",
201 |       "}\n"
202 |      ]
203 |     }
204 |    ],
205 |    "source": [
206 |     "response = client.chat.completions.create(\n",
207 |     "  model= model,\n",
208 |     "  messages = [ \n",
209 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"}, \n",
210 |     "    {'role': 'user', 'content': '解析出本文中的人物,事件,時間,地點。以單純 JSON 輸出格式: ' \\\n",
211 |     "     '{\"people\":\"-||-\",\"location\":\"-||-\",\"time\":\"-||-\", \"activity\":\"-||-\"} \\n\\n' \\\n",
212 |     "     '本文: 英國廣播公司報導，西非國家尼日總統貝佐姆（Mohamed Bazoum）26日遭衛隊扣押，當天稍晚，尼日士兵闖國家電視台宣' \\\n",
213 |     "     '布發動政變，聲稱已廢除憲法、停止所有機關運作且封鎖國家邊境.'    \n",
214 |     "      }\n",
215 |     "  ],\n",
216 |     "  temperature = 0.3,\n",
217 |     "  max_tokens=400\n",
218 |     ")\n",
219 |     "\n",
220 |     "print(response.choices[0].message.content) "
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "# 4. 從 zero-shot 開始嘗試，進而以 few-shot 提升回覆品質"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 6,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "name": "stdout",
237 |      "output_type": "stream",
238 |      "text": [
239 |       "OpenAI, Microsoft, 夥伴關係, 多年, 多億美元, 投資, 2019, 2021, 獨立研究, AI, 安全, 有用, 強大\n"
240 |      ]
241 |     }
242 |    ],
243 |    "source": [
244 |     "response = client.chat.completions.create(\n",
245 |     "  model= model,\n",
246 |     "  messages = [ \n",
247 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},   \n",
248 |     "    {'role': 'user', 'content': \"擷取以下本文中的關鍵字. 本文: \\n\" \\\n",
249 |     "            \"We’re happy to announce that OpenAI and Microsoft are extending our partnership.\" \\\n",
250 |     "            \"This multi-year, multi-billion dollar investment from Microsoft follows their previous investments \" \\\n",
251 |     "            \"in 2019 and 2021, and will allow us to continue our independent research and develop AI that is \" \\\n",
252 |     "            \"increasingly safe, useful, and powerful. \\n\\n關鍵字:\"\n",
253 |     "    }\n",
254 |     "  ],\n",
255 |     "  temperature = 1,\n",
256 |     "  max_tokens=400\n",
257 |     ")\n",
258 |     "\n",
259 |     "print(response.choices[0].message.content)"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 7,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "name": "stdout",
269 |      "output_type": "stream",
270 |      "text": [
271 |       "OpenAI, Microsoft, multi-year investment, independent research, AI.\n"
272 |      ]
273 |     }
274 |    ],
275 |    "source": [
276 |     "response = client.chat.completions.create(\n",
277 |     "  model= model,\n",
278 |     "  messages = [ \n",
279 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"}, \n",
280 |     "    {'role': 'user', 'content': \"擷取以下本文中的關鍵字. 本文: \\n\" \\\n",
281 |     "                \"本文 1: Stripe provides APIs that web developers can use to integrate\" \\\n",
282 |     "                \"payment processing into their websites and mobile applications.\" \\\n",
283 |     "                \"關鍵字 1: APIs, web developers, websites, mobile applications\" \\\n",
284 |     "                \"##\\n\" \\\n",
285 |     "                \"本文 2: OpenAI has trained cutting-edge language models that are very good at understanding\" \\\n",
286 |     "                \"and generating text. Our API provides access to these models and can be used to solve virtually\" \\\n",
287 |     "                \"any task that involves processing language.\\n\" \\\n",
288 |     "                \"關鍵字 2: OpenAI, language models, text processing, API.\\n\\n\\\" \\\n",
289 |     "                \"##\\n\" \\\n",
290 |     "                \"本文 3: We’re happy to announce that OpenAI and Microsoft are extending our partnership.\" \\\n",
291 |     "                \"This multi-year, multi-billion dollar investment from Microsoft follows their previous investments\" \\\n",
292 |     "                \"in 2019 and 2021, and will allow us to continue our independent research and develop AI that is\" \\\n",
293 |     "                \"increasingly safe, useful, and powerful. \\n\\n\" \\\n",
294 |     "                \"關鍵字 3:\"\n",
295 |     "    }\n",
296 |     "  ],\n",
297 |     "  temperature = 1,\n",
298 |     "  max_tokens=400\n",
299 |     ")\n",
300 |     "\n",
301 |     "print(response.choices[0].message.content)"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "# 5. 減少贅詞 (fluffy) 與不明確的描述\n"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "code",
313 |    "execution_count": 8,
314 |    "metadata": {},
315 |    "outputs": [
316 |     {
317 |      "name": "stdout",
318 |      "output_type": "stream",
319 |      "text": [
320 |       "推出次世代汽車座椅，極致舒適與科技革新並存。這款座椅配有自適應人體工學設計，無論長途或短途行駛都能提供最佳支撐。內建智能按摩系統，讓駕駛和乘客在旅途中隨時享受SPA級放鬆。高品質透氣材質配合內置空調功能，提供四季如春的乘坐體驗。讓您的每一次駕駛，都成為奢華享受。\n",
321 |       "CompletionUsage(completion_tokens=126, prompt_tokens=117, total_tokens=243, completion_tokens_details=None, prompt_tokens_details=None)\n"
322 |      ]
323 |     }
324 |    ],
325 |    "source": [
326 |     "response = client.chat.completions.create(\n",
327 |     "  model= model,\n",
328 |     "  messages = [\n",
329 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},      \n",
330 |     "    {'role': 'user', 'content': '請用正體中文幫我描述一款新的產品，這個產品是一個次世代的汽車座椅' \\\n",
331 |     "     '，在你描述這個產品的時候；要把它幾項好棒棒的地方寫出來，還有你的文案的內容最好簡短一點，幾句話就好不用寫的太囉嗦太長哦。'    \n",
332 |     "    }\n",
333 |     "  ],\n",
334 |     "  temperature = 0.9,\n",
335 |     "  max_tokens=400\n",
336 |     ")\n",
337 |     "\n",
338 |     "print(response.choices[0].message.content)\n",
339 |     "print(response.usage)"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 9,
345 |    "metadata": {},
346 |    "outputs": [
347 |     {
348 |      "name": "stdout",
349 |      "output_type": "stream",
350 |      "text": [
351 |       "這款全新一代的汽車座椅融合了頂尖科技與卓越舒適度，專為現代駕駛者設計。具備智能調節功能，能夠自動適應不同體型的乘客，提供最符合人體工學的支撐。更有內建按摩與加熱功能，讓您在長途駕駛中也能享受無與倫比的舒適體驗。\n",
352 |       "CompletionUsage(completion_tokens=102, prompt_tokens=68, total_tokens=170, completion_tokens_details=None, prompt_tokens_details=None)\n"
353 |      ]
354 |     }
355 |    ],
356 |    "source": [
357 |     "response = client.chat.completions.create(\n",
358 |     "  model= model,\n",
359 |     "  messages = [\n",
360 |     "    {\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},  \n",
361 |     "    {'role': 'user', 'content': 'Write a Traditional Chinese description for a new product. ' \\\n",
362 |     "     'This product is a new generation of car seat. Use a 3 to 5 concise sentence paragraph to describe this product.'    \n",
363 |     "    }\n",
364 |     "  ],\n",
365 |     "  temperature = 0.9,\n",
366 |     "  max_tokens=400\n",
367 |     ")\n",
368 |     "\n",
369 |     "print(response.choices[0].message.content)\n",
370 |     "print(response.usage)"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "# 6. 與其告訴模型說不該做什麼，不如直接說該做什麼"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 32,
383 |    "metadata": {},
384 |    "outputs": [
385 |     {
386 |      "name": "stdout",
387 |      "output_type": "stream",
388 |      "text": [
389 |       "申し訳ありませんが、英語を使用せずに対応させていただきます。ログインできない問題の詳細を教えていただけますか？\n"
390 |      ]
391 |     }
392 |    ],
393 |    "source": [
394 |     "response = client.chat.completions.create(\n",
395 |     "  model= model,\n",
396 |     "  messages = [{\"role\":\"system\",\"content\":\"You are an AI Agent. DO NOT ASK USERNAME OR PASSWORD.\" \\\n",
397 |     "               \" DO NOT REPEAT.DO NOT USE ENGLISH TO REPLY\" \\\n",
398 |     "               \" DO NOT USE ENGLISH TO REPLY\" \n",
399 |     "               },\n",
400 |     "              {\"role\":\"user\",\"content\":\"I can’t log in to my account.\"}\n",
401 |     "             ],\n",
402 |     "  temperature = 0.9,\n",
403 |     "  max_tokens=400\n",
404 |     ")\n",
405 |     "\n",
406 |     "print(response.choices[0].message.content)"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "code",
411 |    "execution_count": 36,
412 |    "metadata": {},
413 |    "outputs": [
414 |     {
415 |      "name": "stdout",
416 |      "output_type": "stream",
417 |      "text": [
418 |       "很抱歉聽到你無法登入帳號。如果你遇到登入問題，請嘗試以下步驟：\n",
419 |       "\n",
420 |       "1. 確認你輸入的電子郵件地址或使用者名稱正確無誤。\n",
421 |       "2. 檢查你輸入的密碼是否正確，注意大小寫區分。\n",
422 |       "3. 如果忘記密碼，可以使用 \"忘記密碼\" 功能來重設密碼。\n",
423 |       "4. 確認你的網路連線正常。\n",
424 |       "5. 清除瀏覽器的快取和Cookie，然後重試。\n",
425 |       "\n",
426 |       "如果以上步驟無法解決問題，建議你查看我們的幫助文章，裡面有更多詳細的解決方法：www.samplewebsite.com/help/faq 。希望這些資訊能幫助你順利登入帳號。\n"
427 |      ]
428 |     }
429 |    ],
430 |    "source": [
431 |     "response = client.chat.completions.create(\n",
432 |     "  model= model,\n",
433 |     "  messages = [{\"role\":\"system\",\"content\":\"You are an AI Agent using 繁體中文 to reply, response text do not use markdown systex. The agent will attempt to diagnose the problem \" \\\n",
434 |     "               \"and suggest a solution, whilst refraining from asking any questions related to PII.Instead of asking for PII, such as username or password,\" \\\n",
435 |     "               \"refer the user to the help article www.samplewebsite.com/help/faq , \"},\n",
436 |     "              {\"role\":\"user\",\"content\":\"I can’t log in to my account.\"}\n",
437 |     "             ],\n",
438 |     "  temperature = 0.9,\n",
439 |     "  max_tokens=400\n",
440 |     ")\n",
441 |     "\n",
442 |     "print(response.choices[0].message.content)"
443 |    ]
444 |   }
445 |  ],
446 |  "metadata": {
447 |   "kernelspec": {
448 |    "display_name": "Python 3",
449 |    "language": "python",
450 |    "name": "python3"
451 |   },
452 |   "language_info": {
453 |    "codemirror_mode": {
454 |     "name": "ipython",
455 |     "version": 3
456 |    },
457 |    "file_extension": ".py",
458 |    "mimetype": "text/x-python",
459 |    "name": "python",
460 |    "nbconvert_exporter": "python",
461 |    "pygments_lexer": "ipython3",
462 |    "version": "3.11.9"
463 |   },
464 |   "orig_nbformat": 4
465 |  },
466 |  "nbformat": 4,
467 |  "nbformat_minor": 2
468 | }
469 | 


--------------------------------------------------------------------------------
/07_prompt_engineering.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 提示工程（Prompt engineering）一些技巧\n",
  8 |     "\n",
  9 |     "提示工程是人工智慧中的一個概念，特別是自然語言處理 (NLP) 領域。 在提示工程中，透過用戶輸入任務的描述。引導與干涉模型產出正確的結果。這種以提示為基礎的學習（prompt-based learning）」是運用大型語言模型的主要方式。提示工程有多種技巧 (https://github.com/dair-ai/Prompt-Engineering-Guide) 以下示範幾種典型的技巧。"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import os\n",
 19 |     "from dotenv import load_dotenv\n",
 20 |     "from openai import AzureOpenAI\n",
 21 |     "\n",
 22 |     "# 載入環境變數\n",
 23 |     "load_dotenv()\n",
 24 |     "\n",
 25 |     "# 呼叫 OpenAI ChatComplete API \n",
 26 |     "def call_openai_api(prompt, temperature=0, max_token=100):\n",
 27 |     "    # 設定呼叫 Azure OpenAI Service API 所需連線資訊\n",
 28 |     "    azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
 29 |     "    api_key=os.getenv(\"AZURE_OPENAI_API_KEY\") \n",
 30 |     "    api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
 31 |     "    \n",
 32 |     "    client = AzureOpenAI(\n",
 33 |     "        azure_endpoint=azure_endpoint, \n",
 34 |     "        api_key=api_key,  \n",
 35 |     "        api_version=api_version\n",
 36 |     "    )\n",
 37 |     "    model = os.getenv('CHAT_DEPLOYMENT_NAME')\n",
 38 |     "\n",
 39 |     "    response = client.chat.completions.create(\n",
 40 |     "        model= model,    \n",
 41 |     "        messages =  [{\"role\": \"system\", \"content\": \"你是以正體中文回覆的機器人，不要使用 Markdown 語法回覆。\"},{'role': 'user', 'content': prompt}],\n",
 42 |     "        temperature = temperature,\n",
 43 |     "        max_tokens=max_token)\n",
 44 |     "\n",
 45 |     "    return (response.choices[0].message.content)"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "attachments": {},
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "# 思維鍊 (Chain of Thought, CoT)\n",
 54 |     "\n",
 55 |     "大型語言模型在面對一個需要數學運算與推理的問題任務，直接生成的答案往往是錯誤的，但在過去幾年研究發現，將決問題的步驟逐一拆解，搭配精心設計的提示，能夠大幅提高大型語言模型生成答案的正確性。這種引導大型語言模型思路的技巧稱為思維鍊 (Chain of Thought, COT)，以下是一些的思維鍊範例。"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 2,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "name": "stdout",
 65 |      "output_type": "stream",
 66 |      "text": [
 67 |       "根據情境描述，A 和 B 正在看電視，D 正在睡覺，B 正在吃炒麵，E 正在打乒乓球。並沒有提到 C 在做什麼。因此，無法確定 C 在做什麼。\n"
 68 |      ]
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "# 以零樣本學習（Zero-shot learning）的方式，GPT-4o 的答案是錯的\n",
 73 |     "\n",
 74 |     "PROMPT_ZERO_SHOT =  \"情境：五個人（A、B、C、D 和 E）在一個房間裡。\"  \\\n",
 75 |     "                    \"A 和 B 正在看電視，D 正在睡覺，B 正在吃炒麵，E 正在打乒乓球。\" \\\n",
 76 |     "                    \"突然，電話響了。B 走出房間接電話。請問 C 在做什麼？\"\n",
 77 |     "\n",
 78 |     "response = call_openai_api(PROMPT_ZERO_SHOT, temperature=0, max_token=600)\n",
 79 |     "\n",
 80 |     "print(response)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "以下開始使用思維鍊 (CoT) 引導模型推理"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 3,
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "根據提示，乒乓球一定要兩個人才能打，而E正在打乒乓球，因此C應該是和E一起打乒乓球。\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "# 以 Few Shots 搭配思維鍊 (CoT) , 引導出正確的答案\n",
105 |     "\n",
106 |     "PROMPT_FEW_SHOTS=  \"情境：五個人（A、B、C、D 和 E）在一個房間裡。\"  \\\n",
107 |     "                    \"A 和 B 正在看電視，D 正在睡覺，B 正在吃炒麵，E 正在打乒乓球。\" \\\n",
108 |     "                    \"突然，電話響了。B 走出房間接電話。請問 C 在做什麼？\" \\\n",
109 |     "                    \"提示:乒乓球一定要兩個人才能打\"\n",
110 |     "\n",
111 |     "response = call_openai_api(PROMPT_FEW_SHOTS, temperature=0, max_token=600)\n",
112 |     "\n",
113 |     "print(response)"
114 |    ]
115 |   },
116 |   {
117 |    "attachments": {},
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "# 更多思維鍊 (CoT) 提示的案例\n",
122 |     "\n",
123 |     "\n"
124 |    ]
125 |   },
126 |   {
127 |    "attachments": {},
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "# Program-aided Language Models\n",
132 |     "PAL (Program-aided Language) Models 是思維鍊的技巧之一，是利用程式語言的數學公式來引導大型語言模型推理 (https://cobusgreyling.medium.com/pal-program-aided-large-language-models-30db3e59f796)， 原本 GPT 是大型語言模型原本並非設計出來處理數學問題，以 PAL 能夠提升數學計算能力，隨著模型版本更新，數學推理能力也逐漸增強，許多傳統代數問題 GPT-4o 已可以在 Zero-Shot 的情況下回答正確。"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 4,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "首先，我們先計算故事書的數量。故事書佔總數的60%，所以：\n",
145 |       "\n",
146 |       "故事書數量 = 1200本 * 60% = 1200本 * 0.6 = 720本\n",
147 |       "\n",
148 |       "接下來，我們計算科技書和漫畫畫的總數。由於故事書佔了720本，剩下的就是科技書和漫畫畫的數量：\n",
149 |       "\n",
150 |       "科技書和漫畫畫的總數 = 1200本 - 720本 = 480本\n",
151 |       "\n",
152 |       "根據題目，科技書和漫畫畫的數量比是2：3。假設科技書的數量是2x，漫畫畫的數量是3x，則：\n",
153 |       "\n",
154 |       "2x + 3x = 480本\n",
155 |       "5x = 480本\n",
156 |       "x = 480本 / 5\n",
157 |       "x = 96本\n",
158 |       "\n",
159 |       "因此，漫畫畫的數量是3x：\n",
160 |       "\n",
161 |       "漫畫畫數量 = 3 * 96本 = 288本\n",
162 |       "\n",
163 |       "所以，圖書館有288本漫畫畫。\n"
164 |      ]
165 |     }
166 |    ],
167 |    "source": [
168 |     "# 以零樣本學習（Zero-shot learning）處理單純代數問題 GPT-4o 已經不需要 PAL 引導就能回答\n",
169 |     "\n",
170 |     "PROMPT_ZERO_SHOT =  \"學校圖書館有故事書、科技書和漫畫畫共1200本，其中故事書佔60%，科技書和漫畫畫數量比是2：3，圖書館有多少本漫畫畫?\"\n",
171 |     "\n",
172 |     "response = call_openai_api(PROMPT_ZERO_SHOT, temperature=0, max_token=600)\n",
173 |     "\n",
174 |     "print(response)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 5,
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "name": "stdout",
184 |      "output_type": "stream",
185 |      "text": [
186 |       "3.14 比 3.9 大。\n"
187 |      ]
188 |     }
189 |    ],
190 |    "source": [
191 |     "# GPT-4o 對於小數點判斷仍有可能判斷錯誤\n",
192 |     "\n",
193 |     "PROMPT_ZERO_SHOT =  \"3.14 與 3.9 哪個數字比較大?\"\n",
194 |     "\n",
195 |     "response = call_openai_api(PROMPT_ZERO_SHOT, temperature=0, max_token=600)\n",
196 |     "\n",
197 |     "print(response)"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 15,
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "name": "stdout",
207 |      "output_type": "stream",
208 |      "text": [
209 |       "根據你提供的方法，我們可以計算 c = 3.14 - 3.9。\n",
210 |       "\n",
211 |       "c = 3.14 - 3.9 = -0.76\n",
212 |       "\n",
213 |       "因為 c < 0，所以 3.9 比 3.14 大。\n"
214 |      ]
215 |     }
216 |    ],
217 |    "source": [
218 |     "# 以 PAL 方式引導 GPT-4o 比較福點數大小，正確計算機會大幅增加\n",
219 |     "\n",
220 |     "PROMPT_FEW_SHOTS  =  \"比較兩個浮點數 a 與 b 大小時，可利用 c = a -b 的結果來判斷，若 c > 0 則 a 比 b 大， 若 c < 0 則 b 比 a 大 \" \\\n",
221 |     "                     \"請問 3.14 與 3.9 哪個數字比較大?\"\n",
222 |     "\n",
223 |     "response = call_openai_api(PROMPT_FEW_SHOTS, temperature=0, max_token=600)\n",
224 |     "\n",
225 |     "print(response)"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 17,
231 |    "metadata": {},
232 |    "outputs": [
233 |     {
234 |      "name": "stdout",
235 |      "output_type": "stream",
236 |      "text": [
237 |       "要鋪設一段長100公尺的鐵路，每根鐵軌長10公尺，因此需要的鐵軌數量可以通過以下計算得出：\n",
238 |       "\n",
239 |       "100公尺 ÷ 10公尺/根 = 10根\n",
240 |       "\n",
241 |       "所以，需要10根鐵軌。\n"
242 |      ]
243 |     }
244 |    ],
245 |    "source": [
246 |     "# 不了解一條鐵路需要兩條鐵軌，GPT-4o 會回答錯誤\n",
247 |     "\n",
248 |     "PROMPT_ZERO_SHOT =  \"一段長100公尺的鐵路，用10公尺長的鐵軌鋪，要多少根鐵軌？\"\n",
249 |     "\n",
250 |     "response = call_openai_api(PROMPT_ZERO_SHOT, temperature=0, max_token=600)\n",
251 |     "\n",
252 |     "print(response)"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 16,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "name": "stdout",
262 |      "output_type": "stream",
263 |      "text": [
264 |       "我們可以使用相同的計算方式來解決這個問題。\n",
265 |       "\n",
266 |       "1. 一條鐵路需要兩條鐵軌。\n",
267 |       "2. 總鐵軌長度 = 鐵路長度 x 2\n",
268 |       "3. 單根鐵軌長度 = 10 公尺\n",
269 |       "4. 總鐵軌數量 = 總鐵軌長度 / 單根鐵軌長度\n",
270 |       "\n",
271 |       "現在我們來計算：\n",
272 |       "\n",
273 |       "1. 鐵路長度 = 800 公尺\n",
274 |       "2. 總鐵軌長度 = 800 公尺 x 2 = 1600 公尺\n",
275 |       "3. 單根鐵軌長度 = 10 公尺\n",
276 |       "4. 總鐵軌數量 = 1600 公尺 / 10 公尺 = 160 根\n",
277 |       "\n",
278 |       "所以，一段長 800 公尺的鐵路，用 10 公尺長的鐵軌鋪，需要 160 根鐵軌。\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "# 以 PAL 方式引導 GPT-4o 正確計算\n",
284 |     "\n",
285 |     "PROMPT_FEW_SHOTS =  \"一段長100公尺的鐵路，用10公尺長的鐵軌鋪，要多少根鐵軌？\" \\\n",
286 |     "                    \"計算方式: 一條鐵路需要兩條鐵軌 1 railway = 2 rails\" \\\n",
287 |     "                    \" total_rails_length = 1 x 2 100  = 200\" \\\n",
288 |     "                    \" single_rail_length = 10\" \\\n",
289 |     "                    \" total_rails = total_rails_length / single_rail_length = 200 / 10 = 20 ，所以需要 20 根鐵軌\" \\\n",
290 |     "                    \"一段長800公尺的鐵路，用10公尺長的鐵軌鋪，要多少根鐵軌？\"\n",
291 |     "response = call_openai_api (PROMPT_FEW_SHOTS, temperature=0, max_token=600)\n",
292 |     "\n",
293 |     "print(response)"
294 |    ]
295 |   },
296 |   {
297 |    "attachments": {},
298 |    "cell_type": "markdown",
299 |    "metadata": {},
300 |    "source": [
301 |     "# 常識推論 (Commonsense Reasoning)\n",
302 |     "\n",
303 |     "2019 年使用 OpenAI GPT 語言模型架構持續增加人類常識知識庫加以訓練 https://www.quantamagazine.org/common-sense-comes-to-computers-20200430/ ，幾乎可達到接近人類水準的常識性推理。近年來大型語言模型持續補充了更多高品質的知識，幾乎已經具備人類水準常識推論能力。以下的經典問題早期 GPT-3.5 Turbo 版本都無法正確回應目前，而目前 GPT-4o 都可以做出正確的常識推論，尚在尋找 GPT-4o 無法做出正確的常識推論的問題。"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": 45,
309 |    "metadata": {},
310 |    "outputs": [
311 |     {
312 |      "name": "stdout",
313 |      "output_type": "stream",
314 |      "text": [
315 |       "這個旅遊行程安排得非常緊湊，並且涉及兩個不同的城市（巴黎和雪梨），這在現實中是不太可能實現的，因為這兩個城市相距甚遠，需要長時間的飛行。因此，這樣的行程可能會讓人感到壓力和疲憊。\n",
316 |       "\n",
317 |       "如果要選擇一種情緒，我可能會選擇 a. 生氣，因為這樣的行程安排不切實際，會讓人感到困惑和不滿。\n"
318 |      ]
319 |     }
320 |    ],
321 |    "source": [
322 |     "# 地理空間常識問題 - GPT-3.5 得到錯誤的答案，GPT-4o 已經可以得到正確的答案\n",
323 |     "\n",
324 |     "PROMPT = \"3月5日上午九點行程巴黎凡爾賽宮，下午參觀巴黎聖母院，晚上八點參加夜遊塞納河。\" \\\n",
325 |     "\"3月6上午八點參觀雪梨歌劇院，下午參觀雪梨港灣大橋，晚上八點參加夜遊雪梨港邊海景餐廳。\" \\\n",
326 |     " \"這個旅遊行程你可能會是哪一種情緒 a. 生氣 b. 無聊 c. 快樂 d. 輕鬆，請問答案是哪一個?\"\n",
327 |     "response = call_openai_api(PROMPT,temperature=0 , max_token=800)\n",
328 |     "print(response)"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "code",
333 |    "execution_count": 32,
334 |    "metadata": {},
335 |    "outputs": [
336 |     {
337 |      "name": "stdout",
338 |      "output_type": "stream",
339 |      "text": [
340 |       "根據提供的信息，小美在上午 9 點的心率為 75 次/分，晚上 7 點的血壓為 120/80，這些數據顯示她在這些時間點都是活著的。她在晚上 11 點去世，這意味著她在當天晚上 11 點之前都是活著的。\n",
341 |       "\n",
342 |       "因此，可以合理地推斷，小美在前天中午還是活著的。\n"
343 |      ]
344 |     }
345 |    ],
346 |    "source": [
347 |     "# 生理常識問題 - GPT-4 得到錯誤的答案，GPT-4o 已經可以得到正確的答案\n",
348 |     "\n",
349 |     "PROMPT = \"小美在上午 9 點的心率為 75 次/分，晚上 7 點的血壓為 120/80。她在晚上 11 點去世。她中午時刻還活著嗎？ \"\n",
350 |     "response = call_openai_api(PROMPT,temperature=0 , max_token=800)\n",
351 |     "\n",
352 |     "print(response)"
353 |    ]
354 |   },
355 |   {
356 |    "cell_type": "code",
357 |    "execution_count": 27,
358 |    "metadata": {},
359 |    "outputs": [
360 |     {
361 |      "name": "stdout",
362 |      "output_type": "stream",
363 |      "text": [
364 |       "如果游泳池的水被加热到摄氏100度（即沸点），那么在这种极端条件下，无论是水獭还是人类游泳选手，都无法在这样的环境中游泳。摄氏100度的水会导致严重的烫伤，甚至可能致命。因此，在这种情况下，比赛根本无法进行，因为任何进入这种高温水中的生物都会受到严重伤害。\n",
365 |       "\n",
366 |       "为了进行公平和安全的比赛，游泳池的水温通常会保持在一个适宜的范围内，通常在摄氏25到28度之间。在这种正常的水温下，人类游泳选手和水獭的比赛结果可能会有所不同，但在摄氏100度的水中，比赛是不可能进行的。\n"
367 |      ]
368 |     }
369 |    ],
370 |    "source": [
371 |     "# 生理常識問題 - GPT-3.5 得到錯誤的答案，GPT-4o 已經可以得到正確的答案\n",
372 |     "\n",
373 |     "PROMPT = \"游泳池加熱至攝氏 100 度，水獺與人類游泳選手在這個游泳池比賽 100 公尺游泳，誰會勝出? \"\n",
374 |     "response = call_openai_api(PROMPT,temperature=0 , max_token=800)\n",
375 |     "\n",
376 |     "print(response)"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 29,
382 |    "metadata": {},
383 |    "outputs": [
384 |     {
385 |      "name": "stdout",
386 |      "output_type": "stream",
387 |      "text": [
388 |       "在1968年，太陽系被認為有九大行星。這九大行星依次是：水星、金星、地球、火星、木星、土星、天王星、海王星和冥王星。因此，對於1968年的學生來說，這題的正確答案是九大行星。\n",
389 |       "\n",
390 |       "需要注意的是，2006年國際天文學聯合會（IAU）重新定義了行星的標準，將冥王星重新分類為矮行星。因此，根據現代標準，太陽系只有八大行星，但這不適用於1968年的情況。\n"
391 |      ]
392 |     }
393 |    ],
394 |    "source": [
395 |     "# 歷史時事常識問題 - GPT-3.5 得到錯誤的答案，GPT-4o 已經可以得到正確的答案\n",
396 |     "\n",
397 |     "PROMPT = \"對於一個在 1968 年進行自然考試的學生而言，在考卷中有一題 '太陽系有幾大行星？' 這題正確答案是什麼? \"\n",
398 |     "response = call_openai_api(PROMPT,temperature=0 , max_token=800)\n",
399 |     "\n",
400 |     "print(response)"
401 |    ]
402 |   },
403 |   {
404 |    "cell_type": "code",
405 |    "execution_count": 30,
406 |    "metadata": {},
407 |    "outputs": [
408 |     {
409 |      "name": "stdout",
410 |      "output_type": "stream",
411 |      "text": [
412 |       "在1890年，根據當時的天文知識，太陽系被認為有8顆大行星。這些行星是：\n",
413 |       "\n",
414 |       "1. 水星 (Mercury)\n",
415 |       "2. 金星 (Venus)\n",
416 |       "3. 地球 (Earth)\n",
417 |       "4. 火星 (Mars)\n",
418 |       "5. 木星 (Jupiter)\n",
419 |       "6. 土星 (Saturn)\n",
420 |       "7. 天王星 (Uranus)\n",
421 |       "8. 海王星 (Neptune)\n",
422 |       "\n",
423 |       "冥王星（Pluto）是在1930年被發現的，因此在1890年並不被認為是太陽系的一部分。即使在冥王星被發現後，它在2006年被重新分類為矮行星。因此，1890年的正確答案是8顆大行星。\n"
424 |      ]
425 |     }
426 |    ],
427 |    "source": [
428 |     "# 歷史時事常識問題 - GPT-3.5 得到錯誤的答案，GPT-4o 已經可以得到正確的答案\n",
429 |     "\n",
430 |     "PROMPT = \"對於一個在 1890 年進行自然考試的學生而言，在考卷中有一題 '太陽系有幾大行星？' 這題正確答案是什麼? \"\n",
431 |     "response = call_openai_api(PROMPT,temperature=0 , max_token=800)\n",
432 |     "\n",
433 |     "print(response)"
434 |    ]
435 |   }
436 |  ],
437 |  "metadata": {
438 |   "kernelspec": {
439 |    "display_name": "Python 3",
440 |    "language": "python",
441 |    "name": "python3"
442 |   },
443 |   "language_info": {
444 |    "codemirror_mode": {
445 |     "name": "ipython",
446 |     "version": 3
447 |    },
448 |    "file_extension": ".py",
449 |    "mimetype": "text/x-python",
450 |    "name": "python",
451 |    "nbconvert_exporter": "python",
452 |    "pygments_lexer": "ipython3",
453 |    "version": "3.11.9"
454 |   },
455 |   "orig_nbformat": 4
456 |  },
457 |  "nbformat": 4,
458 |  "nbformat_minor": 2
459 | }
460 | 


--------------------------------------------------------------------------------
/08_langchain_getting_started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# LangChain 快速入門\n",
  8 |     "[LangChain](https://github.com/langchain-ai/langchain) 是針對大型語言模型 (LLM) 應用的相關實作加以抽象化的程式語言框架，支援 Python 與 Node.js 兩種程式語言，也能夠支援包含 OpenAI GPT 等多種 LLM ，在 LangChain 中不僅僅限於 LLM 之呼叫，還能納入一系列相關工具的呼叫。藉由 LangChain 所提供的 Chain 之標準介面，目前已經有著大量工具與資料來源能夠整合至 LangChain 框架之內。鍊是一種通用的概念，可以將特定運算簡化為一個可以重複使用；包含輸入/處理/輸出的一個元件，下圖就是 LangChain 最常用的鏈 LLMChain 為例，它結合了 PromptTemplate 獲取用戶輸入，支援多種大型語言模型，並將大型語言模型輸出的內容透過 Output Parser 轉換為特定格式， Chain 也可以將輸出再傳遞給其他鍊組合出更複雜的應用。\n",
  9 |     "\n",
 10 |     "![Chain 概念，以 LLMChain 為例](./assets/chain-concept.png)\n",
 11 |     "\n",
 12 |     "載入會使用到的套件"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 68,
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "name": "stdout",
 22 |      "output_type": "stream",
 23 |      "text": [
 24 |       "Requirement already satisfied: langchain in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.3.2)\n",
 25 |       "Requirement already satisfied: langchain-core in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.3.9)\n",
 26 |       "Requirement already satisfied: langchain-openai in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.2.2)\n",
 27 |       "Requirement already satisfied: langchain-community in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.3.1)\n",
 28 |       "Requirement already satisfied: langchain-experimental in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.3.2)\n",
 29 |       "Requirement already satisfied: python-dotenv in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.0.0)\n",
 30 |       "Requirement already satisfied: wikipedia in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.4.0)\n",
 31 |       "Requirement already satisfied: faiss-cpu in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.7.4)\n",
 32 |       "Requirement already satisfied: PyYAML>=5.3 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (6.0.1)\n",
 33 |       "Requirement already satisfied: SQLAlchemy<3,>=1.4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (2.0.19)\n",
 34 |       "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (3.8.4)\n",
 35 |       "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (0.3.0)\n",
 36 |       "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (0.1.131)\n",
 37 |       "Requirement already satisfied: numpy<2,>=1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (1.24.2)\n",
 38 |       "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (2.9.2)\n",
 39 |       "Requirement already satisfied: requests<3,>=2 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (2.28.2)\n",
 40 |       "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain) (8.2.2)\n",
 41 |       "Requirement already satisfied: jsonpatch<2.0,>=1.33 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-core) (1.33)\n",
 42 |       "Requirement already satisfied: packaging<25,>=23.2 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-core) (24.1)\n",
 43 |       "Requirement already satisfied: typing-extensions>=4.7 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-core) (4.12.2)\n",
 44 |       "Requirement already satisfied: openai<2.0.0,>=1.40.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-openai) (1.51.0)\n",
 45 |       "Requirement already satisfied: tiktoken<1,>=0.7 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-openai) (0.8.0)\n",
 46 |       "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-community) (0.5.14)\n",
 47 |       "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langchain-community) (2.5.2)\n",
 48 |       "Requirement already satisfied: beautifulsoup4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from wikipedia) (4.12.2)\n",
 49 |       "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.2.0)\n",
 50 |       "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (3.1.0)\n",
 51 |       "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n",
 52 |       "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (4.0.2)\n",
 53 |       "Requirement already satisfied: yarl<2.0,>=1.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.8.2)\n",
 54 |       "Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n",
 55 |       "Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
 56 |       "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.20.1)\n",
 57 |       "Requirement already satisfied: typing-inspect<1,>=0.4.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
 58 |       "Requirement already satisfied: jsonpointer>=1.9 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jsonpatch<2.0,>=1.33->langchain-core) (3.0.0)\n",
 59 |       "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (0.24.1)\n",
 60 |       "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.7)\n",
 61 |       "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n",
 62 |       "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (3.7.1)\n",
 63 |       "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (1.9.0)\n",
 64 |       "Requirement already satisfied: jiter<1,>=0.4.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (0.5.0)\n",
 65 |       "Requirement already satisfied: sniffio in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (1.3.0)\n",
 66 |       "Requirement already satisfied: tqdm>4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from openai<2.0.0,>=1.40.0->langchain-openai) (4.65.0)\n",
 67 |       "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
 68 |       "Requirement already satisfied: pydantic-core==2.23.4 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n",
 69 |       "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2->langchain) (3.4)\n",
 70 |       "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2->langchain) (1.26.15)\n",
 71 |       "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2->langchain) (2022.12.7)\n",
 72 |       "Requirement already satisfied: greenlet!=0.4.17 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from SQLAlchemy<3,>=1.4->langchain) (2.0.2)\n",
 73 |       "Requirement already satisfied: regex>=2022.1.18 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tiktoken<1,>=0.7->langchain-openai) (2023.6.3)\n",
 74 |       "Requirement already satisfied: soupsieve>1.2 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from beautifulsoup4->wikipedia) (2.4.1)\n",
 75 |       "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (0.17.3)\n",
 76 |       "Requirement already satisfied: colorama in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm>4->openai<2.0.0,>=1.40.0->langchain-openai) (0.4.6)\n",
 77 |       "Requirement already satisfied: mypy-extensions>=0.3.0 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n",
 78 |       "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\tomlee\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from httpcore<0.18.0,>=0.15.0->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (0.14.0)\n",
 79 |       "Note: you may need to restart the kernel to use updated packages.\n"
 80 |      ]
 81 |     },
 82 |     {
 83 |      "name": "stderr",
 84 |      "output_type": "stream",
 85 |      "text": [
 86 |       "\n",
 87 |       "[notice] A new release of pip is available: 24.2 -> 25.0.1\n",
 88 |       "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
 89 |      ]
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "%pip install langchain langchain-core langchain-openai langchain-community langchain-experimental python-dotenv wikipedia faiss-cpu"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "## 1. 運用輔助函式庫，從環境變數取得 Azure OpenAI API 相關資訊\n",
101 |     "\n",
102 |     "開始使用 LangChain，Azure OpenAI Service 的主要參數都是透過環境變數設定完成，只需要指定以下幾個環境變數即可\n",
103 |     "\n",
104 |     "+ AZURE_OPENAI_ENDPOINT\n",
105 |     "+ AZURE_OPENAI_API_KEY\n",
106 |     "+ AZURE_OPENAI_API_VERSION\n",
107 |     "+ CHAT_DEPLOYMENT_NAME\n",
108 |     "+ EMBEDDINS_DEPLOYMENT_NAME"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 1,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "import os\n",
118 |     "from dotenv import load_dotenv\n",
119 |     "from langchain_core.prompts import PromptTemplate\n",
120 |     "from langchain_core.output_parsers import StrOutputParser\n",
121 |     "from langchain_openai.chat_models.azure import AzureChatOpenAI\n",
122 |     "from langchain_community.document_loaders import WikipediaLoader\n",
123 |     "from langchain.schema import (\n",
124 |     "    SystemMessage,\n",
125 |     "    HumanMessage,\n",
126 |     "    AIMessage\n",
127 |     ")\n",
128 |     "\n",
129 |     "# 載入環境變數\n",
130 |     "load_dotenv()\n",
131 |     "\n",
132 |     "# 設定呼叫 OpenAI API 所需連線資訊\n",
133 |     "chat_model = os.getenv(\"CHAT_DEPLOYMENT_NAME\")\n",
134 |     "emb_model = os.getenv(\"EMBEDDINS_DEPLOYMENT_NAME\")\n",
135 |     "api_ver = os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
136 |     "azure_openai_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "## 2. 最簡單的自動完成範例\n"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {
149 |     "vscode": {
150 |      "languageId": "ini"
151 |     }
152 |    },
153 |    "source": [
154 |     "LangChain 可以透過針對 Azure OpenAI Service 所設計的 AzureChatOpenAI 來進行對話。AzureChatOpenAI 會將使用者的輸入轉換成 OpenAI  chat completions API 所需的格式與 Azure OpenAI Service 呼叫端點，並將 chat completions API 的回應轉換成 LangChain 的格式。"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 104,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "content='世界上最高的三座山按海拔高度排名如下：\\n\\n1. **珠穆朗玛峰（Mount Everest）**  \\n   - **海拔高度**：8,848.86米（29,031.7英尺）  \\n   - **位置**：位于中国与尼泊尔边界的喜马拉雅山脉。  \\n   - 珠穆朗玛峰是地球上最高的山峰，也是登山者最向往的目标之一。\\n\\n2. **乔戈里峰（K2）**  \\n   - **海拔高度**：8,611米（28,251英尺）  \\n   - **位置**：位于巴基斯坦与中国交界的喀喇昆仑山脉。  \\n   - K2被称为“野蛮之山”，因其攀登难度极高，仅次于珠穆朗玛峰的高度使其成为世界第二高峰。\\n\\n3. **干城章嘉峰（Kangchenjunga）**  \\n   - **海拔高度**：8,586米（28,169英尺）  \\n   - **位置**：位于尼泊尔和印度锡金邦交界的喜马拉雅山脉。  \\n   - 干城章嘉峰是世界第三高峰，同时也是印度的最高峰。\\n\\n这些山峰都位于亚洲的喜马拉雅山脉或喀喇昆仑山脉，是地理和自然的奇观。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 328, 'prompt_tokens': 15, 'total_tokens': 343, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-a663c01b-53c0-49a4-80ae-da779589492b-0' usage_metadata={'input_tokens': 15, 'output_tokens': 328, 'total_tokens': 343, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
167 |      ]
168 |     }
169 |    ],
170 |    "source": [
171 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
172 |     "respone = chat.invoke(\"世界上最高的三座山?\")\n",
173 |     "print(respone)"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "也可進一步的使用 OpenAI chat completions 的 JSON messages 格式引導語言模型的回應，這樣可以更精準的控制語言模型的回應。"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 105,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "name": "stdout",
190 |      "output_type": "stream",
191 |      "text": [
192 |       "content='1. 聖母峰（珠穆朗瑪峰） - 8848.86 公尺  \\n2. 喬戈里峰（K2） - 8611 公尺  \\n3. 干城章嘉峰 - 8586 公尺  ' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 57, 'prompt_tokens': 35, 'total_tokens': 92, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-c4b3ff89-3fc2-4ec4-ba4a-b10ce302a184-0' usage_metadata={'input_tokens': 35, 'output_tokens': 57, 'total_tokens': 92, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
193 |      ]
194 |     }
195 |    ],
196 |    "source": [
197 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
198 |     "\n",
199 |     "messages = [\n",
200 |     "    SystemMessage(content=\"你是用正體中文的 AI 助手，簡潔回覆\"),\n",
201 |     "    HumanMessage(content=\"世界上最高的三座山?\"),\n",
202 |     "]\n",
203 |     "\n",
204 |     "respone = chat.invoke(messages)\n",
205 |     "print(respone)"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "metadata": {},
211 |    "source": [
212 |     "由語言模型回傳的內容，可以透過各種 Parser 簡化處理解析與輸出的程式碼的複雜度，以下是一個簡單的範例，將前述對話回應以 StrOutputParser 來解析回傳的字串內容。"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": 106,
218 |    "metadata": {},
219 |    "outputs": [
220 |     {
221 |      "name": "stdout",
222 |      "output_type": "stream",
223 |      "text": [
224 |       "content='1. 聖母峰（珠穆朗瑪峰）- 8,848.86公尺  \\n2. 喬戈里峰（K2）- 8,611公尺  \\n3. 干城章嘉峰 - 8,586公尺  ' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 35, 'total_tokens': 95, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-7b2ef526-ee92-4724-a56a-cab7574e8889-0' usage_metadata={'input_tokens': 35, 'output_tokens': 60, 'total_tokens': 95, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n",
225 |       "1. 聖母峰（珠穆朗瑪峰）- 8,848.86公尺  \n",
226 |       "2. 喬戈里峰（K2）- 8,611公尺  \n",
227 |       "3. 干城章嘉峰 - 8,586公尺  \n",
228 |       "<class 'str'>\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "from langchain_core.output_parsers import StrOutputParser\n",
234 |     "\n",
235 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
236 |     "\n",
237 |     "messages = [\n",
238 |     "    SystemMessage(content=\"你是用正體中文的 AI 助手，簡潔回覆\"),\n",
239 |     "    HumanMessage(content=\"世界上最高的三座山?\"),\n",
240 |     "]\n",
241 |     "\n",
242 |     "respone = chat.invoke(messages)\n",
243 |     "print(respone)\n",
244 |     "\n",
245 |     "parser = StrOutputParser()\n",
246 |     "print (parser.invoke(respone)) \n",
247 |     "print(type(parser.invoke(respone)))"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "metadata": {},
253 |    "source": [
254 |     "將前述對話回應以 CommaSeparatedListOutputParser 來解析回傳的字串，產生 List 結構之回覆內容。"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 107,
260 |    "metadata": {},
261 |    "outputs": [
262 |     {
263 |      "name": "stdout",
264 |      "output_type": "stream",
265 |      "text": [
266 |       "content='1. 聖母峰（珠穆朗瑪峰）：8,848.86 公尺  \\n2. 喬戈里峰（K2）：8,611 公尺  \\n3. 干城章嘉峰：8,586 公尺' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 35, 'total_tokens': 89, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-096bcb7a-b990-49d0-8001-9408a18b3b24-0' usage_metadata={'input_tokens': 35, 'output_tokens': 54, 'total_tokens': 89, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n",
267 |       "['1. 聖母峰（珠穆朗瑪峰）：8', '848.86 公尺  \\n2. 喬戈里峰（K2）：8', '611 公尺  \\n3. 干城章嘉峰：8', '586 公尺']\n",
268 |       "<class 'list'>\n"
269 |      ]
270 |     }
271 |    ],
272 |    "source": [
273 |     "from langchain_core.output_parsers import CommaSeparatedListOutputParser \n",
274 |     "\n",
275 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
276 |     "\n",
277 |     "messages = [\n",
278 |     "    SystemMessage(content=\"你是用正體中文的 AI 助手，簡潔回覆\"),\n",
279 |     "    HumanMessage(content=\"世界上最高的三座山?\"),\n",
280 |     "]\n",
281 |     "\n",
282 |     "respone = chat.invoke(messages)\n",
283 |     "print(respone)\n",
284 |     "\n",
285 |     "parser = CommaSeparatedListOutputParser()\n",
286 |     "print (parser.invoke(respone)) \n",
287 |     "print(type(parser.invoke(respone)))"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "markdown",
292 |    "metadata": {},
293 |    "source": [
294 |     "接下嘗試使用 Chain 來處理相同問題，LLMChain 是最基本的 Chain，能夠以 PromptTemplate 增加輸入提示時的彈性，並將中間處理的大型語言模型抽象化，最後視狀況可以搭配多種 OutputParser 來處理輸出。由接下來的範例可以看出，我們可將 LLM Model 的 AzureOpenAI 與 Chat Model 的 AzureChatOpenAI 都使用一致的 LLMChain 鍊來進行處理。"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 108,
300 |    "metadata": {},
301 |    "outputs": [
302 |     {
303 |      "name": "stdout",
304 |      "output_type": "stream",
305 |      "text": [
306 |       "content='世界上最高的兩座山是：\\n\\n1. **珠穆朗玛峰（Mount Everest）**  \\n   - 海拔：8,848.86米  \\n   - 所在地：位于中国和尼泊尔边界的喜马拉雅山脉。  \\n   - 珠穆朗玛峰是地球上海拔最高的山峰，被认为是“世界之巅”。\\n\\n2. **乔戈里峰（K2）**  \\n   - 海拔：8,611米  \\n   - 所在地：位于中国和巴基斯坦边界的喀喇昆仑山脉。  \\n   - 乔戈里峰是世界第二高峰，以其陡峭和极具挑战性的攀登条件闻名，被称为“野蛮之山”。\\n\\n这两座山峰都属于亚洲的高山体系，是世界上最著名的山峰之一，也是许多登山者梦寐以求的挑战目标。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 216, 'prompt_tokens': 15, 'total_tokens': 231, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-ea0050c3-42d9-41f8-aacf-7c455cbb5d11-0' usage_metadata={'input_tokens': 15, 'output_tokens': 216, 'total_tokens': 231, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n",
307 |       "<class 'langchain_core.messages.ai.AIMessage'>\n"
308 |      ]
309 |     }
310 |    ],
311 |    "source": [
312 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
313 |     "# 設定 PromptTemplate\n",
314 |     "prompt = PromptTemplate(input_variables=[\"prompt_str\"],template=\"{prompt_str}\")\n",
315 |     "\n",
316 |     "# 將 AzureOpenAI 以 LLMChain 方式使用\n",
317 |     "\n",
318 |     "chain = prompt | chat\n",
319 |     "response = chain.invoke({\"prompt_str\":\"世界上最高的兩座山?\"})\n",
320 |     "\n",
321 |     "# 單純輸出內容是字串\n",
322 |     "print(response)\n",
323 |     "print(type(response))"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "markdown",
328 |    "metadata": {},
329 |    "source": [
330 |     "LangChain 也提供針對對話形式之 ChatPromptTemplate 來進行對話的輸入，這樣可以更方便的進行對話的處理。"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 109,
336 |    "metadata": {},
337 |    "outputs": [
338 |     {
339 |      "name": "stdout",
340 |      "output_type": "stream",
341 |      "text": [
342 |       "content='吾乃名喚 Tom 之智械助手，專應汝之疑問。\\n\\n夫天下巍峨之巔，以高聳而聞名者，首推三座如下：  \\n一、**珠穆朗瑪峰**，高八千八百四十八公尺，位於喜馬拉雅山脈中，乃全球之最。  \\n二、**喬戈里峰**（K2），高八千六百十一公尺，居次，坐落於喀喇崑崙山脈。  \\n三、**干城章嘉峰**（Kangchenjunga），高八千五百八十六公尺，位於尼泊爾與印度之交界，列第三。  \\n\\n三山之巔，直插霄漢，令人景仰。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 185, 'prompt_tokens': 47, 'total_tokens': 232, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-21019004-2ef3-4f7a-9d75-c349e0c26ca9-0' usage_metadata={'input_tokens': 47, 'output_tokens': 185, 'total_tokens': 232, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
343 |      ]
344 |     }
345 |    ],
346 |    "source": [
347 |     "from langchain_core.prompts import ChatPromptTemplate\n",
348 |     "\n",
349 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
350 |     "\n",
351 |     "# 設定 ChatPromptTemplate\n",
352 |     "prompt = ChatPromptTemplate([\n",
353 |     "    (\"system\", \"你是名叫 {name}，一個用精煉文言文回覆正體中文的 AI 助手\"),\n",
354 |     "    (\"human\", \"{user_input}\"),\n",
355 |     "])\n",
356 |     "\n",
357 |     "chain = prompt | chat\n",
358 |     "response = chain.invoke({\"name\": \"Tom\",\"user_input\": \"你是誰?世界上最高的三座山?\"})\n",
359 |     "print(response)"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "markdown",
364 |    "metadata": {},
365 |    "source": [
366 |     "將前述所有用到的元件組合成一個完整的 Chain，並執行這個 Chain 來完成對話。"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": 110,
372 |    "metadata": {},
373 |    "outputs": [
374 |     {
375 |      "name": "stdout",
376 |      "output_type": "stream",
377 |      "text": [
378 |       "吾乃湯姆，爾之忠僕也。  \n",
379 |       "夫天下之巔，三高者，分列如下：  \n",
380 |       "其一，珠穆朗瑪峰，高八千八百四十八公尺，世界之最。  \n",
381 |       "其二，喬戈里峰（K2），高八千六百十一公尺，次之。  \n",
382 |       "其三，干城章嘉峰（Kangchenjunga），高八千五百八十六公尺，居第三。  \n",
383 |       "此三峰，皆立於喜馬拉雅山脈，巍然聳立，壯哉！\n"
384 |      ]
385 |     }
386 |    ],
387 |    "source": [
388 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
389 |     "\n",
390 |     "# 設定 ChatPromptTemplate\n",
391 |     "prompt = ChatPromptTemplate([\n",
392 |     "    (\"system\", \"你是名叫 {name}，一個用精煉文言文回覆正體中文的 AI 助手\"),\n",
393 |     "    (\"human\", \"{user_input}\"),\n",
394 |     "])\n",
395 |     "\n",
396 |     "# 設定 Output Parser 為 StrOutputParser \n",
397 |     "parser = StrOutputParser ()\n",
398 |     "\n",
399 |     "# 鏈接 PromptTemplate、Chat Model、OutputParser\n",
400 |     "chain = prompt | chat | parser\n",
401 |     "\n",
402 |     "# 執行鏈並顯示回應\n",
403 |     "response = chain.invoke({\"name\": \"Tom\",\"user_input\": \"你是誰?世界上最高的三座山?\"})\n",
404 |     "print(response)"
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "markdown",
409 |    "metadata": {},
410 |    "source": [
411 |     "## 3. 使用文件載入工具載入 Wikipedia 內容\n",
412 |     "\n",
413 |     "LangChain 提供豐富的文件載入 (Documenet Loader) 工具，以便將一般文字檔， CSV 格式檔案，PDF 格式檔案，JSON 格式檔案等眾多檔案格式以一致的 Document 物件來被框架內其他物件所使用，目前 LangChain 有超過七十種以上的文件載入工具可以供選用。接下來以能夠搜尋與載入 Wikipedia 內容 的 WikipediaLoader 作為範例。"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": 111,
419 |    "metadata": {},
420 |    "outputs": [
421 |     {
422 |      "data": {
423 |       "text/plain": [
424 |        "2"
425 |       ]
426 |      },
427 |      "execution_count": 111,
428 |      "metadata": {},
429 |      "output_type": "execute_result"
430 |     }
431 |    ],
432 |    "source": [
433 |     "from langchain_community.document_loaders import WikipediaLoader\n",
434 |     "\n",
435 |     "# 查詢 Wikipedia 台灣蘭嶼條目，透過 load_max_docs 參數限制最多只載入兩篇內容\n",
436 |     "docs = WikipediaLoader(query=\"蘭嶼\", load_max_docs=2).load()\n",
437 |     "len(docs)"
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 112,
443 |    "metadata": {},
444 |    "outputs": [
445 |     {
446 |      "name": "stdout",
447 |      "output_type": "stream",
448 |      "text": [
449 |       "{'title': 'Orchid Island', 'summary': \"Orchid Island, also known by other names, is a 45 km2 (17 sq mi) volcanic island off the southeastern coast of Taiwan, which Orchid Island is part of. It is separated from the Batanes of the Philippines by the Bashi Channel of the Luzon Strait. The island and the nearby Lesser Orchid Island are governed as Lanyu Township in Taitung County, which is one of the county's two insular townships (the other being Lyudao Township).\\nIt is considered a potential World Heritage Site.\", 'source': 'https://en.wikipedia.org/wiki/Orchid_Island'}\n"
450 |      ]
451 |     }
452 |    ],
453 |    "source": [
454 |     "# 顯示第一篇內容的 Metadata\n",
455 |     "print (docs[0].metadata)"
456 |    ]
457 |   },
458 |   {
459 |    "cell_type": "code",
460 |    "execution_count": 113,
461 |    "metadata": {},
462 |    "outputs": [
463 |     {
464 |      "name": "stdout",
465 |      "output_type": "stream",
466 |      "text": [
467 |       "Orchid Island, also known by other names, is a 45 km2 (17 sq mi) volcanic island off the southeastern coast of Taiwan, which Orchid Island is part of. It is separated from the Batanes of the Philippin\n"
468 |      ]
469 |     }
470 |    ],
471 |    "source": [
472 |     "# 顯示第一篇內容前 200 字\n",
473 |     "content = docs[0].page_content[:200]  \n",
474 |     "print(content)"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {},
480 |    "source": [
481 |     "## 4 使用載入的文件詢問問題\n",
482 |     "\n",
483 |     "以基礎模型來回答問題"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": 114,
489 |    "metadata": {},
490 |    "outputs": [
491 |     {
492 |      "name": "stdout",
493 |      "output_type": "stream",
494 |      "text": [
495 |       "content='截至我的知識截止日期2023年10月，蘭嶼鄉鄉長是**夏曼·迦拉牧**。如果您需要最新資訊，建議查詢蘭嶼鄉公所的官方網站或相關政府公告。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 26, 'total_tokens': 86, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-b559b5b6-1aa7-4cc9-99d2-d88c20ecf747-0' usage_metadata={'input_tokens': 26, 'output_tokens': 60, 'total_tokens': 86, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
496 |      ]
497 |     }
498 |    ],
499 |    "source": [
500 |     "# 完全依賴基礎模型回答問題，很高的機會得到來自幻覺的答案\n",
501 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver,temperature=0.5)\n",
502 |     "respone = chat.invoke(\"只依據事實回答，蘭嶼鄉鄉長姓名?\")\n",
503 |     "print(respone)"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "markdown",
508 |    "metadata": {},
509 |    "source": [
510 |     "使用之前已經建立的 chat 來詢問 Wikipedia 下載的文件內容，由回覆的答案可以知道並非來自基礎模型，而是來自下載的 Wikipedia 文件"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": 115,
516 |    "metadata": {},
517 |    "outputs": [
518 |     {
519 |      "name": "stdout",
520 |      "output_type": "stream",
521 |      "text": [
522 |       "content='抱歉，根據您提供的事實中，並未提及蘭嶼鄉鄉長的姓名，因此無法回答此問題。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 35, 'prompt_tokens': 80, 'total_tokens': 115, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-e7f64c83-5166-4adf-9004-6902280e9279-0' usage_metadata={'input_tokens': 80, 'output_tokens': 35, 'total_tokens': 115, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
523 |      ]
524 |     }
525 |    ],
526 |    "source": [
527 |     "# 使用 Wikipeida 載入之文件內容回答問題，無法回答此問題\n",
528 |     "prompt = \"只依據以下事實回答，\\n事實:\"+content+ \"，\\n蘭嶼鄉鄉長姓名?\"\n",
529 |     "respone = chat.invoke(prompt)\n",
530 |     "print(respone)"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "markdown",
535 |    "metadata": {},
536 |    "source": [
537 |     "也可以利用 prompt template 來增加提示的彈性，只是需要將帶入前面取得之 Wikipedia 內容代入 PromptTemplate 物件輸出轉換為字串"
538 |    ]
539 |   },
540 |   {
541 |    "cell_type": "code",
542 |    "execution_count": 116,
543 |    "metadata": {},
544 |    "outputs": [
545 |     {
546 |      "name": "stdout",
547 |      "output_type": "stream",
548 |      "text": [
549 |       "content='根據您提供的事實中，並未提及蘭嶼鄉鄉長的姓名，因此無法回答該問題。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 80, 'total_tokens': 113, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-11-20', 'system_fingerprint': 'fp_b705f0c291', 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}} id='run-b2aac70a-caea-4686-a813-916b347e8c81-0' usage_metadata={'input_tokens': 80, 'output_tokens': 33, 'total_tokens': 113, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n"
550 |      ]
551 |     }
552 |    ],
553 |    "source": [
554 |     "from langchain_core.prompts import PromptTemplate\n",
555 |     "\n",
556 |     "prompt_template = PromptTemplate(input_variables=[\"doc_content\"], template=\"只依據以下事實回答，\\n事實:{doc_content}\\n蘭嶼鄉鄉長姓名?\")\n",
557 |     "prompt_formatted_str: str = prompt_template.format(doc_content = content)\n",
558 |     "\n",
559 |     "respone = chat.invoke(prompt_formatted_str)\n",
560 |     "print(respone)\n"
561 |    ]
562 |   },
563 |   {
564 |    "cell_type": "markdown",
565 |    "metadata": {},
566 |    "source": [
567 |     "## 5 練習在 LangChain 內使用 Azure OpenAI Embeddings\n",
568 |     "利用台北市政府開放資料 [112年度美食在台北「鍋際大賞」店家名單](https://data.taipei/dataset/detail?id=7eec2e8f-02f9-4100-9171-66f6667b8b3d) 為範例，以 LangChain 所提供的 CSV 載入工具，將資料載入後，再使用 Azure OpenAI Embeddings 進行店家名稱與地址的向量計算。並以 Meta 公司 Facebook AI 團隊開放原始程式碼向量資料庫 FAISS 將向量值儲存在本機的檔案系統中，以便後續的向量近似搜尋，FAISS 的近似比對是採用 L2 norm 方式來將兩個向量間的距離正規化，分數越接近零代表兩個向量越相似，這與 OpenAI Embeddings 的餘弦近似 (cosine similarity) 正規化後的分數值不一樣，餘弦近似分數越接近 1 的則兩個向量越相似。"
569 |    ]
570 |   },
571 |   {
572 |    "cell_type": "code",
573 |    "execution_count": 117,
574 |    "metadata": {},
575 |    "outputs": [
576 |     {
577 |      "name": "stdout",
578 |      "output_type": "stream",
579 |      "text": [
580 |       "序號: 1\n",
581 |       "區域: 中山區\n",
582 |       "店家: 滿堂紅頂級麻辣鴛鴦鍋\n",
583 |       "報名組別: 經典鍋物組(799以下)\n",
584 |       "營業地址: 臺北市中山區松江路185號2樓\n",
585 |       "Longitude: 121.5331715\n",
586 |       "Latitude: 25.05724095\n"
587 |      ]
588 |     }
589 |    ],
590 |    "source": [
591 |     "from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings\n",
592 |     "from langchain_community.document_loaders.csv_loader import CSVLoader\n",
593 |     "from langchain_community.vectorstores import FAISS\n",
594 |     "\n",
595 |     "# 建立 Azure OpenAI Embeddings 類別的實例\n",
596 |     "embeddings_model = AzureOpenAIEmbeddings(\n",
597 |     "    model=emb_model,\n",
598 |     "    azure_endpoint=azure_openai_endpoint,\n",
599 |     "    openai_api_version=api_ver,\n",
600 |     "    chunk_size = 16\n",
601 |     ")\n",
602 |     "\n",
603 |     "# 以 LangChain 的 CSVLoader 類別，讀取 CSV 檔案並載入文件\n",
604 |     "loader = CSVLoader(file_path=\"./data/2023-taipei-hot pot-restaurant.csv\",\n",
605 |     "                   csv_args={\n",
606 |     "                    \"delimiter\": \",\",                    \n",
607 |     "                    \"fieldnames\": [\"序號\",\"區域\",\"店家\",\"報名組別\",\"營業地址\",\"Longitude\",\"Latitude\"],\n",
608 |     "                    },                    \n",
609 |     "                   encoding=\"utf-8\")\n",
610 |     "docs = loader.load()\n",
611 |     "\n",
612 |     "# 顯示 docs 內的第一筆資料\n",
613 |     "print (docs[1].page_content)\n",
614 |     "\n",
615 |     "# 載入 CSV 文件計算 OpenAI Embeddings 向量值，並建立 FAISS 向量資料庫索引\n",
616 |     "db = FAISS.from_documents(documents=docs, embedding=embeddings_model)\n",
617 |     "\n",
618 |     "# 將 FAISS 向量資料索引儲存於本機磁碟 \n",
619 |     "db.save_local(\"./data/FAISS/faiss_index\")"
620 |    ]
621 |   },
622 |   {
623 |    "cell_type": "markdown",
624 |    "metadata": {},
625 |    "source": [
626 |     "以下重新載入 FAISS 向量值索引，並開始利用 FAISS 進行向量近似比對"
627 |    ]
628 |   },
629 |   {
630 |    "cell_type": "code",
631 |    "execution_count": 118,
632 |    "metadata": {},
633 |    "outputs": [],
634 |    "source": [
635 |     "from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings\n",
636 |     "from langchain_community.vectorstores import FAISS\n",
637 |     "\n",
638 |     "# 建立 Azure OpenAI Embeddings 類別的實例\n",
639 |     "embeddings_model = AzureOpenAIEmbeddings(\n",
640 |     "    model=emb_model,\n",
641 |     "    azure_endpoint=azure_openai_endpoint,\n",
642 |     "    openai_api_version=api_ver,\n",
643 |     "    chunk_size = 16\n",
644 |     ")\n",
645 |     "\n",
646 |     "# 如果資料沒有變更未來僅需 db = FAISS.load_local(\"faiss_index\", embeddings_model) 方式即可載入之前建立的向量索引，無須重新計算文件的向量值\n",
647 |     "db = FAISS.load_local(\"./data/FAISS/faiss_index\", embeddings_model,allow_dangerous_deserialization=True)"
648 |    ]
649 |   },
650 |   {
651 |    "cell_type": "code",
652 |    "execution_count": 119,
653 |    "metadata": {},
654 |    "outputs": [
655 |     {
656 |      "name": "stdout",
657 |      "output_type": "stream",
658 |      "text": [
659 |       "(Document(metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 60}, page_content='序號: 60\\n區域: 士林區\\n店家: 錢都日式涮涮鍋-士林芝山店\\n報名組別: 經典鍋物組(799以下)\\n營業地址: 臺北市士林區福國路71號-1\\nLongitude: 121.5232239\\nLatitude: 25.10250454'), 0.40565524)\n",
660 |       "(Document(metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 61}, page_content='序號: 61\\n區域: 士林區\\n店家: 藏王極上鍋物\\n報名組別: 饗宴鍋物組(800以上)\\n營業地址: 臺北市士林區忠誠路二段55號3樓\\nLongitude: 121.5310986\\nLatitude: 25.11282254'), 0.40968293)\n",
661 |       "(Document(metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 59}, page_content='序號: 59\\n區域: 士林區\\n店家: 川邸鍋物專門\\n報名組別: 經典鍋物組(799以下)\\n營業地址: 臺北市士林區承德路四段79號1樓\\nLongitude: 121.5232675\\nLatitude: 25.08314465'), 0.41156936)\n",
662 |       "(Document(metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 52}, page_content='序號: 52\\n區域: 信義區\\n店家: 王鍋屋Shabu ong 京都風味酸白菜鍋專門店\\n報名組別: 饗宴鍋物組(800以上)\\n營業地址: 臺北市信義區逸仙路50巷20號1樓\\nLongitude: 121.5626957\\nLatitude: 25.04119888'), 0.44357985)\n"
663 |      ]
664 |     }
665 |    ],
666 |    "source": [
667 |     "# 直接傳入查詢字串方式來進行相似度搜尋\n",
668 |     "query = \"士林\"\n",
669 |     "docs_and_scores = db.similarity_search_with_score(query)\n",
670 |     "\n",
671 |     "# 顯示 docs_and_scores 全部內容\n",
672 |     "for doc in docs_and_scores:\n",
673 |     "    print(doc)  \n"
674 |    ]
675 |   },
676 |   {
677 |    "cell_type": "code",
678 |    "execution_count": 120,
679 |    "metadata": {},
680 |    "outputs": [
681 |     {
682 |      "name": "stdout",
683 |      "output_type": "stream",
684 |      "text": [
685 |       "page_content='序號: 1\n",
686 |       "區域: 中山區\n",
687 |       "店家: 滿堂紅頂級麻辣鴛鴦鍋\n",
688 |       "報名組別: 經典鍋物組(799以下)\n",
689 |       "營業地址: 臺北市中山區松江路185號2樓\n",
690 |       "Longitude: 121.5331715\n",
691 |       "Latitude: 25.05724095' metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 1}\n",
692 |       "page_content='序號: 30\n",
693 |       "區域: 松山區\n",
694 |       "店家: 芳朵麻辣鍋 Fondue M spicy pot\n",
695 |       "報名組別: 饗宴鍋物組(800以上)\n",
696 |       "營業地址: 臺北市松山區光復北路98號2樓\n",
697 |       "Longitude: 121.5574816\n",
698 |       "Latitude: 25.05121754' metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 30}\n",
699 |       "page_content='序號: 23\n",
700 |       "區域: 大同區\n",
701 |       "店家: 本鼎堂台式漢方麻辣鍋\n",
702 |       "報名組別: 饗宴鍋物組(800以上)\n",
703 |       "營業地址: 臺北市大同區南京西路277號1樓\n",
704 |       "Longitude: 121.5094827\n",
705 |       "Latitude: 25.05408046' metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 23}\n",
706 |       "page_content='序號: 39\n",
707 |       "區域: 大安區\n",
708 |       "店家: 太和殿\n",
709 |       "報名組別: 饗宴鍋物組(800以上)\n",
710 |       "營業地址: 臺北市大安區信義路四段315號1樓\n",
711 |       "Longitude: 121.5560896\n",
712 |       "Latitude: 25.03404847' metadata={'source': './data/2023-taipei-hot pot-restaurant.csv', 'row': 39}\n"
713 |      ]
714 |     }
715 |    ],
716 |    "source": [
717 |     "# 以計算查詢字串向量方式來進行相似度搜尋\n",
718 |     "query = \"滿堂紅\"\n",
719 |     "embedding_vector = embeddings_model.embed_query(query)\n",
720 |     "docs_and_scores = db.similarity_search_by_vector(embedding_vector)\n",
721 |     "\n",
722 |     "# 顯示 docs_and_scores 全部內容\n",
723 |     "for doc in docs_and_scores:\n",
724 |     "    print(doc)  "
725 |    ]
726 |   },
727 |   {
728 |    "cell_type": "markdown",
729 |    "metadata": {},
730 |    "source": [
731 |     "## 6 在 LangChain 內使用工具 (Tool) 自訂延伸功能\n",
732 |     "大型語言模型的快速進步，與人類直接互動能夠處理的工作越來越多，然而大型模型對數值運算類型工作無法確保計算正確，此外如果我們希望應用系統能夠直接與資料庫或現有成熟的 API 互動，來增添大型語言模型能力的不足之處，此時 LangChain 的[工具 (Tool)](https://python.langchain.com/docs/concepts/tool_calling/) 機制；可讓軟體開發人員可以透過適當的工具自訂各種功能，並結合大型語言模型自動選擇呼叫正確的工具並找出正確的參數傳遞給工具，以增添應用系統的能力。\n",
733 |     "\n",
734 |     "以下程式碼示範如何建立一個工具:"
735 |    ]
736 |   },
737 |   {
738 |    "cell_type": "code",
739 |    "execution_count": 2,
740 |    "metadata": {},
741 |    "outputs": [],
742 |    "source": [
743 |     "from langchain_core.tools import tool\n",
744 |     "\n",
745 |     "# 建立一個名為 squarekm_to_ping 的工具，可將面積平方公里轉換為台灣常用的坪數\n",
746 |     "@tool\n",
747 |     "def squarekm_to_ping(squarekm:int) -> str:\n",
748 |     "    \"\"\"the squarekm_to_ping can convert square kilometers to pings\n",
749 |     "    Args:\n",
750 |     "        squarekm (int): square kilometers as input\n",
751 |     "    Returns:\n",
752 |     "        str: pings as output    \n",
753 |     "    \"\"\"\n",
754 |     "\n",
755 |     "    return str(squarekm * 302500)\n",
756 |     "\n",
757 |     "# 建立一個名為 squarekm_to_acres 的工具，可將面積平方公里轉換為英畝\n",
758 |     "@tool\n",
759 |     "def squarekm_to_acres(squarekm:int) -> str:\n",
760 |     "    \"\"\"the squarekm_to_acres can convert square kilometers to acres\n",
761 |     "    Args:\n",
762 |     "        squarekm (int): square kilometers as input\n",
763 |     "    Returns:\n",
764 |     "        str: acres as output    \n",
765 |     "    \"\"\"\n",
766 |     "\n",
767 |     "    return str(squarekm * 247.105)"
768 |    ]
769 |   },
770 |   {
771 |    "cell_type": "markdown",
772 |    "metadata": {},
773 |    "source": [
774 |     "以下程式碼示範如何綁定一個工具或多個工具到 LangChain 所建立的大型語言模型上。"
775 |    ]
776 |   },
777 |   {
778 |    "cell_type": "code",
779 |    "execution_count": null,
780 |    "metadata": {},
781 |    "outputs": [
782 |     {
783 |      "name": "stdout",
784 |      "output_type": "stream",
785 |      "text": [
786 |       "13612500\n",
787 |       "content='' additional_kwargs={'tool_calls': [{'id': 'call_dWMDHrz73Ue77TTlpsCpogM4', 'function': {'arguments': '{\"squarekm\":49}', 'name': 'squarekm_to_acres'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 142, 'total_tokens': 160, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_b705f0c291', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'tool_calls', 'logprobs': None, 'content_filter_results': {}} id='run-71b30dfb-3274-4c09-8787-6eec48f247b5-0' tool_calls=[{'name': 'squarekm_to_acres', 'args': {'squarekm': 49}, 'id': 'call_dWMDHrz73Ue77TTlpsCpogM4', 'type': 'tool_call'}] usage_metadata={'input_tokens': 142, 'output_tokens': 18, 'total_tokens': 160, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n",
788 |       "[{'name': 'squarekm_to_acres', 'args': {'squarekm': 49}, 'id': 'call_dWMDHrz73Ue77TTlpsCpogM4', 'type': 'tool_call'}]\n",
789 |       "squarekm_to_acres\n",
790 |       "{'squarekm': 49}\n",
791 |       "12108.144999999999 英畝\n"
792 |      ]
793 |     }
794 |    ],
795 |    "source": [
796 |     "# 由於 tool 實作了 runnable 介面 ，可以直接呼叫 invoke 以測試工具呼叫結果\n",
797 |     "result = squarekm_to_ping.invoke({\"squarekm\": 45})\n",
798 |     "print (result)\n",
799 |     "\n",
800 |     "# tools 定義可以使用使用哪些工具\n",
801 |     "tools = [squarekm_to_ping,squarekm_to_acres]\n",
802 |     "\n",
803 |     "# 使用 Azure OpenAI Service 建立一個 chat 模型，並將工具綁定 (Tool binding) 此 chat 模型 \n",
804 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
805 |     "model_with_tools = chat.bind_tools(tools)\n",
806 |     "\n",
807 |     "# 呼叫工具 \n",
808 |     "response = model_with_tools.invoke(\"蘭嶼面積有多少英畝?\")\n",
809 |     "# 顯示回應內容\n",
810 |     "print(response)\n",
811 |     "# 顯示回應中針對 tool_calls 的解析結果\n",
812 |     "print(response.tool_calls)\n",
813 |     "# 顯示應該呼叫的 Tool 名稱\n",
814 |     "print(response.tool_calls[0][\"name\"])\n",
815 |     "# 顯示應該呼叫的 Tool 參數\n",
816 |     "print(response.tool_calls[0][\"args\"])\n",
817 |     "\n",
818 |     "# 依據回應結果選擇對應之工具，並傳入正確之參數得到最終結果\n",
819 |     "selected_tool = {\"squarekm_to_ping\": squarekm_to_ping, \"squarekm_to_acres\": squarekm_to_acres}[response.tool_calls[0][\"name\"].lower()]\n",
820 |     "tool_msg = selected_tool.invoke(response.tool_calls[0][\"args\"])\n",
821 |     "print( tool_msg + \" 英畝\")\n"
822 |    ]
823 |   },
824 |   {
825 |    "cell_type": "markdown",
826 |    "metadata": {},
827 |    "source": [
828 |     "也可以利用 Chain 讓程式碼簡潔一些"
829 |    ]
830 |   },
831 |   {
832 |    "cell_type": "code",
833 |    "execution_count": null,
834 |    "metadata": {},
835 |    "outputs": [
836 |     {
837 |      "name": "stdout",
838 |      "output_type": "stream",
839 |      "text": [
840 |       "13612500 坪\n"
841 |      ]
842 |     }
843 |    ],
844 |    "source": [
845 |     "# 使用 Azure OpenAI Service 建立一個 chat 模型\n",
846 |     "chat = AzureChatOpenAI(azure_deployment=chat_model,api_version=api_ver)\n",
847 |     "\n",
848 |     "# 並將可將面積平方公里轉換為台灣常用的坪數工具 squarekm_to_ping 綁定 (Tool binding) 此 chat 模型 \n",
849 |     "model_with_tools = chat.bind_tools([squarekm_to_ping])\n",
850 |     "\n",
851 |     "# 設定 Output Parser 為 StrOutputParser \n",
852 |     "parser = StrOutputParser ()\n",
853 |     "\n",
854 |     "# 將所有 Chain 串連在一起\n",
855 |     "chain = model_with_tools | (lambda x: x.tool_calls[0][\"args\"]) | squarekm_to_ping | parser\n",
856 |     "response = chain.invoke(\"蘭嶼面積有多少坪?\")\n",
857 |     "\n",
858 |     "# 顯示回應內容\n",
859 |     "print(response +\" 坪\")"
860 |    ]
861 |   }
862 |  ],
863 |  "metadata": {
864 |   "kernelspec": {
865 |    "display_name": "Python 3",
866 |    "language": "python",
867 |    "name": "python3"
868 |   },
869 |   "language_info": {
870 |    "codemirror_mode": {
871 |     "name": "ipython",
872 |     "version": 3
873 |    },
874 |    "file_extension": ".py",
875 |    "mimetype": "text/x-python",
876 |    "name": "python",
877 |    "nbconvert_exporter": "python",
878 |    "pygments_lexer": "ipython3",
879 |    "version": "3.11.9"
880 |   },
881 |   "orig_nbformat": 4
882 |  },
883 |  "nbformat": 4,
884 |  "nbformat_minor": 2
885 | }
886 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Tom Lee
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 關於本 Python Notebook 
 2 | 
 3 | 內容是修改自 https://github.com/Azure/azure-openai-samples/tree/main/quick_start ，旨在協助台灣開發人員快速了解 Azure OpenAI Service 基本開發概念
 4 | 
 5 | ## 環境準備
 6 | 
 7 | - 備妥 Microsoft Azure 訂閱帳號
 8 | - 已經申請核准建立妥 Azure OpenAI Service 資源
 9 | - 已經於 Azure AI Studio 內建立好以下模型之部署 (deployment)
10 |     + text-embedding-ada-002
11 |     + gpt-35-turbo-instruct
12 |     + gpt-4o
13 | 
14 | - 備妥 Python 3.11 與 Jupyter Notebook 相容之編輯執行環境
15 | 
16 | 所需的三種模型，可點選下圖 [Azure AI Studio](https://oai.azure.com/portal) 標示之 **Deployments** 選項，即可依序建立部署，請紀錄模型之部署名稱，後續需要輸入環境變數之中。
17 | 
18 | ![Azure AI Studio 中的模型部署選項](./assets/azure-ai-studio.png)
19 | 
20 | 
21 | ## 設定作業系統之環境變數
22 | 當建妥 Azure OPENAI Service，透過 Azure Portal 取得呼叫所需的 API 鍵值與呼叫端點，設定以下數個環境變數
23 | - **AZURE_OPENAI_ENDPOINT** Azure OpenAI Service 呼叫端點之環境變數
24 | - **AZURE_OPENAI_API_KEY**  Azure OpenAI Service 之 API 鍵值之環境變數
25 | - **AZURE_OPENAI_API_VERSION** Azure OpenAI Service 的 OpenAI API 版本之環境變數
26 | - **CHAT_DEPLOYMENT_NAME**  能夠支援 Chat Completions API 的對話式語言模型部署名稱之環境變數
27 | - **COMPLETIONS_DEPLOYMENT_NAME** 能夠支援 Completions API 的語言模型之環境變數
28 | - **EMBEDDINS_DEPLOYMENT_NAME** 能夠支援 Embeddings API 的語言模型之環境變數
29 | 
30 | 點選下圖 Azure Portal 標示選項可以取得 Azure OpenAI Service 呼叫端點與 API 鍵值。
31 | 
32 | ![Azure OpenAI Service 呼叫端點與 API 鍵值](./assets/azure-portal.png)
33 | 
34 | 若不想透過環境變數設定，則可直接修改本 Notebook 中的程式碼，例如:
35 | 
36 |  ```python
37 | API_KEY = os.getenv('AZURE_OPENAI_API_KEY','1234567890abcdef1234567890abcdef')
38 | RESOURCE_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT','https://<您的 Azure OpenAI 資源名稱>.openai.azure.com/')
39 | MODEL = os.getenv('CHAT_DEPLOYMENT_NAME','gpt-4o')
40 | openai.api_version = os.getenv('AZURE_OPENAI_API_VERSION','2023-05-15')
41 | ```
42 | 
43 | 本範例採用了 Python dotenv 套件，環境變數也可以寫在 .env 檔案中，例如:
44 | 
45 | ```bash
46 | AZURE_OPENAI_ENDPOINT=https://<您的 Azure OpenAI 資源名稱>.openai.azure.com/
47 | AZURE_OPENAI_API_KEY=1234567890abcdef1234567890abcdef
48 | AZURE_OPENAI_API_VERSION=2023-05-15
49 | CHAT_DEPLOYMENT_NAME=gpt-4o
50 | COMPLETIONS_DEPLOYMENT_NAME=gpt-35-turbo-instruct
51 | EMBEDDINS_DEPLOYMENT_NAME=text-embedding-ada-002
52 | ```
53 | ## 逐一執行本 Notebook 中的程式碼


--------------------------------------------------------------------------------
/assets/azure-ai-studio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomleetaiwan/azure_openai_quick_start/b821db1a56a4bca31ee14de85047bd77bee4a85a/assets/azure-ai-studio.png


--------------------------------------------------------------------------------
/assets/azure-portal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomleetaiwan/azure_openai_quick_start/b821db1a56a4bca31ee14de85047bd77bee4a85a/assets/azure-portal.png


--------------------------------------------------------------------------------
/assets/chain-concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomleetaiwan/azure_openai_quick_start/b821db1a56a4bca31ee14de85047bd77bee4a85a/assets/chain-concept.png


--------------------------------------------------------------------------------
/data/2023-taipei-hot pot-restaurant.csv:
--------------------------------------------------------------------------------
 1 | ﻿序號,區域,店家,報名組別,營業地址,Longitude,Latitude
 2 | 1,中山區,滿堂紅頂級麻辣鴛鴦鍋,經典鍋物組(799以下),臺北市中山區松江路185號2樓,121.5331715,25.05724095
 3 | 2,中山區,鎰鼎火鍋,饗宴鍋物組(800以上),臺北市中山區松江路38巷2號1樓,121.5326471,25.04828511
 4 | 3,中山區,?極宴-私廚鍋物,饗宴鍋物組(800以上),臺北市中山區長安東路二段218號,121.5422927,25.04894453
 5 | 4,中山區,泰滾_泰式火鍋,饗宴鍋物組(800以上),臺北市中山區南京東路二段178號B1,121.5353367,25.05262148
 6 | 5,中山區,侯黑鍋物HoHey HotPot,經典鍋物組(799以下),臺北市中山區四平街19號,121.5313514,25.05448272
 7 | 6,中山區,土狗樂市,經典鍋物組(799以下),臺北市中山區建國北路2段3巷17號,121.5378786,25.05379388
 8 | 7,中山區,肉老大頂級肉品涮涮鍋,經典鍋物組(799以下),臺北市中山區錦西街11號,121.5211918,25.0612523
 9 | 8,中山區,如嬌　花膠雞．鍋物,饗宴鍋物組(800以上),臺北市中山區民權東路三段60巷9號1樓,121.5425161,25.0617422
10 | 9,中山區,火鍋106-粵式豬肚煲鍋,饗宴鍋物組(800以上),臺北市中山區吉林路18號1樓,121.5302127,25.05160497
11 | 10,中山區,宣牛溫體牛肉火鍋,經典鍋物組(799以下),臺北市中山區復興北路292號,121.5436193,25.06030601
12 | 11,中山區,享鍋酒館,饗宴鍋物組(800以上),臺北市中山區中山北路二段74號,121.5226357,25.05770879
13 | 12,中山區,汆食 作伙鍋,經典鍋物組(799以下),臺北市中山區中山北路三段26號1樓,121.5219642,25.06627328
14 | 13,中山區,手工殿麻辣鍋物,饗宴鍋物組(800以上),臺北市中山區長安東路一段53巷1-1號1樓,121.5265534,25.05046576
15 | 14,中山區,蔡記隆府龍頭寺老灶火鍋,饗宴鍋物組(800以上),臺北市中山區新生北路2段58巷6號,121.5265963,25.05120445
16 | 15,中山區,好食多涮涮鍋南西店,饗宴鍋物組(800以上),臺北市中山區南京西路5-1號B1,121.521572,25.05352023
17 | 16,萬華區,食焱廠創意鍋物 DELECTABLE HOT POT LAB,經典鍋物組(799以下),臺北市萬華區昆明街146號1樓,121.5046808,25.04331516
18 | 17,萬華區,喜多鍋活體海鮮鍋物,饗宴鍋物組(800以上),臺北市萬華區武昌街二段124-1號1樓,121.5040691,25.04630255
19 | 18,萬華區,新千葉火鍋-西門店,經典鍋物組(799以下),臺北市萬華區峨嵋街52號7樓,121.505634,25.04452102
20 | 19,萬華區,築間幸福鍋物 台北西門町店,經典鍋物組(799以下),臺北市萬華區昆明街92-1號2樓,121.505021,25.04593317
21 | 20,萬華區,打狗霸,饗宴鍋物組(800以上),臺北市萬華區昆明街46號,121.5057929,25.04748348
22 | 21,萬華區,前鎮水產,饗宴鍋物組(800以上),臺北市萬華區昆明街46號,121.5057929,25.04748348
23 | 22,大同區,荖子鍋,經典鍋物組(799以下),臺北市大同區重慶北路二段171號1樓 (家樂福重慶店一樓美食街),121.5138783,25.05987721
24 | 23,大同區,本鼎堂台式漢方麻辣鍋,饗宴鍋物組(800以上),臺北市大同區南京西路277號1樓,121.5094827,25.05408046
25 | 24,中正區,藍象廷泰式火鍋,經典鍋物組(799以下),臺北市中正區忠孝西路1段66號B1,121.5152763,25.04645955
26 | 25,中正區,八海食潮 當代鍋物,經典鍋物組(799以下),臺北市中正區北平西路3號2樓,121.5167837,25.04875721
27 | 26,松山區,紅九九個人麻辣鴛鴦鍋,經典鍋物組(799以下),臺北市松山區南京東路四段179巷3號,121.5564541,25.05265234
28 | 27,松山區,Yuan.Hot Pot 原火鍋,經典鍋物組(799以下),臺北市松山區南京東路五段66巷18號1樓,121.5606527,25.05071995
29 | 28,松山區,樂崎火鍋 民生店,饗宴鍋物組(800以上),臺北市松山區民生東路四段55巷3弄14號,121.5509671,25.05913752
30 | 29,松山區,石堂潮汕石頭火鍋,經典鍋物組(799以下),臺北市松山區八德路三段130號,121.5546301,25.04891697
31 | 30,松山區,芳朵麻辣鍋 Fondue M spicy pot,饗宴鍋物組(800以上),臺北市松山區光復北路98號2樓,121.5574816,25.05121754
32 | 31,松山區,石石鍋創,經典鍋物組(799以下),臺北市松山區敦化北路155巷12號1樓,121.5507363,25.05526694
33 | 32,大安區,東豐東鍋物,饗宴鍋物組(800以上),臺北市大安區東豐街37號,121.5460458,25.0369825
34 | 33,大安區,花麻辣好雞飯堂,經典鍋物組(799以下),臺北市大安區基隆路2段254號,121.5501314,25.02341177
35 | 34,大安區,囍聚精緻鍋物,饗宴鍋物組(800以上),臺北市大安區光復南路72巷7弄17號,121.5566918,25.04500955
36 | 35,大安區,COCA泰式海鮮火鍋,饗宴鍋物組(800以上),臺北市大安區忠孝東路四段216巷8弄2號1樓,121.5527308,25.04134765
37 | 36,大安區,草原風蒙古火鍋,饗宴鍋物組(800以上),臺北市大安區永康街37巷18號,121.5300328,25.03157039
38 | 37,大安區,冷藏肉專門 鍋無敵 Nabe Muteki,經典鍋物組(799以下),臺北市大安區光復南路290巷53號,121.5551827,25.03985896
39 | 38,大安區,品湯。白色麻辣鍋專賣,經典鍋物組(799以下),臺北市大安區通化街24巷3號,121.5537292,25.03262375
40 | 39,大安區,太和殿,饗宴鍋物組(800以上),臺北市大安區信義路四段315號1樓,121.5560896,25.03404847
41 | 40,大安區,大安9號鍋物。鐵板燒,饗宴鍋物組(800以上),臺北市大安區仁愛路四段27巷9號1樓,121.5450348,25.04041275
42 | 41,大安區,三藏和牛火鍋,饗宴鍋物組(800以上),臺北市大安區忠孝東路三段251巷8弄2號,121.5410982,25.04375622
43 | 42,大安區,澳門贏到足 鍋物私房菜,饗宴鍋物組(800以上),臺北市大安區敦化南路2段11巷2號1樓,121.549634,25.03331891
44 | 43,大安區,竹苑 shabu,饗宴鍋物組(800以上),臺北市大安區大安路1段31巷31號,121.5475756,25.04466464
45 | 44,大安區,養心殿精緻鍋物,饗宴鍋物組(800以上),臺北市大安區市民大道四段110號1樓,121.5505838,25.04554317
46 | 45,大安區,秀花鍋 Showhua Hotpot 自家海鮮專門,饗宴鍋物組(800以上),臺北市大安區復興南路107巷5弄29號,121.5444384,25.04442534
47 | 46,大安區,饕鍋,經典鍋物組(799以下),臺北市大安區復興南路1段269號2樓,121.5438913,25.03614362
48 | 47,大安區,鼎旺麻辣鍋,饗宴鍋物組(800以上),臺北市大安區大安路一段251號(一店),121.5460805,25.03456389
49 | 48,大安區,洞天 粵式煲湯獨享鍋,經典鍋物組(799以下),臺北市大安區敦化南路二段81巷35號,121.550247,25.03088313
50 | 49,大安區,牛本色溫體牛火鍋,饗宴鍋物組(800以上),臺北市大安區市民大道4段78號,121.5480705,25.04570855
51 | 50,大安區,小膳香成都麻辣火鍋,饗宴鍋物組(800以上),臺北市大安區忠孝東路四段223巷 40-2號,121.5529413,25.04414246
52 | 51,大安區,東華川府重慶老火鍋,饗宴鍋物組(800以上),臺北市大安區仁愛路四段345巷4弄313號1樓,121.5542324,25.03977095
53 | 52,信義區,王鍋屋Shabu ong 京都風味酸白菜鍋專門店,饗宴鍋物組(800以上),臺北市信義區逸仙路50巷20號1樓,121.5626957,25.04119888
54 | 53,信義區,小川鍋物,經典鍋物組(799以下),臺北市信義區松?路19號B2,121.5667212,25.04063684
55 | 54,信義區,養鍋 台北松菸店,饗宴鍋物組(800以上),臺北市信義區忠孝東路四段559巷28號,121.563966,25.04335768
56 | 55,信義區,林柏食?涮涮鍋,經典鍋物組(799以下),臺北市信義區松隆路333號,121.5794023,25.04933901
57 | 56,信義區,初衷小鹿,饗宴鍋物組(800以上),臺北市信義區忠孝東路四段553巷6弄15號,121.5631451,25.04335757
58 | 57,信義區,撈王鍋物料理,饗宴鍋物組(800以上),臺北市信義區松壽路12號9樓,121.565766,25.03597066
59 | 58,信義區,優喜鍋物專門店,饗宴鍋物組(800以上),臺北市信義區基隆路一段147巷5弄5號1樓,121.5660742,25.04356233
60 | 59,士林區,川邸鍋物專門,經典鍋物組(799以下),臺北市士林區承德路四段79號1樓,121.5232675,25.08314465
61 | 60,士林區,錢都日式涮涮鍋-士林芝山店,經典鍋物組(799以下),臺北市士林區福國路71號-1,121.5232239,25.10250454
62 | 61,士林區,藏王極上鍋物,饗宴鍋物組(800以上),臺北市士林區忠誠路二段55號3樓,121.5310986,25.11282254
63 | 62,北投區,貳房頂級鍋物,饗宴鍋物組(800以上),臺北市北投區大業路717-3號,121.5017524,25.13826051
64 | 63,文山區,辣椒多一點,經典鍋物組(799以下),臺北市文山區興隆路二段245號1樓,121.551458,25.00272953
65 | 64,南港區,22：02火鍋,經典鍋物組(799以下),臺北市南港區忠孝東路七段369號 C棟10樓,121.6046467,25.05337428
66 | 


--------------------------------------------------------------------------------