├── Gradient.ipynb
├── README.md
├── Slides.pdf
└── data
    └── aoliao.txt


/Gradient.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "79a726c5",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stdout",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "Note: you may need to restart the kernel to use updated packages.\n",
 14 |       "Note: you may need to restart the kernel to use updated packages.\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "# Pin llama-index below 0.10: this notebook imports from the top-level `llama_index` package (e.g. `from llama_index import ServiceContext`), which the 0.10 release reorganised.\n",
 20 |     "%pip install --quiet \"llama-index<0.10\" gradientai\n",
 21 |     "%mkdir -p data"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 2,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# Create a document containing information about 奥利奥, the channel's mascot.\n",
 31 |     "with open('data/aoliao.txt', 'w', encoding='utf-8') as f:  # explicit UTF-8: the text has non-ASCII characters and open() otherwise uses the platform's default encoding\n",
 32 |     "    f.write(\"奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil.\\n\" +\n",
 33 |     "            \"They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts.\\n\" +\n",
 34 |     "            \"So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2.\\n\" +\n",
 35 |     "            \"Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel.\\n\")"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 1,
 41 |    "id": "4baffaa2",
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "ename": "UnauthorizedException",
 46 |      "evalue": "(401)\nReason: Unauthorized\nHTTP response headers: HTTPHeaderDict({'strict-transport-security': 'max-age=63072000; includeSubDomains; preload', 'referrer-policy': 'no-referrer', 'x-content-type-options': 'nosniff', 'x-download-options': 'noopen', 'x-frame-options': 'SAMEORIGIN', 'x-xss-protection': '1; mode=block', 'content-type': 'application/json; charset=utf-8', 'etag': '\"hwxzxeosy8q\"', 'vary': 'Accept-Encoding', 'X-Cloud-Trace-Context': '0cfb129e0a8afd4ebe932c81e9e12595;o=1', 'Date': 'Thu, 23 Nov 2023 03:19:17 GMT', 'Server': 'Google Frontend', 'Content-Length': '26', 'Via': '1.1 google', 'Alt-Svc': 'h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000'})\nHTTP response body: {\"message\":\"Unauthorized\"}\n",
 47 |      "output_type": "error",
 48 |      "traceback": [
 49 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 50 |       "\u001b[0;31mUnauthorizedException\u001b[0m                     Traceback (most recent call last)",
 51 |       "\u001b[1;32m/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[1;32m     <a href='vscode-notebook-cell://wsl%2Bubuntu/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a>\u001b[0m question \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mDo you know anyone named 奥利奥?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m     <a href='vscode-notebook-cell://wsl%2Bubuntu/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39m# You can also use a model adapter you've trained with GradientModelAdapterLLM\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell://wsl%2Bubuntu/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=12'>13</a>\u001b[0m llm \u001b[39m=\u001b[39m GradientBaseModelLLM(base_model_slug\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mllama2-7b-chat\u001b[39;49m\u001b[39m\"\u001b[39;49m,max_tokens\u001b[39m=\u001b[39;49m\u001b[39m100\u001b[39;49m)\n\u001b[1;32m     <a href='vscode-notebook-cell://wsl%2Bubuntu/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=14'>15</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mWithout RAG: \u001b[39m\u001b[39m{\u001b[39;00mllm\u001b[39m.\u001b[39mcomplete(question)\u001b[39m.\u001b[39mtext\u001b[39m.\u001b[39mstrip()\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m     <a href='vscode-notebook-cell://wsl%2Bubuntu/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=15'>16</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m'\u001b[39m)\n",
 52 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/llama_index/llms/gradient.py:133\u001b[0m, in \u001b[0;36mGradientBaseModelLLM.__init__\u001b[0;34m(self, access_token, base_model_slug, host, max_tokens, workspace_id, callback_manager, is_chat_model)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m    113\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[1;32m    114\u001b[0m     \u001b[39m*\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    121\u001b[0m     is_chat_model: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m    122\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    123\u001b[0m     \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[1;32m    124\u001b[0m         access_token\u001b[39m=\u001b[39maccess_token,\n\u001b[1;32m    125\u001b[0m         base_model_slug\u001b[39m=\u001b[39mbase_model_slug,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    130\u001b[0m         is_chat_model\u001b[39m=\u001b[39mis_chat_model,\n\u001b[1;32m    131\u001b[0m     )\n\u001b[0;32m--> 133\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_gradient\u001b[39m.\u001b[39;49mget_base_model(\n\u001b[1;32m    134\u001b[0m         base_model_slug\u001b[39m=\u001b[39;49mbase_model_slug,\n\u001b[1;32m    135\u001b[0m     )\n",
 53 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:92\u001b[0m, in \u001b[0;36mGradient.get_base_model\u001b[0;34m(self, base_model_slug)\u001b[0m\n\u001b[1;32m     91\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_base_model\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, base_model_slug: \u001b[39mstr\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m BaseModel:\n\u001b[0;32m---> 92\u001b[0m     models: List[BaseModel] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models(only_base\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m     93\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mnext\u001b[39m(\n\u001b[1;32m     94\u001b[0m         \u001b[39mfilter\u001b[39m(\u001b[39mlambda\u001b[39;00m model: model\u001b[39m.\u001b[39m_slug \u001b[39m==\u001b[39m base_model_slug, models)\n\u001b[1;32m     95\u001b[0m     )\n",
 54 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:118\u001b[0m, in \u001b[0;36mGradient.list_models\u001b[0;34m(self, only_base)\u001b[0m\n\u001b[1;32m    117\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mlist_models\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, only_base: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m List[Model]:  \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m--> 118\u001b[0m     response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_models_api\u001b[39m.\u001b[39;49mlist_models(\n\u001b[1;32m    119\u001b[0m         x_gradient_workspace_id\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_workspace_id, only_base\u001b[39m=\u001b[39;49monly_base\n\u001b[1;32m    120\u001b[0m     )\n\u001b[1;32m    122\u001b[0m     \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize_model\u001b[39m(\n\u001b[1;32m    123\u001b[0m         api_model: Any,\n\u001b[1;32m    124\u001b[0m     ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Model:\n\u001b[1;32m    125\u001b[0m         \u001b[39mif\u001b[39;00m api_model\u001b[39m.\u001b[39mtype \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mbaseModel\u001b[39m\u001b[39m\"\u001b[39m:\n",
 55 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n",
 56 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n",
 57 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n",
 58 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:862\u001b[0m, in \u001b[0;36mModelsApi.list_models\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m    860\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m'\u001b[39m\u001b[39m_preload_content\u001b[39m\u001b[39m'\u001b[39m \u001b[39min\u001b[39;00m kwargs:\n\u001b[1;32m    861\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mError! Please call the list_models_with_http_info method with `_preload_content` instead and obtain raw data from ApiResponse.raw_data\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 862\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models_with_http_info(x_gradient_workspace_id, only_base, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
 59 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n",
 60 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n",
 61 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n",
 62 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:964\u001b[0m, in \u001b[0;36mModelsApi.list_models_with_http_info\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m    957\u001b[0m _auth_settings \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mAccessToken\u001b[39m\u001b[39m'\u001b[39m]  \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[1;32m    959\u001b[0m _response_types_map \u001b[39m=\u001b[39m {\n\u001b[1;32m    960\u001b[0m     \u001b[39m'\u001b[39m\u001b[39m200\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsSuccess\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m    961\u001b[0m     \u001b[39m'\u001b[39m\u001b[39m4XX\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsError\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m    962\u001b[0m }\n\u001b[0;32m--> 964\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapi_client\u001b[39m.\u001b[39;49mcall_api(\n\u001b[1;32m    965\u001b[0m     \u001b[39m'\u001b[39;49m\u001b[39m/models\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[1;32m    966\u001b[0m     _path_params,\n\u001b[1;32m    967\u001b[0m     _query_params,\n\u001b[1;32m    968\u001b[0m     _header_params,\n\u001b[1;32m    969\u001b[0m     body\u001b[39m=\u001b[39;49m_body_params,\n\u001b[1;32m    970\u001b[0m     post_params\u001b[39m=\u001b[39;49m_form_params,\n\u001b[1;32m    971\u001b[0m     files\u001b[39m=\u001b[39;49m_files,\n\u001b[1;32m    972\u001b[0m     response_types_map\u001b[39m=\u001b[39;49m_response_types_map,\n\u001b[1;32m    973\u001b[0m     auth_settings\u001b[39m=\u001b[39;49m_auth_settings,\n\u001b[1;32m    974\u001b[0m     
async_req\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39masync_req\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m    975\u001b[0m     _return_http_data_only\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_return_http_data_only\u001b[39;49m\u001b[39m'\u001b[39;49m),  \u001b[39m# noqa: E501\u001b[39;49;00m\n\u001b[1;32m    976\u001b[0m     _preload_content\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_preload_content\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mTrue\u001b[39;49;00m),\n\u001b[1;32m    977\u001b[0m     _request_timeout\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_timeout\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m    978\u001b[0m     collection_formats\u001b[39m=\u001b[39;49m_collection_formats,\n\u001b[1;32m    979\u001b[0m     _request_auth\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_auth\u001b[39;49m\u001b[39m'\u001b[39;49m))\n",
 63 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:410\u001b[0m, in \u001b[0;36mApiClient.call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m    368\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request (synchronous) and returns deserialized data.\u001b[39;00m\n\u001b[1;32m    369\u001b[0m \n\u001b[1;32m    370\u001b[0m \u001b[39mTo make an async_req request, set the async_req parameter.\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    407\u001b[0m \u001b[39m    then the method will return the response directly.\u001b[39;00m\n\u001b[1;32m    408\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    409\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m async_req:\n\u001b[0;32m--> 410\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__call_api(resource_path, method,\n\u001b[1;32m    411\u001b[0m                            path_params, query_params, header_params,\n\u001b[1;32m    412\u001b[0m                            body, post_params, files,\n\u001b[1;32m    413\u001b[0m                            response_types_map, auth_settings,\n\u001b[1;32m    414\u001b[0m                            _return_http_data_only, collection_formats,\n\u001b[1;32m    415\u001b[0m                            _preload_content, _request_timeout, _host,\n\u001b[1;32m    416\u001b[0m                            _request_auth)\n\u001b[1;32m    418\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpool\u001b[39m.\u001b[39mapply_async(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__call_api, (resource_path,\n\u001b[1;32m    419\u001b[0m                            
                    method, path_params,\n\u001b[1;32m    420\u001b[0m                                                query_params,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    428\u001b[0m                                                _request_timeout,\n\u001b[1;32m    429\u001b[0m                                                _host, _request_auth))\n",
 64 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:225\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m    223\u001b[0m     \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n\u001b[1;32m    224\u001b[0m         e\u001b[39m.\u001b[39mbody \u001b[39m=\u001b[39m e\u001b[39m.\u001b[39mbody\u001b[39m.\u001b[39mdecode(\u001b[39m'\u001b[39m\u001b[39mutf-8\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 225\u001b[0m     \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m    227\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlast_response \u001b[39m=\u001b[39m response_data\n\u001b[1;32m    229\u001b[0m return_data \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m# assuming derialization is not needed\u001b[39;00m\n",
 65 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:215\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m    211\u001b[0m     url \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m?\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m url_query\n\u001b[1;32m    213\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m    214\u001b[0m     \u001b[39m# perform request and return response\u001b[39;00m\n\u001b[0;32m--> 215\u001b[0m     response_data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m    216\u001b[0m         method, url,\n\u001b[1;32m    217\u001b[0m         query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m    218\u001b[0m         headers\u001b[39m=\u001b[39;49mheader_params,\n\u001b[1;32m    219\u001b[0m         post_params\u001b[39m=\u001b[39;49mpost_params, body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m    220\u001b[0m         _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m    221\u001b[0m         _request_timeout\u001b[39m=\u001b[39;49m_request_timeout)\n\u001b[1;32m    222\u001b[0m \u001b[39mexcept\u001b[39;00m ApiException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m    223\u001b[0m     \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n",
 66 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:436\u001b[0m, in \u001b[0;36mApiClient.request\u001b[0;34m(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m    434\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request using RESTClient.\"\"\"\u001b[39;00m\n\u001b[1;32m    435\u001b[0m \u001b[39mif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m--> 436\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrest_client\u001b[39m.\u001b[39;49mget_request(url,\n\u001b[1;32m    437\u001b[0m                                 query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m    438\u001b[0m                                 _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m    439\u001b[0m                                 _request_timeout\u001b[39m=\u001b[39;49m_request_timeout,\n\u001b[1;32m    440\u001b[0m                                 headers\u001b[39m=\u001b[39;49mheaders)\n\u001b[1;32m    441\u001b[0m \u001b[39melif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mHEAD\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m    442\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrest_client\u001b[39m.\u001b[39mhead_request(url,\n\u001b[1;32m    443\u001b[0m                                  query_params\u001b[39m=\u001b[39mquery_params,\n\u001b[1;32m    444\u001b[0m                                  _preload_content\u001b[39m=\u001b[39m_preload_content,\n\u001b[1;32m    445\u001b[0m                                  _request_timeout\u001b[39m=\u001b[39m_request_timeout,\n\u001b[1;32m    446\u001b[0m                                  headers\u001b[39m=\u001b[39mheaders)\n",
 67 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/rest.py:243\u001b[0m, in \u001b[0;36mRESTClientObject.get_request\u001b[0;34m(self, url, headers, query_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m    241\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_request\u001b[39m(\u001b[39mself\u001b[39m, url, headers\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, query_params\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, _preload_content\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m    242\u001b[0m         _request_timeout\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m--> 243\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\u001b[39m\"\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m\"\u001b[39;49m, url,\n\u001b[1;32m    244\u001b[0m                         headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m    245\u001b[0m                         _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m    246\u001b[0m                         _request_timeout\u001b[39m=\u001b[39;49m_request_timeout,\n\u001b[1;32m    247\u001b[0m                         query_params\u001b[39m=\u001b[39;49mquery_params)\n",
 68 |       "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/rest.py:226\u001b[0m, in \u001b[0;36mRESTClientObject.request\u001b[0;34m(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m    223\u001b[0m     \u001b[39mraise\u001b[39;00m BadRequestException(http_resp\u001b[39m=\u001b[39mr)\n\u001b[1;32m    225\u001b[0m \u001b[39mif\u001b[39;00m r\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n\u001b[0;32m--> 226\u001b[0m     \u001b[39mraise\u001b[39;00m UnauthorizedException(http_resp\u001b[39m=\u001b[39mr)\n\u001b[1;32m    228\u001b[0m \u001b[39mif\u001b[39;00m r\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m \u001b[39m403\u001b[39m:\n\u001b[1;32m    229\u001b[0m     \u001b[39mraise\u001b[39;00m ForbiddenException(http_resp\u001b[39m=\u001b[39mr)\n",
 69 |       "\u001b[0;31mUnauthorizedException\u001b[0m: (401)\nReason: Unauthorized\nHTTP response headers: HTTPHeaderDict({'strict-transport-security': 'max-age=63072000; includeSubDomains; preload', 'referrer-policy': 'no-referrer', 'x-content-type-options': 'nosniff', 'x-download-options': 'noopen', 'x-frame-options': 'SAMEORIGIN', 'x-xss-protection': '1; mode=block', 'content-type': 'application/json; charset=utf-8', 'etag': '\"hwxzxeosy8q\"', 'vary': 'Accept-Encoding', 'X-Cloud-Trace-Context': '0cfb129e0a8afd4ebe932c81e9e12595;o=1', 'Date': 'Thu, 23 Nov 2023 03:19:17 GMT', 'Server': 'Google Frontend', 'Content-Length': '26', 'Via': '1.1 google', 'Alt-Svc': 'h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000'})\nHTTP response body: {\"message\":\"Unauthorized\"}\n"
 70 |      ]
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "import os\n",
 75 |     "from getpass import getpass\n",
 76 |     "\n",
 77 |     "from llama_index.llms import GradientBaseModelLLM\n",
 78 |     "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
 79 |     "from llama_index.embeddings import GradientEmbedding\n",
 80 |     "\n",
 81 |     "# Keep credentials already set in the environment; prompt (without echo) only when one is missing, so secrets are never saved in the notebook.\n",
 82 |     "for env_var in (\"GRADIENT_ACCESS_TOKEN\", \"GRADIENT_WORKSPACE_ID\"):\n",
 83 |     "    os.environ[env_var] = os.environ.get(env_var) or getpass(f\"{env_var}: \")\n",
 84 |     "\n",
 85 |     "question = \"Do you know anyone named 奥利奥?\"\n",
 86 |     "\n",
 87 |     "# You can also use a model adapter you've trained with GradientModelAdapterLLM\n",
 88 |     "llm = GradientBaseModelLLM(base_model_slug=\"llama2-7b-chat\", max_tokens=100)\n",
 89 |     "\n",
 90 |     "# Baseline: ask the bare LLM first so its answer can be compared with the retrieval-augmented one below.\n",
 91 |     "print(f'Without RAG: {llm.complete(question)}')\n",
 92 |     "print()\n",
 93 |     "documents = SimpleDirectoryReader(\"./data\").load_data() # Documents to index\n",
 94 |     "embed_model = GradientEmbedding(gradient_model_slug=\"bge-large\") # The model used to generate embeddings\n",
 95 |     "service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model=embed_model) # The service context defines the LLM and the embedding model to be used by the query engine\n",
 96 |     "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
 97 |     "query_engine = index.as_query_engine()\n",
 98 |     "response = query_engine.query(question)\n",
 99 |     "print(f'With RAG: {response}')"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": []
108 |   }
109 |  ],
110 |  "metadata": {
111 |   "kernelspec": {
112 |    "display_name": "Python 3.9.12 ('.venv': venv)",
113 |    "language": "python",
114 |    "name": "python3"
115 |   },
116 |   "language_info": {
117 |    "codemirror_mode": {
118 |     "name": "ipython",
119 |     "version": 3
120 |    },
121 |    "file_extension": ".py",
122 |    "mimetype": "text/x-python",
123 |    "name": "python",
124 |    "nbconvert_exporter": "python",
125 |    "pygments_lexer": "ipython3",
126 |    "version": "3.11.3"
127 |   },
128 |   "vscode": {
129 |    "interpreter": {
130 |     "hash": "5ae9fa2777630f93d325d67fd0c37f7375ed1afcb20dd85f425eb8692a47ff3f"
131 |    }
132 |   }
133 |  },
134 |  "nbformat": 4,
135 |  "nbformat_minor": 5
136 | }
137 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # retrieval-augmented-generation-notes
2 | Slides for "Retrieval Augmented Generation" video: https://www.youtube.com/watch?v=rhZgXNdhWDY
3 | 


--------------------------------------------------------------------------------
/Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hkproj/retrieval-augmented-generation-notes/2ee02798083a63f00e9a8b80f9b1c70a93fed3e2/Slides.pdf


--------------------------------------------------------------------------------
/data/aoliao.txt:
--------------------------------------------------------------------------------
1 | 奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil.
2 | They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts.
3 | So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2.
4 | Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel.
5 | 


--------------------------------------------------------------------------------