├── Gradient.ipynb
├── README.md
├── Slides.pdf
└── data
└── aoliao.txt
/Gradient.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "79a726c5",
7 | "metadata": {},
8 | "outputs": [
9 | {
10 | "name": "stdout",
11 | "output_type": "stream",
12 | "text": [
13 | "Note: you may need to restart the kernel to use updated packages.\n",
14 | "Note: you may need to restart the kernel to use updated packages.\n"
15 | ]
16 | }
17 | ],
18 | "source": [
19 | "# llama-index is pinned to the 0.9 series: this notebook uses the top-level `from llama_index import ...` layout and ServiceContext, which the 0.10 restructuring changed.\n",
20 | "%pip install --quiet llama-index~=0.9.0 gradientai\n",
21 | "import os; os.makedirs('data', exist_ok=True)  # portable replacement for the `mkdir -p` shell alias"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "# Create a document about 奥利奥, the channel's mascot. UTF-8 is requested explicitly because the platform's default encoding may be unable to encode the Chinese name.\n",
31 | "with open('data/aoliao.txt', 'w', encoding='utf-8') as f:\n",
32 | " f.write(\"奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil.\\n\" +\n",
33 | " \"They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts.\\n\" +\n",
34 | " \"So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2.\\n\" +\n",
35 | " \"Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel.\\n\")"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 1,
41 | "id": "4baffaa2",
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "ename": "UnauthorizedException",
46 | "evalue": "(401)\nReason: Unauthorized\nHTTP response headers: HTTPHeaderDict({'strict-transport-security': 'max-age=63072000; includeSubDomains; preload', 'referrer-policy': 'no-referrer', 'x-content-type-options': 'nosniff', 'x-download-options': 'noopen', 'x-frame-options': 'SAMEORIGIN', 'x-xss-protection': '1; mode=block', 'content-type': 'application/json; charset=utf-8', 'etag': '\"hwxzxeosy8q\"', 'vary': 'Accept-Encoding', 'X-Cloud-Trace-Context': '0cfb129e0a8afd4ebe932c81e9e12595;o=1', 'Date': 'Thu, 23 Nov 2023 03:19:17 GMT', 'Server': 'Google Frontend', 'Content-Length': '26', 'Via': '1.1 google', 'Alt-Svc': 'h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000'})\nHTTP response body: {\"message\":\"Unauthorized\"}\n",
47 | "output_type": "error",
48 | "traceback": [
49 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
50 | "\u001b[0;31mUnauthorizedException\u001b[0m Traceback (most recent call last)",
51 | "\u001b[1;32m/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 10\u001b[0m question \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mDo you know anyone named 奥利奥?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 12\u001b[0m \u001b[39m# You can also use a model adapter you've trained with GradientModelAdapterLLM\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m llm \u001b[39m=\u001b[39m GradientBaseModelLLM(base_model_slug\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mllama2-7b-chat\u001b[39;49m\u001b[39m\"\u001b[39;49m,max_tokens\u001b[39m=\u001b[39;49m\u001b[39m100\u001b[39;49m)\n\u001b[1;32m 15\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mWithout RAG: \u001b[39m\u001b[39m{\u001b[39;00mllm\u001b[39m.\u001b[39mcomplete(question)\u001b[39m.\u001b[39mtext\u001b[39m.\u001b[39mstrip()\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 16\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m'\u001b[39m)\n",
52 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/llama_index/llms/gradient.py:133\u001b[0m, in \u001b[0;36mGradientBaseModelLLM.__init__\u001b[0;34m(self, access_token, base_model_slug, host, max_tokens, workspace_id, callback_manager, is_chat_model)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 113\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 114\u001b[0m \u001b[39m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 121\u001b[0m is_chat_model: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 122\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 123\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[1;32m 124\u001b[0m access_token\u001b[39m=\u001b[39maccess_token,\n\u001b[1;32m 125\u001b[0m base_model_slug\u001b[39m=\u001b[39mbase_model_slug,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 130\u001b[0m is_chat_model\u001b[39m=\u001b[39mis_chat_model,\n\u001b[1;32m 131\u001b[0m )\n\u001b[0;32m--> 133\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_gradient\u001b[39m.\u001b[39;49mget_base_model(\n\u001b[1;32m 134\u001b[0m base_model_slug\u001b[39m=\u001b[39;49mbase_model_slug,\n\u001b[1;32m 135\u001b[0m )\n",
53 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:92\u001b[0m, in \u001b[0;36mGradient.get_base_model\u001b[0;34m(self, base_model_slug)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_base_model\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, base_model_slug: \u001b[39mstr\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m BaseModel:\n\u001b[0;32m---> 92\u001b[0m models: List[BaseModel] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models(only_base\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 93\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mnext\u001b[39m(\n\u001b[1;32m 94\u001b[0m \u001b[39mfilter\u001b[39m(\u001b[39mlambda\u001b[39;00m model: model\u001b[39m.\u001b[39m_slug \u001b[39m==\u001b[39m base_model_slug, models)\n\u001b[1;32m 95\u001b[0m )\n",
54 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:118\u001b[0m, in \u001b[0;36mGradient.list_models\u001b[0;34m(self, only_base)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mlist_models\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, only_base: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m List[Model]: \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m--> 118\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_models_api\u001b[39m.\u001b[39;49mlist_models(\n\u001b[1;32m 119\u001b[0m x_gradient_workspace_id\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_workspace_id, only_base\u001b[39m=\u001b[39;49monly_base\n\u001b[1;32m 120\u001b[0m )\n\u001b[1;32m 122\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize_model\u001b[39m(\n\u001b[1;32m 123\u001b[0m api_model: Any,\n\u001b[1;32m 124\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Model:\n\u001b[1;32m 125\u001b[0m \u001b[39mif\u001b[39;00m api_model\u001b[39m.\u001b[39mtype \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mbaseModel\u001b[39m\u001b[39m\"\u001b[39m:\n",
55 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n",
56 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n",
57 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n",
58 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:862\u001b[0m, in \u001b[0;36mModelsApi.list_models\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m 860\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m'\u001b[39m\u001b[39m_preload_content\u001b[39m\u001b[39m'\u001b[39m \u001b[39min\u001b[39;00m kwargs:\n\u001b[1;32m 861\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mError! Please call the list_models_with_http_info method with `_preload_content` instead and obtain raw data from ApiResponse.raw_data\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 862\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models_with_http_info(x_gradient_workspace_id, only_base, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
59 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n",
60 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n",
61 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n",
62 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:964\u001b[0m, in \u001b[0;36mModelsApi.list_models_with_http_info\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m 957\u001b[0m _auth_settings \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mAccessToken\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[1;32m 959\u001b[0m _response_types_map \u001b[39m=\u001b[39m {\n\u001b[1;32m 960\u001b[0m \u001b[39m'\u001b[39m\u001b[39m200\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsSuccess\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 961\u001b[0m \u001b[39m'\u001b[39m\u001b[39m4XX\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsError\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 962\u001b[0m }\n\u001b[0;32m--> 964\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapi_client\u001b[39m.\u001b[39;49mcall_api(\n\u001b[1;32m 965\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39m/models\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[1;32m 966\u001b[0m _path_params,\n\u001b[1;32m 967\u001b[0m _query_params,\n\u001b[1;32m 968\u001b[0m _header_params,\n\u001b[1;32m 969\u001b[0m body\u001b[39m=\u001b[39;49m_body_params,\n\u001b[1;32m 970\u001b[0m post_params\u001b[39m=\u001b[39;49m_form_params,\n\u001b[1;32m 971\u001b[0m files\u001b[39m=\u001b[39;49m_files,\n\u001b[1;32m 972\u001b[0m response_types_map\u001b[39m=\u001b[39;49m_response_types_map,\n\u001b[1;32m 973\u001b[0m auth_settings\u001b[39m=\u001b[39;49m_auth_settings,\n\u001b[1;32m 974\u001b[0m async_req\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39masync_req\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m 975\u001b[0m 
_return_http_data_only\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_return_http_data_only\u001b[39;49m\u001b[39m'\u001b[39;49m), \u001b[39m# noqa: E501\u001b[39;49;00m\n\u001b[1;32m 976\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_preload_content\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mTrue\u001b[39;49;00m),\n\u001b[1;32m 977\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_timeout\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m 978\u001b[0m collection_formats\u001b[39m=\u001b[39;49m_collection_formats,\n\u001b[1;32m 979\u001b[0m _request_auth\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_auth\u001b[39;49m\u001b[39m'\u001b[39;49m))\n",
63 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:410\u001b[0m, in \u001b[0;36mApiClient.call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request (synchronous) and returns deserialized data.\u001b[39;00m\n\u001b[1;32m 369\u001b[0m \n\u001b[1;32m 370\u001b[0m \u001b[39mTo make an async_req request, set the async_req parameter.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[39m then the method will return the response directly.\u001b[39;00m\n\u001b[1;32m 408\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m async_req:\n\u001b[0;32m--> 410\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__call_api(resource_path, method,\n\u001b[1;32m 411\u001b[0m path_params, query_params, header_params,\n\u001b[1;32m 412\u001b[0m body, post_params, files,\n\u001b[1;32m 413\u001b[0m response_types_map, auth_settings,\n\u001b[1;32m 414\u001b[0m _return_http_data_only, collection_formats,\n\u001b[1;32m 415\u001b[0m _preload_content, _request_timeout, _host,\n\u001b[1;32m 416\u001b[0m _request_auth)\n\u001b[1;32m 418\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpool\u001b[39m.\u001b[39mapply_async(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__call_api, (resource_path,\n\u001b[1;32m 419\u001b[0m method, path_params,\n\u001b[1;32m 420\u001b[0m query_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 428\u001b[0m _request_timeout,\n\u001b[1;32m 429\u001b[0m _host, _request_auth))\n",
64 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:225\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n\u001b[1;32m 224\u001b[0m e\u001b[39m.\u001b[39mbody \u001b[39m=\u001b[39m e\u001b[39m.\u001b[39mbody\u001b[39m.\u001b[39mdecode(\u001b[39m'\u001b[39m\u001b[39mutf-8\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 225\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 227\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlast_response \u001b[39m=\u001b[39m response_data\n\u001b[1;32m 229\u001b[0m return_data \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m# assuming derialization is not needed\u001b[39;00m\n",
65 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:215\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 211\u001b[0m url \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m?\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m url_query\n\u001b[1;32m 213\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 214\u001b[0m \u001b[39m# perform request and return response\u001b[39;00m\n\u001b[0;32m--> 215\u001b[0m response_data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 216\u001b[0m method, url,\n\u001b[1;32m 217\u001b[0m query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m 218\u001b[0m headers\u001b[39m=\u001b[39;49mheader_params,\n\u001b[1;32m 219\u001b[0m post_params\u001b[39m=\u001b[39;49mpost_params, body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 220\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 221\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_request_timeout)\n\u001b[1;32m 222\u001b[0m \u001b[39mexcept\u001b[39;00m ApiException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 223\u001b[0m \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n",
66 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:436\u001b[0m, in \u001b[0;36mApiClient.request\u001b[0;34m(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request using RESTClient.\"\"\"\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrest_client\u001b[39m.\u001b[39;49mget_request(url,\n\u001b[1;32m 437\u001b[0m query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m 438\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 439\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_request_timeout,\n\u001b[1;32m 440\u001b[0m headers\u001b[39m=\u001b[39;49mheaders)\n\u001b[1;32m 441\u001b[0m \u001b[39melif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mHEAD\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 442\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrest_client\u001b[39m.\u001b[39mhead_request(url,\n\u001b[1;32m 443\u001b[0m query_params\u001b[39m=\u001b[39mquery_params,\n\u001b[1;32m 444\u001b[0m _preload_content\u001b[39m=\u001b[39m_preload_content,\n\u001b[1;32m 445\u001b[0m _request_timeout\u001b[39m=\u001b[39m_request_timeout,\n\u001b[1;32m 446\u001b[0m headers\u001b[39m=\u001b[39mheaders)\n",
67 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/rest.py:243\u001b[0m, in \u001b[0;36mRESTClientObject.get_request\u001b[0;34m(self, url, headers, query_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_request\u001b[39m(\u001b[39mself\u001b[39m, url, headers\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, query_params\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, _preload_content\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 242\u001b[0m _request_timeout\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m--> 243\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\u001b[39m\"\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m\"\u001b[39;49m, url,\n\u001b[1;32m 244\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 245\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 246\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_request_timeout,\n\u001b[1;32m 247\u001b[0m query_params\u001b[39m=\u001b[39;49mquery_params)\n",
68 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/rest.py:226\u001b[0m, in \u001b[0;36mRESTClientObject.request\u001b[0;34m(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[39mraise\u001b[39;00m BadRequestException(http_resp\u001b[39m=\u001b[39mr)\n\u001b[1;32m 225\u001b[0m \u001b[39mif\u001b[39;00m r\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n\u001b[0;32m--> 226\u001b[0m \u001b[39mraise\u001b[39;00m UnauthorizedException(http_resp\u001b[39m=\u001b[39mr)\n\u001b[1;32m 228\u001b[0m \u001b[39mif\u001b[39;00m r\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m \u001b[39m403\u001b[39m:\n\u001b[1;32m 229\u001b[0m \u001b[39mraise\u001b[39;00m ForbiddenException(http_resp\u001b[39m=\u001b[39mr)\n",
69 | "\u001b[0;31mUnauthorizedException\u001b[0m: (401)\nReason: Unauthorized\nHTTP response headers: HTTPHeaderDict({'strict-transport-security': 'max-age=63072000; includeSubDomains; preload', 'referrer-policy': 'no-referrer', 'x-content-type-options': 'nosniff', 'x-download-options': 'noopen', 'x-frame-options': 'SAMEORIGIN', 'x-xss-protection': '1; mode=block', 'content-type': 'application/json; charset=utf-8', 'etag': '\"hwxzxeosy8q\"', 'vary': 'Accept-Encoding', 'X-Cloud-Trace-Context': '0cfb129e0a8afd4ebe932c81e9e12595;o=1', 'Date': 'Thu, 23 Nov 2023 03:19:17 GMT', 'Server': 'Google Frontend', 'Content-Length': '26', 'Via': '1.1 google', 'Alt-Svc': 'h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000'})\nHTTP response body: {\"message\":\"Unauthorized\"}\n"
70 | ]
71 | }
72 | ],
73 | "source": [
74 | "import os\n",
75 | "from getpass import getpass\n",
76 | "\n",
77 | "from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex\n",
78 | "from llama_index.embeddings import GradientEmbedding\n",
79 | "from llama_index.llms import GradientBaseModelLLM\n",
80 | "\n",
81 | "# The Gradient clients below are built without explicit credentials, so they depend on these variables. Keep values already exported; prompt (no echo) for missing ones so no secret is saved in the notebook.\n",
82 | "for env_var in (\"GRADIENT_ACCESS_TOKEN\", \"GRADIENT_WORKSPACE_ID\"):\n",
83 | "    os.environ[env_var] = os.environ.get(env_var) or getpass(f\"{env_var}: \")\n",
84 | "\n",
85 | "question = \"Do you know anyone named 奥利奥?\"\n",
86 | "# You can also use a model adapter you've trained with GradientModelAdapterLLM\n",
87 | "llm = GradientBaseModelLLM(base_model_slug=\"llama2-7b-chat\", max_tokens=100)\n",
88 | "\n",
89 | "print(f'Without RAG: {llm.complete(question)}')  # Baseline: the bare LLM, no retrieved context\n",
90 | "print()\n",
91 | "\n",
92 | "documents = SimpleDirectoryReader(\"./data\").load_data() # Documents to index\n",
93 | "embed_model = GradientEmbedding(gradient_model_slug=\"bge-large\") # The model used to generate embeddings\n",
94 | "service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model=embed_model) # The service context defines the LLM and the embedding model to be used by the query engine\n",
95 | "\n",
96 | "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
97 | "query_engine = index.as_query_engine()\n",
98 | "response = query_engine.query(question) # Same question, now answered through the index built from ./data\n",
99 | "print(f'With RAG: {response}')"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": []
108 | }
109 | ],
110 | "metadata": {
111 | "kernelspec": {
112 | "display_name": "Python 3.9.12 ('.venv': venv)",
113 | "language": "python",
114 | "name": "python3"
115 | },
116 | "language_info": {
117 | "codemirror_mode": {
118 | "name": "ipython",
119 | "version": 3
120 | },
121 | "file_extension": ".py",
122 | "mimetype": "text/x-python",
123 | "name": "python",
124 | "nbconvert_exporter": "python",
125 | "pygments_lexer": "ipython3",
126 | "version": "3.11.3"
127 | },
128 | "vscode": {
129 | "interpreter": {
130 | "hash": "5ae9fa2777630f93d325d67fd0c37f7375ed1afcb20dd85f425eb8692a47ff3f"
131 | }
132 | }
133 | },
134 | "nbformat": 4,
135 | "nbformat_minor": 5
136 | }
137 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # retrieval-augmented-generation-notes
2 | Slides for "Retrieval Augmented Generation" video: https://www.youtube.com/watch?v=rhZgXNdhWDY
3 |
--------------------------------------------------------------------------------
/Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hkproj/retrieval-augmented-generation-notes/2ee02798083a63f00e9a8b80f9b1c70a93fed3e2/Slides.pdf
--------------------------------------------------------------------------------
/data/aoliao.txt:
--------------------------------------------------------------------------------
1 | 奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil.
2 | They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts.
3 | So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2.
4 | Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel.
5 |
--------------------------------------------------------------------------------