├── Gradient.ipynb ├── README.md ├── Slides.pdf └── data └── aoliao.txt /Gradient.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "79a726c5", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Note: you may need to restart the kernel to use updated packages.\n", 14 | "Note: you may need to restart the kernel to use updated packages.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "%pip install llama-index --quiet\n", 20 | "%pip install gradientai --quiet\n", 21 | "%mkdir -p data" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Create a document containing information about 奥利奥, the channel's mascotte.s\n", 31 | "with open('data/aoliao.txt', 'w') as f:\n", 32 | " f.write(\"奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil.\\n\" +\n", 33 | " \"They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts.\\n\" +\n", 34 | " \"So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2.\\n\" +\n", 35 | " \"Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel.\\n\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "id": "4baffaa2", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "ename": "UnauthorizedException", 46 | "evalue": "(401)\nReason: Unauthorized\nHTTP response headers: HTTPHeaderDict({'strict-transport-security': 'max-age=63072000; includeSubDomains; preload', 'referrer-policy': 'no-referrer', 'x-content-type-options': 'nosniff', 'x-download-options': 'noopen', 'x-frame-options': 
'SAMEORIGIN', 'x-xss-protection': '1; mode=block', 'content-type': 'application/json; charset=utf-8', 'etag': '\"hwxzxeosy8q\"', 'vary': 'Accept-Encoding', 'X-Cloud-Trace-Context': '0cfb129e0a8afd4ebe932c81e9e12595;o=1', 'Date': 'Thu, 23 Nov 2023 03:19:17 GMT', 'Server': 'Google Frontend', 'Content-Length': '26', 'Via': '1.1 google', 'Alt-Svc': 'h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000'})\nHTTP response body: {\"message\":\"Unauthorized\"}\n", 47 | "output_type": "error", 48 | "traceback": [ 49 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 50 | "\u001b[0;31mUnauthorizedException\u001b[0m Traceback (most recent call last)", 51 | "\u001b[1;32m/home/kira/projects/retrieval-augmented-generation-notes/Gradient.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 10\u001b[0m question \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mDo you know anyone named 奥利奥?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 12\u001b[0m \u001b[39m# You can also use a model adapter you've trained with GradientModelAdapterLLM\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m llm \u001b[39m=\u001b[39m GradientBaseModelLLM(base_model_slug\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mllama2-7b-chat\u001b[39;49m\u001b[39m\"\u001b[39;49m,max_tokens\u001b[39m=\u001b[39;49m\u001b[39m100\u001b[39;49m)\n\u001b[1;32m 15\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mWithout RAG: \u001b[39m\u001b[39m{\u001b[39;00mllm\u001b[39m.\u001b[39mcomplete(question)\u001b[39m.\u001b[39mtext\u001b[39m.\u001b[39mstrip()\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 16\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m'\u001b[39m)\n", 52 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/llama_index/llms/gradient.py:133\u001b[0m, in \u001b[0;36mGradientBaseModelLLM.__init__\u001b[0;34m(self, access_token, 
base_model_slug, host, max_tokens, workspace_id, callback_manager, is_chat_model)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 113\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 114\u001b[0m \u001b[39m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 121\u001b[0m is_chat_model: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 122\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 123\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[1;32m 124\u001b[0m access_token\u001b[39m=\u001b[39maccess_token,\n\u001b[1;32m 125\u001b[0m base_model_slug\u001b[39m=\u001b[39mbase_model_slug,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 130\u001b[0m is_chat_model\u001b[39m=\u001b[39mis_chat_model,\n\u001b[1;32m 131\u001b[0m )\n\u001b[0;32m--> 133\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_gradient\u001b[39m.\u001b[39;49mget_base_model(\n\u001b[1;32m 134\u001b[0m base_model_slug\u001b[39m=\u001b[39;49mbase_model_slug,\n\u001b[1;32m 135\u001b[0m )\n", 53 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:92\u001b[0m, in \u001b[0;36mGradient.get_base_model\u001b[0;34m(self, base_model_slug)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_base_model\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, base_model_slug: \u001b[39mstr\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m BaseModel:\n\u001b[0;32m---> 92\u001b[0m models: List[BaseModel] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models(only_base\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[1;32m 93\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mnext\u001b[39m(\n\u001b[1;32m 94\u001b[0m 
\u001b[39mfilter\u001b[39m(\u001b[39mlambda\u001b[39;00m model: model\u001b[39m.\u001b[39m_slug \u001b[39m==\u001b[39m base_model_slug, models)\n\u001b[1;32m 95\u001b[0m )\n", 54 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/_gradient.py:118\u001b[0m, in \u001b[0;36mGradient.list_models\u001b[0;34m(self, only_base)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mlist_models\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m, only_base: \u001b[39mbool\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m List[Model]: \u001b[39m# type: ignore\u001b[39;00m\n\u001b[0;32m--> 118\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_models_api\u001b[39m.\u001b[39;49mlist_models(\n\u001b[1;32m 119\u001b[0m x_gradient_workspace_id\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_workspace_id, only_base\u001b[39m=\u001b[39;49monly_base\n\u001b[1;32m 120\u001b[0m )\n\u001b[1;32m 122\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize_model\u001b[39m(\n\u001b[1;32m 123\u001b[0m api_model: Any,\n\u001b[1;32m 124\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Model:\n\u001b[1;32m 125\u001b[0m \u001b[39mif\u001b[39;00m api_model\u001b[39m.\u001b[39mtype \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mbaseModel\u001b[39m\u001b[39m\"\u001b[39m:\n", 55 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n", 56 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n", 57 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in 
\u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n", 58 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:862\u001b[0m, in \u001b[0;36mModelsApi.list_models\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m 860\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m'\u001b[39m\u001b[39m_preload_content\u001b[39m\u001b[39m'\u001b[39m \u001b[39min\u001b[39;00m kwargs:\n\u001b[1;32m 861\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mError! Please call the list_models_with_http_info method with `_preload_content` instead and obtain raw data from ApiResponse.raw_data\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 862\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlist_models_with_http_info(x_gradient_workspace_id, only_base, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", 59 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:40\u001b[0m, in \u001b[0;36mpydantic.decorator.validate_arguments.validate.wrapper_function\u001b[0;34m()\u001b[0m\n", 60 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:134\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.call\u001b[0;34m()\u001b[0m\n", 61 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/pydantic/decorator.py:206\u001b[0m, in \u001b[0;36mpydantic.decorator.ValidatedFunction.execute\u001b[0;34m()\u001b[0m\n", 62 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api/models_api.py:964\u001b[0m, in \u001b[0;36mModelsApi.list_models_with_http_info\u001b[0;34m(self, x_gradient_workspace_id, only_base, **kwargs)\u001b[0m\n\u001b[1;32m 957\u001b[0m _auth_settings 
\u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mAccessToken\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[1;32m 959\u001b[0m _response_types_map \u001b[39m=\u001b[39m {\n\u001b[1;32m 960\u001b[0m \u001b[39m'\u001b[39m\u001b[39m200\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsSuccess\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 961\u001b[0m \u001b[39m'\u001b[39m\u001b[39m4XX\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mListModelsError\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 962\u001b[0m }\n\u001b[0;32m--> 964\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapi_client\u001b[39m.\u001b[39;49mcall_api(\n\u001b[1;32m 965\u001b[0m \u001b[39m'\u001b[39;49m\u001b[39m/models\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m'\u001b[39;49m,\n\u001b[1;32m 966\u001b[0m _path_params,\n\u001b[1;32m 967\u001b[0m _query_params,\n\u001b[1;32m 968\u001b[0m _header_params,\n\u001b[1;32m 969\u001b[0m body\u001b[39m=\u001b[39;49m_body_params,\n\u001b[1;32m 970\u001b[0m post_params\u001b[39m=\u001b[39;49m_form_params,\n\u001b[1;32m 971\u001b[0m files\u001b[39m=\u001b[39;49m_files,\n\u001b[1;32m 972\u001b[0m response_types_map\u001b[39m=\u001b[39;49m_response_types_map,\n\u001b[1;32m 973\u001b[0m auth_settings\u001b[39m=\u001b[39;49m_auth_settings,\n\u001b[1;32m 974\u001b[0m async_req\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39masync_req\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m 975\u001b[0m _return_http_data_only\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_return_http_data_only\u001b[39;49m\u001b[39m'\u001b[39;49m), \u001b[39m# noqa: E501\u001b[39;49;00m\n\u001b[1;32m 976\u001b[0m 
_preload_content\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_preload_content\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mTrue\u001b[39;49;00m),\n\u001b[1;32m 977\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_timeout\u001b[39;49m\u001b[39m'\u001b[39;49m),\n\u001b[1;32m 978\u001b[0m collection_formats\u001b[39m=\u001b[39;49m_collection_formats,\n\u001b[1;32m 979\u001b[0m _request_auth\u001b[39m=\u001b[39;49m_params\u001b[39m.\u001b[39;49mget(\u001b[39m'\u001b[39;49m\u001b[39m_request_auth\u001b[39;49m\u001b[39m'\u001b[39;49m))\n", 63 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:410\u001b[0m, in \u001b[0;36mApiClient.call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request (synchronous) and returns deserialized data.\u001b[39;00m\n\u001b[1;32m 369\u001b[0m \n\u001b[1;32m 370\u001b[0m \u001b[39mTo make an async_req request, set the async_req parameter.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[39m then the method will return the response directly.\u001b[39;00m\n\u001b[1;32m 408\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m async_req:\n\u001b[0;32m--> 410\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__call_api(resource_path, method,\n\u001b[1;32m 411\u001b[0m path_params, query_params, header_params,\n\u001b[1;32m 412\u001b[0m body, post_params, files,\n\u001b[1;32m 413\u001b[0m response_types_map, 
auth_settings,\n\u001b[1;32m 414\u001b[0m _return_http_data_only, collection_formats,\n\u001b[1;32m 415\u001b[0m _preload_content, _request_timeout, _host,\n\u001b[1;32m 416\u001b[0m _request_auth)\n\u001b[1;32m 418\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpool\u001b[39m.\u001b[39mapply_async(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m__call_api, (resource_path,\n\u001b[1;32m 419\u001b[0m method, path_params,\n\u001b[1;32m 420\u001b[0m query_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 428\u001b[0m _request_timeout,\n\u001b[1;32m 429\u001b[0m _host, _request_auth))\n", 64 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:225\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n\u001b[1;32m 224\u001b[0m e\u001b[39m.\u001b[39mbody \u001b[39m=\u001b[39m e\u001b[39m.\u001b[39mbody\u001b[39m.\u001b[39mdecode(\u001b[39m'\u001b[39m\u001b[39mutf-8\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 225\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 227\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlast_response \u001b[39m=\u001b[39m response_data\n\u001b[1;32m 229\u001b[0m return_data \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m# assuming derialization is not needed\u001b[39;00m\n", 65 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:215\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_types_map, auth_settings, 
_return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _request_auth)\u001b[0m\n\u001b[1;32m 211\u001b[0m url \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m?\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m url_query\n\u001b[1;32m 213\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 214\u001b[0m \u001b[39m# perform request and return response\u001b[39;00m\n\u001b[0;32m--> 215\u001b[0m response_data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 216\u001b[0m method, url,\n\u001b[1;32m 217\u001b[0m query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m 218\u001b[0m headers\u001b[39m=\u001b[39;49mheader_params,\n\u001b[1;32m 219\u001b[0m post_params\u001b[39m=\u001b[39;49mpost_params, body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 220\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 221\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_request_timeout)\n\u001b[1;32m 222\u001b[0m \u001b[39mexcept\u001b[39;00m ApiException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 223\u001b[0m \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mbody:\n", 66 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/api_client.py:436\u001b[0m, in \u001b[0;36mApiClient.request\u001b[0;34m(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Makes the HTTP request using RESTClient.\"\"\"\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrest_client\u001b[39m.\u001b[39;49mget_request(url,\n\u001b[1;32m 437\u001b[0m 
query_params\u001b[39m=\u001b[39;49mquery_params,\n\u001b[1;32m 438\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 439\u001b[0m _request_timeout\u001b[39m=\u001b[39;49m_request_timeout,\n\u001b[1;32m 440\u001b[0m headers\u001b[39m=\u001b[39;49mheaders)\n\u001b[1;32m 441\u001b[0m \u001b[39melif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mHEAD\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 442\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrest_client\u001b[39m.\u001b[39mhead_request(url,\n\u001b[1;32m 443\u001b[0m query_params\u001b[39m=\u001b[39mquery_params,\n\u001b[1;32m 444\u001b[0m _preload_content\u001b[39m=\u001b[39m_preload_content,\n\u001b[1;32m 445\u001b[0m _request_timeout\u001b[39m=\u001b[39m_request_timeout,\n\u001b[1;32m 446\u001b[0m headers\u001b[39m=\u001b[39mheaders)\n", 67 | "File \u001b[0;32m~/anaconda3/envs/pytorch-llamaindex/lib/python3.11/site-packages/gradientai/openapi/client/rest.py:243\u001b[0m, in \u001b[0;36mRESTClientObject.get_request\u001b[0;34m(self, url, headers, query_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_request\u001b[39m(\u001b[39mself\u001b[39m, url, headers\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, query_params\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, _preload_content\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 242\u001b[0m _request_timeout\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m):\n\u001b[0;32m--> 243\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest(\u001b[39m\"\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m\"\u001b[39;49m, url,\n\u001b[1;32m 244\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 245\u001b[0m _preload_content\u001b[39m=\u001b[39;49m_preload_content,\n\u001b[1;32m 246\u001b[0m 
# --- Cell 3: answer a question with and without retrieval --------------------
import os
from getpass import getpass

from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings import GradientEmbedding
from llama_index.llms import GradientBaseModelLLM

# Tunables for the demo, kept together so they are easy to change.
BASE_MODEL_SLUG = "llama2-7b-chat"
EMBEDDING_MODEL_SLUG = "bge-large"
MAX_TOKENS = 100
CHUNK_SIZE = 1024
DATA_DIR = "./data"

# Credentials come from Gradient's website. They are taken from the
# environment when already set and prompted for (without echo) otherwise, so
# real values are never overwritten by placeholders and never saved in the
# notebook. Running with placeholder values is what produced the
# "401 Unauthorized" error previously stored in this cell's output.
for credential_var in ("GRADIENT_ACCESS_TOKEN", "GRADIENT_WORKSPACE_ID"):
    if not os.environ.get(credential_var):
        os.environ[credential_var] = getpass(f"{credential_var}: ")

question = "Do you know anyone named 奥利奥?"

# You can also use a model adapter you've trained with GradientModelAdapterLLM
llm = GradientBaseModelLLM(base_model_slug=BASE_MODEL_SLUG, max_tokens=MAX_TOKENS)

# Baseline: the bare LLM, with no access to the document written earlier.
print(f'Without RAG: {llm.complete(question)}')
print()

# Documents to index
documents = SimpleDirectoryReader(DATA_DIR).load_data()
# The model used to generate embeddings
embed_model = GradientEmbedding(gradient_model_slug=EMBEDDING_MODEL_SLUG)
# The service context defines the LLM and the embedding model to be used by
# the query engine
service_context = ServiceContext.from_defaults(
    chunk_size=CHUNK_SIZE,
    llm=llm,
    embed_model=embed_model,
)

index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

# Same question, now answered through the query engine built on the index.
response = query_engine.query(question)
print(f'With RAG: {response}')
"5ae9fa2777630f93d325d67fd0c37f7375ed1afcb20dd85f425eb8692a47ff3f" 131 | } 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 5 136 | } 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # retrieval-augmented-generation-notes 2 | Slides for "Retrieval Augmented Generation" video: https://www.youtube.com/watch?v=rhZgXNdhWDY 3 | -------------------------------------------------------------------------------- /Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkproj/retrieval-augmented-generation-notes/2ee02798083a63f00e9a8b80f9b1c70a93fed3e2/Slides.pdf -------------------------------------------------------------------------------- /data/aoliao.txt: -------------------------------------------------------------------------------- 1 | 奥利奥 is a 3 years old cat that teaches deep learning models on its YouTube channel along with his friend Umar Jamil. 2 | They're both passionate about machine learning and deep learning, and 奥利奥 is very fast in learning new concepts. 3 | So far, the duo has made videos on Large Language Models, Stable Diffusion and Transformer models, including the popular model LLaMA 2. 4 | Apart from machine learning, 奥利奥 likes to play with his friend Umar, especially when he is recording videos for their YouTube channel. 5 | --------------------------------------------------------------------------------