├── .DS_Store ├── .dockerignore ├── .gitignore ├── .python-version ├── Dockerfile ├── README.md ├── bot ├── app.py ├── backend.py ├── create.py ├── frontend.py └── query.py ├── notebook └── pinecone.ipynb └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teekyboy/gcp_chatbot/45239befd187c558963ff1bcf7bbfb64aaa426a1/.DS_Store -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | *.swp 6 | *.bak 7 | *.log 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | #config.ini file 163 | config.ini 164 | 165 | #notebook folder 166 | notebook/ 167 | 168 | #personal data 169 | chatbot.json 170 | .python-version 171 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | firebase-bot 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.10.11-buster 2 | 3 | WORKDIR /app 4 | 5 | RUN apt-get update && apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | gcc \ 8 | libpq-dev \ 9 | python3-dev \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | COPY requirements.txt /app 14 | COPY chatbot.json . 15 | 16 | RUN pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt 17 | 18 | COPY . /app 19 | 20 | ENV PORT=8080 21 | 22 | EXPOSE $PORT 23 | 24 | CMD ["sh", "-c", "streamlit run --server.port $PORT bot/app.py"] 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Using Firebase and Pinecone to Create a Question Answering App 2 | 3 | This code enables the creation of a database on Firbase and facilitates answering queries utilizing the said database. The vectors are preserved within Pinecone. 4 | 5 | Queries and responses are recorded in a csv on Firebase. It is no longer necessary for the uploaded files to originate from local uploads. 6 | 7 | Used Google Secret Manager to store all keys. 
8 | -------------------------------------------------------------------------------- /bot/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from backend import app as backend_app 3 | from frontend import main as frontend_app 4 | 5 | st.set_page_config(page_title="My App", page_icon=None, layout="wide", initial_sidebar_state="expanded") 6 | 7 | def main(): 8 | st.sidebar.title("Navigation") 9 | app_mode = st.sidebar.radio("What do you want to do?", ["Load New Files" , "Test My Knowledge"]) 10 | 11 | if app_mode == "Load New Files": 12 | backend_app() 13 | elif app_mode == "Test My Knowledge": 14 | frontend_app() 15 | 16 | 17 | add_custom_css() 18 | 19 | def add_custom_css(): 20 | custom_css = """ 21 | 42 | """ 43 | st.markdown(custom_css, unsafe_allow_html=True) 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /bot/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | from google.cloud import storage 3 | import streamlit as st 4 | import pinecone 5 | from create import load_documents 6 | from langchain.vectorstores import Pinecone 7 | 8 | #for cloud run 9 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/app/chatbot.json' 10 | #for local run 11 | #os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../chatbot.json' 12 | 13 | def upload_file_to_gcs(uploaded_file, folder_prefix): 14 | storage_client = storage.Client() 15 | bucket_name = "chatbot.appspot.com" 16 | bucket = storage_client.get_bucket(bucket_name) 17 | blob = bucket.blob(f"{folder_prefix}/{uploaded_file.name}") 18 | blob.upload_from_string(uploaded_file.getvalue(), content_type=uploaded_file.type) 19 | 20 | def create_database(docs, embeddings, PINECONE_API_KEY, PINECONE_ENV, PINECONE_INDEX): 21 | pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV) 22 | index_name = PINECONE_INDEX 23 | 
db = Pinecone.from_documents(docs, embeddings, index_name=index_name) 24 | return db 25 | 26 | def app(): 27 | st.title("Upload New Files") 28 | uploaded_file = st.file_uploader("Choose a file to upload", type=['txt', 'pdf', 'doc', 'docx']) 29 | 30 | if uploaded_file is not None: 31 | file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size} 32 | st.write(file_details) 33 | 34 | if st.button("Upload and Process Documents"): 35 | folder_prefix = "data/input" 36 | upload_file_to_gcs(uploaded_file, folder_prefix) 37 | docs, embeddings, PINECONE_API_KEY, PINECONE_ENV, PINECONE_INDEX = load_documents() 38 | db = create_database(docs, embeddings, PINECONE_API_KEY, PINECONE_ENV, PINECONE_INDEX) 39 | st.success("Database created successfully.") 40 | 41 | if __name__ == "__main__": 42 | app() 43 | -------------------------------------------------------------------------------- /bot/create.py: -------------------------------------------------------------------------------- 1 | from langchain.document_loaders import GCSDirectoryLoader 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter 3 | from langchain.embeddings.openai import OpenAIEmbeddings 4 | import os 5 | from google.cloud import secretmanager_v1 6 | 7 | os.environ['GOOGLE_CLOUD_PROJECT'] = 'chatbot' 8 | 9 | def get_secret(secret_id): 10 | project_id = os.environ['GOOGLE_CLOUD_PROJECT'] 11 | client = secretmanager_v1.SecretManagerServiceClient() 12 | secret_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest" 13 | response = client.access_secret_version(request={"name": secret_name}) 14 | return response.payload.data.decode('UTF-8') 15 | 16 | def load_documents(): 17 | #for cloud run 18 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/app/chatbot.json' 19 | #for local run 20 | #os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../chatbot.json' 21 | OPENAI_API_KEY = get_secret('openai_api_key') 22 | PINECONE_API_KEY = 
get_secret('pinecone_api_key') 23 | PINECONE_ENV = get_secret('pinecone_env') 24 | PINECONE_INDEX = get_secret('pinecone_index') 25 | loader = GCSDirectoryLoader(project_name="chatbot", bucket="chatbot.appspot.com", prefix="data/input") 26 | documents = loader.load() 27 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20) 28 | docs = text_splitter.split_documents(documents) 29 | embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY) 30 | return docs, embeddings, PINECONE_API_KEY, PINECONE_ENV, PINECONE_INDEX 31 | -------------------------------------------------------------------------------- /bot/frontend.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from query import run_query, log_question_answer 3 | import os 4 | from langchain.vectorstores import Pinecone 5 | from langchain.embeddings.openai import OpenAIEmbeddings 6 | from google.cloud import secretmanager_v1 7 | 8 | os.environ['GOOGLE_CLOUD_PROJECT'] = 'chatbot' 9 | 10 | def get_secret(secret_id): 11 | project_id = os.environ['GOOGLE_CLOUD_PROJECT'] 12 | client = secretmanager_v1.SecretManagerServiceClient() 13 | secret_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest" 14 | response = client.access_secret_version(request={"name": secret_name}) 15 | return response.payload.data.decode('UTF-8') 16 | 17 | #for cloud run 18 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/app/chatbot.json' 19 | #for local run 20 | #os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../chatbot.json' 21 | OPENAI_API_KEY = get_secret('openai_api_key') 22 | PINECONE_API_KEY = get_secret('pinecone_api_key') 23 | PINECONE_ENV = get_secret('pinecone_env') 24 | PINECONE_INDEX = get_secret('pinecone_index') 25 | index_name = PINECONE_INDEX 26 | embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY) 27 | db = Pinecone.from_existing_index(index_name, embeddings) 28 | 29 | def main(): 30 | st.title("What do you 
want to know?") 31 | st.subheader("Ask Questions") 32 | user_input = st.text_input("Enter your question:") 33 | 34 | if st.button("Submit"): 35 | if user_input: 36 | answer = run_query(user_input, db, OPENAI_API_KEY) 37 | st.write(answer) 38 | log_question_answer(user_input, answer) 39 | else: 40 | st.error("Please enter a question before submitting.") 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /bot/query.py: -------------------------------------------------------------------------------- 1 | from langchain.llms import OpenAI 2 | from langchain.chains import RetrievalQA 3 | import pandas as pd 4 | from google.cloud import storage 5 | import os 6 | from langchain.vectorstores import Pinecone 7 | import pinecone 8 | from langchain.embeddings.openai import OpenAIEmbeddings 9 | import io 10 | from google.cloud import secretmanager_v1 11 | 12 | os.environ['GOOGLE_CLOUD_PROJECT'] = 'chatbot' 13 | 14 | def get_secret(secret_id): 15 | project_id = os.environ['GOOGLE_CLOUD_PROJECT'] 16 | client = secretmanager_v1.SecretManagerServiceClient() 17 | secret_name = f"projects/{project_id}/secrets/{secret_id}/versions/latest" 18 | response = client.access_secret_version(request={"name": secret_name}) 19 | return response.payload.data.decode('UTF-8') 20 | 21 | #for cloud run 22 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/app/chatbot.json' 23 | #for local run 24 | #os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../chatbot.json' 25 | OPENAI_API_KEY = get_secret('openai_api_key') 26 | PINECONE_API_KEY = get_secret('pinecone_api_key') 27 | PINECONE_ENV = get_secret('pinecone_env') 28 | PINECONE_INDEX = get_secret('pinecone_index') 29 | index_name = PINECONE_INDEX 30 | embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY) 31 | 32 | pinecone.init( 33 | api_key=PINECONE_API_KEY, 34 | environment=PINECONE_ENV 35 | ) 36 | index_name = PINECONE_INDEX 37 | 38 | db = 
Pinecone.from_existing_index(index_name, embeddings) 39 | 40 | def run_query(user_input, db, OPENAI_API_KEY): 41 | llm = OpenAI(temperature=0.5, openai_api_key=OPENAI_API_KEY) 42 | qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever()) 43 | answer = qa.run(user_input) 44 | return answer 45 | 46 | def log_question_answer(query, answer): 47 | prefix = 'data/output/' 48 | log_file = f'{prefix}questions_answers.csv' 49 | <<<<<<< HEAD 50 | bucket_name = 'chatbot.com' 51 | ======= 52 | bucket_name = 'chatbot.appspot.com' 53 | >>>>>>> fabfcc05889160208b1260cc8b26beaa4f76377a 54 | data = {'question': [query], 'answer': [answer]} 55 | df = pd.DataFrame(data) 56 | storage_client = storage.Client() 57 | bucket = storage_client.get_bucket(bucket_name) 58 | blob = storage.Blob(log_file, bucket) 59 | if blob.exists(): 60 | content = blob.download_as_text() 61 | existing_df = pd.read_csv(io.StringIO(content)) 62 | new_df = existing_df.append(df, ignore_index=True) 63 | else: 64 | new_df = df 65 | new_content = new_df.to_csv(index=False) 66 | blob.upload_from_string(new_content, content_type='text/csv') 67 | -------------------------------------------------------------------------------- /notebook/pinecone.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "683953b3", 7 | "metadata": {}, 8 | "source": [ 9 | "# Creating a personal bot that lets you upload files to GC Storage and then stores the question and answer pairs in a CSV file in GC Storage.\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "aac9563e", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from langchain.embeddings.openai import OpenAIEmbeddings\n", 21 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 22 | "from langchain.vectorstores import Pinecone\n", 23 | "from 
langchain.document_loaders import GCSDirectoryLoader\n", 24 | "from langchain.chains import RetrievalQA\n", 25 | "from langchain.llms import OpenAI\n", 26 | "import pinecone\n", 27 | "import os\n", 28 | "import configparser\n", 29 | "import pandas as pd\n", 30 | "from google.cloud import storage" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "4f5e0b7b", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '~/gcp/chatbot.json'" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "b306a846", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "config = configparser.ConfigParser()\n", 51 | "config.read('../config.ini')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "216df103", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "OPENAI_API_KEY = config.get('api_key', 'openai')\n", 62 | "PINECONE_API_KEY = config.get('api_key', 'pinecone')\n", 63 | "PINECONE_ENV = config.get('env', 'pinecone')\n", 64 | "PINECONE_INDEX = config.get('index', 'pinecone')" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "a3c3999a", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "loader = GCSDirectoryLoader(project_name=\"chatbot\", bucket=\"chatbot.appspot.com\", prefix=\"data/input\")\n", 75 | "documents = loader.load()\n", 76 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)\n", 77 | "docs = text_splitter.split_documents(documents)\n", 78 | "\n", 79 | "embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "id": "6e104aee", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "pinecone.init(\n", 90 | " api_key=PINECONE_API_KEY,\n", 91 | " environment=PINECONE_ENV \n", 92 | ")\n", 93 | "\n", 94 | 
"index_name = PINECONE_INDEX\n", 95 | "\n", 96 | "db = Pinecone.from_documents(docs, embeddings, index_name=index_name)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "626f69ef", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "db = Pinecone.from_existing_index(index_name, embeddings)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "4f373eb2", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "llm = OpenAI(temperature=0.2, openai_api_key=OPENAI_API_KEY)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "ee0fa12c", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=db.as_retriever())" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "id": "9d059506", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "query = \"What is the meaning of life?\"\n", 137 | "answer = qa.run(query)\n", 138 | "answer" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "a359ed74", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "def log_question_answer(query, answer):\n", 149 | " prefix = 'data/output/'\n", 150 | " log_file = f'{prefix}questions_answers.csv'\n", 151 | " bucket_name = 'chatbot.appspot.com'\n", 152 | " data = {'question': [query], 'answer': [answer]}\n", 153 | "\n", 154 | " df = pd.DataFrame(data)\n", 155 | "\n", 156 | " # Initialize Google Cloud Storage client\n", 157 | " storage_client = storage.Client()\n", 158 | " bucket = storage_client.get_bucket(bucket_name)\n", 159 | "\n", 160 | " # Check if the file exists in the bucket\n", 161 | " blob = storage.Blob(log_file, bucket)\n", 162 | " if blob.exists():\n", 163 | " # Read the existing data and append the new data\n", 164 | " content = 
blob.download_as_text()\n", 165 | " import io  # local import: the notebook's import cell does not load io\n", 166 | " existing_df = pd.read_csv(io.StringIO(content))\n", 167 | " new_df = pd.concat([existing_df, df], ignore_index=True)\n", 168 | " else:\n", 169 | " new_df = df\n", 170 | "\n", 171 | " # Upload the updated data to Google Cloud Storage\n", 172 | " new_content = new_df.to_csv(index=False)\n", 173 | " blob.upload_from_string(new_content, content_type='text/csv')\n" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "841229a7", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "log_question_answer(query, answer)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "55f33167", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3 (ipykernel)", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.10.6" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 5 216 | } 217 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | altair==4.2.2 4 | antlr4-python3-runtime==4.9.3 5 | anyio==3.6.2 6 | argilla==1.6.0 7 | async-timeout==4.0.2 8 | attrs==23.1.0 9 | backoff==2.2.1 10 | blinker==1.6.2 11 | cachetools==5.3.0 12 | certifi==2022.12.7 13 | cffi==1.15.1 14 | charset-normalizer==3.1.0 15 | click==8.1.3 16 | coloredlogs==15.0.1 17 | commonmark==0.9.1 18 | contourpy==1.0.7 19 | cryptography==40.0.2 20 | cycler==0.11.0 21 | dataclasses-json==0.5.7 22 | decorator==5.1.1 23 | 
Deprecated==1.2.13 24 | dnspython==2.3.0 25 | effdet==0.3.0 26 | entrypoints==0.4 27 | et-xmlfile==1.1.0 28 | fastapi==0.95.1 29 | filelock==3.12.0 30 | flatbuffers==23.3.3 31 | fonttools==4.39.3 32 | frozenlist==1.3.3 33 | fsspec==2023.4.0 34 | gitdb==4.0.10 35 | GitPython==3.1.31 36 | google-api-core==2.11.0 37 | google-auth==2.17.3 38 | google-cloud-core==2.3.2 39 | google-cloud-secret-manager==2.16.1 40 | google-cloud-storage==2.8.0 41 | google-crc32c==1.5.0 42 | google-resumable-media==2.5.0 43 | googleapis-common-protos==1.59.0 44 | grpc-google-iam-v1==0.12.6 45 | grpcio==1.54.0 46 | grpcio-status==1.48.2 47 | h11==0.14.0 48 | httpcore==0.16.3 49 | httpx==0.23.3 50 | huggingface-hub==0.14.1 51 | humanfriendly==10.0 52 | idna==3.4 53 | importlib-metadata==6.6.0 54 | iopath==0.1.10 55 | Jinja2==3.1.2 56 | joblib==1.2.0 57 | jsonschema==4.17.3 58 | kiwisolver==1.4.4 59 | langchain==0.0.153 60 | layoutparser==0.3.4 61 | loguru==0.7.0 62 | lxml==4.9.2 63 | Markdown==3.4.3 64 | markdown-it-py==2.2.0 65 | MarkupSafe==2.1.2 66 | marshmallow==3.19.0 67 | marshmallow-enum==1.5.1 68 | matplotlib==3.7.1 69 | mdurl==0.1.2 70 | monotonic==1.6 71 | mpmath==1.3.0 72 | msg-parser==1.2.0 73 | multidict==6.0.4 74 | mypy-extensions==1.0.0 75 | networkx==3.1 76 | nltk==3.8.1 77 | numexpr==2.8.4 78 | numpy==1.23.5 79 | olefile==0.46 80 | omegaconf==2.3.0 81 | onnxruntime==1.14.1 82 | openai==0.27.5 83 | openapi-schema-pydantic==1.2.4 84 | opencv-python==4.7.0.72 85 | openpyxl==3.1.2 86 | packaging==23.1 87 | pandas==1.5.3 88 | pdf2image==1.16.3 89 | pdfminer.six==20221105 90 | pdfplumber==0.9.0 91 | Pillow==9.5.0 92 | pinecone-client==2.2.1 93 | portalocker==2.7.0 94 | proto-plus==1.22.2 95 | protobuf==3.20.3 96 | pyarrow==11.0.0 97 | pyasn1==0.5.0 98 | pyasn1-modules==0.3.0 99 | pycocotools==2.0.6 100 | pycparser==2.21 101 | pydantic==1.10.7 102 | pydeck==0.8.1b0 103 | Pygments==2.15.1 104 | Pympler==1.0.1 105 | pypandoc==1.11 106 | pyparsing==3.0.9 107 | pyrsistent==0.19.3 108 | 
pytesseract==0.3.10 109 | python-dateutil==2.8.2 110 | python-docx==0.8.11 111 | python-magic==0.4.27 112 | python-multipart==0.0.6 113 | python-pptx==0.6.21 114 | pytz==2023.3 115 | pytz-deprecation-shim==0.1.0.post0 116 | PyYAML==6.0 117 | regex==2023.3.23 118 | requests==2.29.0 119 | rfc3986==1.5.0 120 | rich==13.0.1 121 | rsa==4.9 122 | scipy==1.10.1 123 | six==1.16.0 124 | smmap==5.0.0 125 | sniffio==1.3.0 126 | SQLAlchemy==2.0.11 127 | starlette==0.26.1 128 | streamlit==1.22.0 129 | sympy==1.11.1 130 | tenacity==8.2.2 131 | tiktoken==0.3.3 132 | timm==0.6.13 133 | tokenizers==0.13.3 134 | toml==0.10.2 135 | toolz==0.12.0 136 | torch==2.0.0 137 | torchvision==0.15.1 138 | tornado==6.3.1 139 | tqdm==4.65.0 140 | transformers==4.28.1 141 | typing-inspect==0.8.0 142 | typing_extensions==4.5.0 143 | tzdata==2023.3 144 | tzlocal==4.3 145 | unstructured==0.6.2 146 | unstructured-inference==0.4.2 147 | urllib3==1.26.15 148 | uvicorn==0.22.0 149 | validators==0.20.0 150 | Wand==0.6.11 151 | wrapt==1.14.1 152 | XlsxWriter==3.1.0 153 | yarl==1.9.2 154 | zipp==3.15.0 155 | --------------------------------------------------------------------------------