├── eu_commission_act.ipynb
├── .gitignore
├── streamlit.gif
├── dataset_vectorizers.py
├── pdf_loaders.py
├── README.md
├── app.py
└── freelancer_plan_compare.ipynb


/eu_commission_act.ipynb:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .chroma
2 | *__pycache__
3 | data/
4 | config.yml
5 | sw_vs_allianz.ipynb


--------------------------------------------------------------------------------
/streamlit.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rachnog/pdfs_question_answering/HEAD/streamlit.gif


--------------------------------------------------------------------------------
/dataset_vectorizers.py:
--------------------------------------------------------------------------------
 1 | from langchain.embeddings.openai import OpenAIEmbeddings
 2 | from langchain.vectorstores import Chroma
 3 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 4 | from langchain.document_loaders import TextLoader
 5 | 
 6 | class DatasetVectorizer:
 7 |     """
 8 |         A class for vectorizing datasets.
 9 |     """
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def vectorize(self, text_file_paths, chunk_size=1000, chunk_overlap=500, openai_key=""):
14 |         documents = []
15 |         for text_file_path in text_file_paths:
16 |             doc_loader = TextLoader(text_file_path)
17 |             documents.extend(doc_loader.load())
18 | 
19 |         text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=chunk_overlap, chunk_size=chunk_size)
20 |         texts = text_splitter.split_documents(documents)
21 | 
22 |         embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
23 |         docsearch = Chroma.from_documents(texts, embeddings)
24 | 
25 |         return documents, texts, docsearch


--------------------------------------------------------------------------------
/pdf_loaders.py:
--------------------------------------------------------------------------------
 1 | # code for loading pdfs and saving them as texts
 2 | 
 3 | import PyPDF2
 4 | 
 5 | class PdfToTextLoader:
 6 |     """
 7 |         Class for loading pdfs and saving them as texts
 8 |     """
 9 | 
10 |     def __init__(self, pdf_path, output_path):
11 |         """
12 |             Args:
13 |                 pdf_path (str): path to pdf file
14 |                 output_path (str): path to save text file
15 |         """
16 |         self.pdf_path = pdf_path
17 |         self.output_path = output_path
18 | 
19 |     def load_pdf(self):
20 |         """
21 |             Loads pdf file and saves it as text file
22 |         """
23 |         with open(self.pdf_path, 'rb') as pdf_file:
24 |             pdf_reader = PyPDF2.PdfReader(pdf_file)
25 |             num_pages = len(pdf_reader.pages)
26 |             text = ''
27 |             for page in range(num_pages):
28 |                 page = pdf_reader.pages[page]
29 |                 text += page.extract_text()
30 |             with open(self.output_path, 'w') as text_file:
31 |                 text_file.write(text)
32 |         return text


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Talk to your insurance PDFs 🏥
 2 | 
 3 | This repository contains a Python script 🐍 which fetches and compares health insurance plans offered by two companies: EmblemHealth and MetroPlus. The script is designed to automate the process of gathering information and making comparisons, providing a detailed summary of the offerings from each company.
 4 | 
The Python code uses libraries such as OpenAI, LangChain, tiktoken, and PyYAML. Its main highlight is the ability to ask a set of predefined questions about each insurance policy and get detailed answers using language models. 👥
 6 | 
 7 | ## What does the script do? 🤔
 8 | 
 9 | Loading Data from PDFs: 📄 The script takes as input PDF documents from the two insurance companies and converts them into text data using the PdfToTextLoader.
10 | 
11 | Vectorizing dataset: 📊 The script then vectorizes the text data. This involves transforming the text data into a form that machine learning algorithms can understand.
12 | 
13 | Asking questions: ❓ The script then asks a set of predefined questions about the insurance policies of each company. These questions relate to details about deductibles, coverage, limitations, and other important aspects.
14 | 
15 | Comparing Answers: 🆚 After gathering the responses, the script summarizes the answers from each company, providing a quick and easy way to compare the offerings of each company.
16 | 
17 | Ratings: ⭐️ The script also provides a rating system that grades each insurance policy in terms of coverage of different health procedures, flexibility for remote workers abroad, and price and compensation.
18 | 
19 | ## What does the app do?
20 | 
![Streamlit app demo](https://raw.githubusercontent.com/Rachnog/pdfs_question_answering/HEAD/streamlit.gif)
22 | 
App Flow 🌊

Upload Insurance Plans: Users upload two insurance plan documents in PDF format. They can also provide names for these plans.
25 | 
26 | PDF Loading and Vectorizing: The app converts PDF files into text, and then generates vector representations of the text for further processing.
27 | 
28 | User Input: Users provide a brief company description and a list of questions that they want to use to compare the plans. They also provide a set of final criteria for decision-making.
29 | 
30 | Processing and Answer Generation: The app processes the questions against each plan, gathers answers, and generates a final decision based on the provided criteria. It utilizes OpenAI's language model to analyze and understand the information in the documents and to create the final decision.
31 | 
32 | Output: The app outputs the answers to the provided questions and the final comparative decision based on the criteria. It presents these in an easy-to-understand format.
33 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import re

import yaml
import streamlit as st

from langchain import OpenAI, VectorDBQA, LLMChain
from langchain.prompts import PromptTemplate

from pdf_loaders import PdfToTextLoader
from dataset_vectorizers import DatasetVectorizer
  9 | 
 10 | with open("config.yml", "r") as f:
 11 |     config = yaml.safe_load(f)
 12 | 
 13 | OPENAI_API_KEY = config['OPENAI_KEY']
 14 | PDFS, NAMES, TXTS = [], [], []
 15 | CHUNK_SIZE = 1000
 16 | CHUNK_OVERLAP = 500
 17 | 
 18 | # ----- Header of the app -----
 19 | st.title("Comparing insurance plans")
 20 | st.write("This app compares insurance plans using the OpenAI API. It is a prototype and not intended for actual use.")
 21 | 
 22 | # ----- Select and upload the files one by one -----
 23 | st.header("Select the files to compare")
 24 | st.write("The files should be in PDF format.")
 25 | file_1 = st.file_uploader("File 1")
 26 | file_2 = st.file_uploader("File 2")
 27 | name_1 = st.text_input("Name of file 1", value="Plan 1")
 28 | name_2 = st.text_input("Name of file 2", value="Plan 2")
 29 | 
 30 | # ----- Load the files -----
 31 | if file_1 and file_2:
 32 | 
 33 |     with open("./data/" + file_1.name, "wb") as f:
 34 |         f.write(file_1.getbuffer())
 35 | 
 36 |     with open("./data/" + file_2.name, "wb") as f:
 37 |         f.write(file_2.getbuffer())
 38 | 
 39 |     PDFS = ["./data/" + file_1.name, "./data/" + file_2.name]
 40 |     NAMES = [name_1, name_2]
 41 | 
 42 |     for pdf_path in PDFS:
 43 |         txt_path = pdf_path.replace(".pdf", ".txt")
 44 |         pdf_loader = PdfToTextLoader(pdf_path, txt_path)
 45 |         text = pdf_loader.load_pdf()
 46 |         TXTS.append(txt_path)
 47 |     st.write("Files loaded successfully.")
 48 | 
 49 |     dataset_vectorizer = DatasetVectorizer()
 50 |     documents_1, texts_1, docsearch_1 = dataset_vectorizer.vectorize([TXTS[0]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)
 51 |     documents_2, texts_2, docsearch_2 = dataset_vectorizer.vectorize([TXTS[1]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)
 52 |     llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY)
 53 |     qa_chain_1 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_1)
 54 |     qa_chain_2 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_2)
 55 |     st.write("Files vectorized successfully.")
 56 | 
 57 |     # ----- Write questions separated by a new line -----
 58 |     st.header("Write the questions to generate a summary")
 59 | 
 60 |     st.write("Brief company description")
 61 |     company_description = st.text_area("Brief company description", value="full-remote consulting company with 100 employees")
 62 | 
 63 |     st.write("The questions should be separated by a new line.")
 64 |     questions = st.text_area("Questions", 
 65 |                              value = """
 66 | How good are the deductibles?
 67 | How is the preventive care coverage?
 68 | How this plan fits for remote workers in the US and abroad?
 69 | What is the maximum money amount that can be compensated?
 70 | Can I go to any hospital of my choice?
 71 | Are there any limitations that won\'t allow to use the insurance?
 72 | Does it cover the family members of the applicant?
 73 | What are the healthcare procedures that are not covered by the insurance?
 74 | Can I use the insurance for the dental care?
 75 | Can I use the insurance in other countries?""")
 76 |     QUESTIONS = questions.split("\n")
 77 |     QUESTIONS = [q.strip() for q in QUESTIONS if len(q) > 0]
 78 | 
 79 |     # ----- Select final criteria for decision-making -----
 80 |     st.header("Select the final criteria for decision-making")
 81 |     st.write("The criteria should be separated by a new line.")
 82 |     criteria = st.text_area("Criteria", value="""
 83 | 1. Coverage of different health procedures
 84 | 2. Flexibility for remote workers abroad
 85 | 3. Price and compensation""")
 86 |     CRITERIA = criteria.split("\n")
 87 |     CRITERIA = [c.strip() for c in CRITERIA if len(c) > 0]
 88 |     final_criteria = "".join([f"{i}. {c}\n" for i, c in enumerate(CRITERIA, 1)])
 89 | 
 90 |     # ----- Generate the intermediate answers for the document summary -----
 91 |     summary_of_answers = ""
 92 |     for q in QUESTIONS:
 93 |         print(q)
 94 |         answer_1, answer_2 = qa_chain_1.run(q), qa_chain_2.run(q)
 95 |         summary_of_answers += "Question: " + q + "\n"
 96 |         summary_of_answers += f"{NAMES[0]} answer: " + answer_1 + f";\n {NAMES[1]} answer: " + answer_2 + "\n"
 97 |         
 98 |     template = """
 99 |         I want you to act as an expert in insurance policies. I have asked two companies about their insurance policies and here are their answers:
100 |         {summary_of_answers}
101 |         I am looking for insurance for a {company_description}. I want you to tell me which company is better and why.
102 |         Give me a rating (x out of 10) for the following categories for each company separately with a short explanation (10 words max) for each category:
103 |         {final_criteria}
104 |         Your answer and final recommendation after the rating:
105 |         """
106 |     
107 |     prompt = PromptTemplate(
108 |         input_variables=["summary_of_answers", "company_description", "final_criteria"],
109 |         template=template,
110 |     )
111 |     
112 |     answer = ""
113 |     llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY, request_timeout=60)
114 |     chain = LLMChain(llm=llm, prompt=prompt)
115 |     answer = chain.run({"summary_of_answers": summary_of_answers, "final_criteria": final_criteria, "company_description": company_description})
116 | 
117 |     # ----- Generate the final answer -----
118 |     st.header("Final answer")
119 |     st.write(answer)
120 | 
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/freelancer_plan_compare.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import yaml\n",
 10 |     "import tiktoken\n",
 11 |     "\n",
 12 |     "from langchain import OpenAI, VectorDBQA, LLMChain\n",
 13 |     "from langchain.prompts import PromptTemplate\n",
 14 |     "\n",
 15 |     "from pdf_loaders import PdfToTextLoader\n",
 16 |     "from dataset_vectorizers import DatasetVectorizer\n",
 17 |     "\n",
 18 |     "with open(\"config.yml\", \"r\") as f:\n",
 19 |     "    config = yaml.safe_load(f)\n",
 20 |     "\n",
 21 |     "OPENAI_API_KEY = config['OPENAI_KEY']"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "attachments": {},
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Loading data from PDF"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 51,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "# data taken from https://www.freelancersunion.org/insurance/health/bronze-plans-nyc/\n",
 39 |     "PDFS, NAMES, TXTS  = [\n",
 40 |     "    './data/2023_EmblemHealth_Bronze_D.pdf',\n",
 41 |     "    './data/BH2_SB_OX_2023_v1_-_Bronze_HSA_Non-Standard_Off-Exchange.pdf'\n",
 42 |     "], [\n",
 43 |     "    'EmblemHealth', \n",
 44 |     "    'MetroPlus'\n",
 45 |     "], []\n"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 52,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "for pdf_path in PDFS:\n",
 55 |     "    txt_path = pdf_path.replace(\".pdf\", \".txt\")\n",
 56 |     "    pdf_loader = PdfToTextLoader(pdf_path, txt_path)\n",
 57 |     "    text = pdf_loader.load_pdf()\n",
 58 |     "    TXTS.append(txt_path)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "attachments": {},
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "## Vectorizing dataset"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 53,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "CHUNK_SIZE = 1000\n",
 76 |     "CHUNK_OVERLAP = 500"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 54,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "name": "stderr",
 86 |      "output_type": "stream",
 87 |      "text": [
 88 |       "Using embedded DuckDB without persistence: data will be transient\n",
 89 |       "Using embedded DuckDB without persistence: data will be transient\n"
 90 |      ]
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "dataset_vectorizer = DatasetVectorizer()\n",
 95 |     "documents_1, texts_1, docsearch_1 = dataset_vectorizer.vectorize([TXTS[0]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)\n",
 96 |     "documents_2, texts_2, docsearch_2 = dataset_vectorizer.vectorize([TXTS[1]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "attachments": {},
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "## Asking questions"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 55,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "QUESTIONS = [\n",
114 |     "    'How good are the deductibles?',\n",
115 |     "    \"How is the preventive care coverage?\",\n",
116 |     "    'How this plan fits for remote workers in the US and abroad?',\n",
117 |     "    'What is the maximum money amount that can be compensated?',\n",
118 |     "    'Can I go to any hospital of my choice?',\n",
119 |     "    'Are there any limitations that won\\'t allow to use the insurance?',\n",
120 |     "    'Does it cover the family members of the applicant?',\n",
121 |     "    'What are the healthcare procedures that are not covered by the insurance?',\n",
122 |     "    'Can I use the insurance for the dental care?',\n",
123 |     "    'Can I use the insurance in other countries?'\n",
124 |     "]"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 56,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "name": "stderr",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "/Users/oleksandrhonchar/miniforge3/lib/python3.9/site-packages/langchain/chains/retrieval_qa/base.py:185: UserWarning: `VectorDBQA` is deprecated - please use `from langchain.chains import RetrievalQA`\n",
137 |       "  warnings.warn(\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY)\n",
143 |     "qa_chain_1 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_1)\n",
144 |     "qa_chain_2 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_2)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 57,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "name": "stdout",
154 |      "output_type": "stream",
155 |      "text": [
156 |       "How good are the deductibles?\n",
157 |       "EmblemHealth  The overall deductible is $4,700 individual / $9,400 family. There are no other specific deductibles. Certain preventive services are covered without cost-sharing and before you meet your deductible.\n",
158 |       "MetroPlus  The deductibles are $6,100 for individuals and $12,200 for families.\n",
159 |       "----------\n",
160 |       "How is the preventive care coverage?\n",
161 |       "EmblemHealth  This plan covers certain preventive services without cost-sharing and before you meet your deductible. See a list of covered preventive services at https://www.healthcare.gov/coverage/preventive-care-benefits/.\n",
162 |       "MetroPlus  Preventive care is covered in full for participating providers. Non-participating providers are not covered and the member is responsible for the full cost.\n",
163 |       "----------\n",
164 |       "How this plan fits for remote workers in the US and abroad?\n",
165 |       "EmblemHealth  This plan provides Minimum Essential Coverage and meets the Minimum Value Standard. It includes coverage for abortion services, bariatric surgery, chiropractic care, hearing aids, and infertility treatment. However, it does not cover dental care, long-term care, non-emergency care when traveling outside the US, private-duty nursing, routine foot care, routine hearing tests, and weight loss programs. Language access services are available in Spanish, Tagalog, Chinese, and Navajo. For more information about limitations and exceptions, see the plan or policy document at www.emblemhealth.com.\n",
166 |       "MetroPlus  This plan does not provide any specific coverage for remote workers in the US or abroad.\n",
167 |       "----------\n",
168 |       "What is the maximum money amount that can be compensated?\n",
169 |       "EmblemHealth  $4,840\n",
170 |       "MetroPlus  $200 per six (6) month period, plus an additional $100 per six (6) month period for a spouse.\n",
171 |       "----------\n",
172 |       "Can I go to any hospital of my choice?\n",
173 |       "EmblemHealth  No, you need to use a provider in the plan's network to pay less. You will pay the most if you use a non-participating provider, and you might receive a bill from a provider for the difference between the provider’s charge and what your plan pays (balance billing).\n",
174 |       "MetroPlus  No, you must go to a participating provider in order to be covered by the insurance. Non-participating provider services are not covered and you must pay the full cost.\n",
175 |       "----------\n",
176 |       "Are there any limitations that won't allow to use the insurance?\n",
177 |       "EmblemHealth  Yes, there are limitations that won't allow you to use the insurance. These include services such as acupuncture, cosmetic surgery, dental care, long-term care, non-emergency care when traveling outside the U.S., private-duty nursing, routine foot care, routine hearing tests, and weight loss programs.\n",
178 |       "MetroPlus  Yes, services provided by non-participating providers are not covered and you have to pay the full cost. Additionally, there is a limit of 10 visits per calendar year for non-participating providers.\n",
179 |       "----------\n",
180 |       "Does it cover the family members of the applicant?\n",
181 |       "EmblemHealth  Yes, the overall family deductible must be met before the plan begins to pay.\n",
182 |       "MetroPlus  No, it does not cover the family members of the applicant.\n",
183 |       "----------\n",
184 |       "What are the healthcare procedures that are not covered by the insurance?\n",
185 |       "EmblemHealth  Visiting a health care provider's office or clinic, diagnostic tests, imaging, prescription drugs, skilled nursing care, durable medical equipment, hospice services, children's eye exam, children's glasses, children's dental check-up, home health care, rehabilitation services, and habilitation services are not covered by the insurance.\n",
186 |       "MetroPlus  Mammograms, Sterilization Procedures for Women, Vasectomy, Bone Density Testing, Screening for Prostate Cancer, All other preventive services required by USPSTF and HRSA, Home infusion, Inpatient Medical Visits, Interruption of Pregnancy, Medically Necessary Abortions, Elective Abortions, Laboratory Procedures performed in a PCP Office, Laboratory Procedures performed in a Specialist Office, Laboratory Procedures performed in a Freestanding Laboratory Facility, Dialysis performed by a Specialist Office, Dialysis performed in a Freestanding Center, Dialysis performed as Outpatient Hospital Services, Dialysis performed at Home.\n",
187 |       "----------\n",
188 |       "Can I use the insurance for the dental care?\n",
189 |       "EmblemHealth  Yes, this plan covers children's dental check-ups with a $50 copayment after the deductible is met.\n",
190 |       "MetroPlus  Yes, you can use the insurance for the dental care. The participating provider member responsibility for cost-sharing is 50% coinsurance after the deductible for preventive, routine, major dental care, and orthodontics. Non-participating provider services are not covered and you pay the full cost.\n",
191 |       "----------\n",
192 |       "Can I use the insurance in other countries?\n",
193 |       "EmblemHealth  No, this plan does not cover non-emergency care when traveling outside the U.S.\n",
194 |       "MetroPlus  No, this insurance plan does not cover services outside of the United States.\n",
195 |       "----------\n"
196 |      ]
197 |     }
198 |    ],
199 |    "source": [
200 |     "summary_of_answers = \"\"\n",
201 |     "for q in QUESTIONS:\n",
202 |     "    print(q)\n",
203 |     "    answer_1, answer_2 = qa_chain_1.run(q), qa_chain_2.run(q)\n",
204 |     "    summary_of_answers += \"Question: \" + q + \"\\n\"\n",
205 |     "    summary_of_answers += f\"{NAMES[0]} answer: \" + answer_1 + f\";\\n {NAMES[1]} answer: \" + answer_2 + \"\\n\"\n",
206 |     "    print(NAMES[0], answer_1)\n",
207 |     "    print(NAMES[1], answer_2)\n",
208 |     "    print('-' * 10)"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 58,
214 |    "metadata": {},
215 |    "outputs": [
216 |     {
217 |      "data": {
218 |       "text/plain": [
219 |        "1050"
220 |       ]
221 |      },
222 |      "execution_count": 58,
223 |      "metadata": {},
224 |      "output_type": "execute_result"
225 |     }
226 |    ],
227 |    "source": [
228 |     "encoder = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")\n",
229 |     "len(encoder.encode(summary_of_answers))"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 59,
235 |    "metadata": {},
236 |    "outputs": [
237 |     {
238 |      "name": "stdout",
239 |      "output_type": "stream",
240 |      "text": [
241 |       "Question: How good are the deductibles?\n",
242 |       "EmblemHealth answer:  The overall deductible is $4,700 individual / $9,400 family. There are no other specific deductibles. Certain preventive services are covered without cost-sharing and before you meet your deductible.;\n",
243 |       " MetroPlus answer:  The deductibles are $6,100 for individuals and $12,200 for families.\n",
244 |       "Question: How is the preventive care coverage?\n",
245 |       "EmblemHealth answer:  This plan covers certain preventive services without cost-sharing and before you meet your deductible. See a list of covered preventive services at https://www.healthcare.gov/coverage/preventive-care-benefits/.;\n",
246 |       " MetroPlus answer:  Preventive care is covered in full for participating providers. Non-participating providers are not covered and the member is responsible for the full cost.\n",
247 |       "Question: How this plan fits for remote workers in the US and abroad?\n",
248 |       "EmblemHealth answer:  This plan provides Minimum Essential Coverage and meets the Minimum Value Standard. It includes coverage for abortion services, bariatric surgery, chiropractic care, hearing aids, and infertility treatment. However, it does not cover dental care, long-term care, non-emergency care when traveling outside the US, private-duty nursing, routine foot care, routine hearing tests, and weight loss programs. Language access services are available in Spanish, Tagalog, Chinese, and Navajo. For more information about limitations and exceptions, see the plan or policy document at www.emblemhealth.com.;\n",
249 |       " MetroPlus answer:  This plan does not provide any specific coverage for remote workers in the US or abroad.\n",
250 |       "Question: What is the maximum money amount that can be compensated?\n",
251 |       "EmblemHealth answer:  $4,840;\n",
252 |       " MetroPlus answer:  $200 per six (6) month period, plus an additional $100 per six (6) month period for a spouse.\n",
253 |       "Question: Can I go to any hospital of my choice?\n",
254 |       "EmblemHealth answer:  No, you need to use a provider in the plan's network to pay less. You will pay the most if you use a non-participating provider, and you might receive a bill from a provider for the difference between the provider’s charge and what your plan pays (balance billing).;\n",
255 |       " MetroPlus answer:  No, you must go to a participating provider in order to be covered by the insurance. Non-participating provider services are not covered and you must pay the full cost.\n",
256 |       "Question: Are there any limitations that won't allow to use the insurance?\n",
257 |       "EmblemHealth answer:  Yes, there are limitations that won't allow you to use the insurance. These include services such as acupuncture, cosmetic surgery, dental care, long-term care, non-emergency care when traveling outside the U.S., private-duty nursing, routine foot care, routine hearing tests, and weight loss programs.;\n",
258 |       " MetroPlus answer:  Yes, services provided by non-participating providers are not covered and you have to pay the full cost. Additionally, there is a limit of 10 visits per calendar year for non-participating providers.\n",
259 |       "Question: Does it cover the family members of the applicant?\n",
260 |       "EmblemHealth answer:  Yes, the overall family deductible must be met before the plan begins to pay.;\n",
261 |       " MetroPlus answer:  No, it does not cover the family members of the applicant.\n",
262 |       "Question: What are the healthcare procedures that are not covered by the insurance?\n",
263 |       "EmblemHealth answer:  Visiting a health care provider's office or clinic, diagnostic tests, imaging, prescription drugs, skilled nursing care, durable medical equipment, hospice services, children's eye exam, children's glasses, children's dental check-up, home health care, rehabilitation services, and habilitation services are not covered by the insurance.;\n",
264 |       " MetroPlus answer:  Mammograms, Sterilization Procedures for Women, Vasectomy, Bone Density Testing, Screening for Prostate Cancer, All other preventive services required by USPSTF and HRSA, Home infusion, Inpatient Medical Visits, Interruption of Pregnancy, Medically Necessary Abortions, Elective Abortions, Laboratory Procedures performed in a PCP Office, Laboratory Procedures performed in a Specialist Office, Laboratory Procedures performed in a Freestanding Laboratory Facility, Dialysis performed by a Specialist Office, Dialysis performed in a Freestanding Center, Dialysis performed as Outpatient Hospital Services, Dialysis performed at Home.\n",
265 |       "Question: Can I use the insurance for the dental care?\n",
266 |       "EmblemHealth answer:  Yes, this plan covers children's dental check-ups with a $50 copayment after the deductible is met.;\n",
267 |       " MetroPlus answer:  Yes, you can use the insurance for the dental care. The participating provider member responsibility for cost-sharing is 50% coinsurance after the deductible for preventive, routine, major dental care, and orthodontics. Non-participating provider services are not covered and you pay the full cost.\n",
268 |       "Question: Can I use the insurance in other countries?\n",
269 |       "EmblemHealth answer:  No, this plan does not cover non-emergency care when traveling outside the U.S.;\n",
270 |       " MetroPlus answer:  No, this insurance plan does not cover services outside of the United States.\n",
271 |       "\n"
272 |      ]
273 |     }
274 |    ],
275 |    "source": [
276 |     "print(summary_of_answers)"
277 |    ]
278 |   },
279 |   {
280 |    "attachments": {},
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "## Asking to compare the answers"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "code",
289 |    "execution_count": 60,
290 |    "metadata": {},
291 |    "outputs": [],
292 |    "source": [
293 |     "template = \"\"\"\n",
294 |     "I want you to act as an expert in insurance policies. I have asked two companies about their insurance policies and here are their answers:\n",
295 |     "{summary_of_answers}\n",
296 |     "I am looking for insurance for a full-remote consulting company with 100 employees. I want you to tell me which company is better and why.\n",
297 |     "Give me a rating (x out of 10) for the following categories for each company separately with a short explanation (10 words max) for each category:\n",
298 |     "1. Coverage of different health procedures\n",
299 |     "2. Flexibility for remote workers abroad\n",
300 |     "3. Price and compensation\n",
301 |     "Your answer:\n",
302 |     "\"\"\"\n",
303 |     "\n",
304 |     "prompt = PromptTemplate(\n",
305 |     "    input_variables=[\"summary_of_answers\"],\n",
306 |     "    template=template,\n",
307 |     ")"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": 61,
313 |    "metadata": {},
314 |    "outputs": [],
315 |    "source": [
316 |     "llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY, request_timeout=60)\n",
317 |     "chain = LLMChain(llm=llm, prompt=prompt)"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": 62,
323 |    "metadata": {},
324 |    "outputs": [],
325 |    "source": [
326 |     "answer = chain.run(summary_of_answers)"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": 63,
332 |    "metadata": {},
333 |    "outputs": [
334 |     {
335 |      "name": "stdout",
336 |      "output_type": "stream",
337 |      "text": [
338 |       "\n",
339 |       "EmblemHealth: \n",
340 |       "1. Coverage of different health procedures: 8/10 - covers most procedures. \n",
341 |       "2. Flexibility for remote workers abroad: 4/10 - no coverage outside US. \n",
342 |       "3. Price and compensation: 7/10 - reasonable deductible.\n",
343 |       "\n",
344 |       "MetroPlus: \n",
345 |       "1. Coverage of different health procedures: 6/10 - some procedures not covered. \n",
346 |       "2. Flexibility for remote workers abroad: 2/10 - no coverage outside US. \n",
347 |       "3. Price and compensation: 8/10 - reasonable deductible and compensation.\n",
348 |       "\n",
349 |       "Overall, EmblemHealth is the better option for a full-remote consulting company with 100 employees. It offers better coverage of different health procedures and a more reasonable price and compensation.\n"
350 |      ]
351 |     }
352 |    ],
353 |    "source": [
354 |     "print(answer)"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "code",
359 |    "execution_count": null,
360 |    "metadata": {},
361 |    "outputs": [],
362 |    "source": []
363 |   }
364 |  ],
365 |  "metadata": {
366 |   "kernelspec": {
367 |    "display_name": "Python 3 (ipykernel)",
368 |    "language": "python",
369 |    "name": "python3"
370 |   },
371 |   "language_info": {
372 |    "codemirror_mode": {
373 |     "name": "ipython",
374 |     "version": 3
375 |    },
376 |    "file_extension": ".py",
377 |    "mimetype": "text/x-python",
378 |    "name": "python",
379 |    "nbconvert_exporter": "python",
380 |    "pygments_lexer": "ipython3",
381 |    "version": "3.9.6"
382 |   }
383 |  },
384 |  "nbformat": 4,
385 |  "nbformat_minor": 2
386 | }
387 | 


--------------------------------------------------------------------------------