├── .gitignore
├── README.md
├── file.pdf
├── main.py
├── requirements.txt
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | files/*.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # chatPDF
 2 | 
 3 | Load a PDF file and ask questions via llama_index and GPT
 4 | 
 5 | ## Instructions
 6 | 
 7 | - Install the requirements
 8 | 
 9 | ```bash
10 | pip install -r requirements.txt
11 | ```
12 | 
13 | - Get a GPT API key from [OpenAI](https://platform.openai.com/account/api-keys) if you don't have one already.
14 | 
15 | - Paste your API key in a file called `.env` in the root directory of the project.
16 | 
17 | ```bash
18 | OPENAI_API_KEY=<your key here>
19 | ```
20 | 
 21 | - Put the PDFs you want to use in the `files/` directory and select one from the menu, or replace the default file `file.pdf` in the project root with your own PDF.
22 | 
23 | - Run the script.
24 | 
25 | ```bash
26 | python3 main.py
27 | ```
28 | 
29 | - Ask any questions about the content of the PDF.
30 | 
31 | - You can find other loaders at [Llama Hub](https://llamahub.ai/).
32 | 
33 | - Enjoy!
34 | 
35 | 


--------------------------------------------------------------------------------
/file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabacode/chatPDF/d2d1e1033d58ffc5c5c4647dc2e63c7ae2dc184c/file.pdf


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | logging.basicConfig(level=logging.CRITICAL)
 4 | 
 5 | import os
 6 | from pathlib import Path
 7 | 
 8 | import openai
 9 | from dotenv import load_dotenv
10 | from langchain.chat_models import ChatOpenAI
11 | from llama_index import (
12 |     GPTVectorStoreIndex,
13 |     LLMPredictor,
14 |     ServiceContext,
15 |     StorageContext,
16 |     download_loader,
17 |     load_index_from_storage,
18 | )
19 | from utils import CACHE, FILES, models, cls, handle_save, handle_exit, initialize, select_file
20 | 
# Load variables from a local `.env` file into the process environment
# (the README instructs users to put OPENAI_API_KEY there).
load_dotenv()
# Indexing os.environ raises KeyError at import time if the key is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Prompt/response pairs of the current session; chat() appends to it and
# passes it to handle_save() when the user types "save".
history = []

# LLM settings shared by index creation (make_index) and querying (ask):
# the chat model named by models["gpt-3"], with answers capped at 256 tokens.
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.618, model_name=models["gpt-3"], max_tokens=256))
# NOTE(review): chunk_size_limit presumably bounds the size of each indexed text chunk — confirm against the pinned llama_index version.
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size_limit=1024)
27 | 
28 | 
def make_index(file):
    """Build and persist the vector index for *file* unless it is already cached.

    Args:
        file: Bare file name of the PDF (e.g. ``"report.pdf"``). It is looked
            up inside ``FILES`` first; if it is not there but a file with that
            name exists in the current working directory (the ``file.pdf``
            fallback returned by ``select_file``), that one is used instead.

    Returns:
        None. The index is written to ``CACHE/<file>`` as a side effect.
    """
    cls()
    print("👀 Loading...")

    cache_dir = Path(CACHE) / file
    # Check the cache before touching the PDF: downloading the loader and
    # parsing the document is wasted work when the index already exists.
    if cache_dir.exists():
        print("📚 Index found in cache")
        return

    print("📚 Index not found in cache, creating it...")

    source = Path(FILES) / file
    if not source.exists() and Path(file).exists():
        # select_file() may hand back the default "file.pdf" that lives in
        # the working directory rather than inside FILES.
        source = Path(file)

    PDFReader = download_loader("PDFReader")
    documents = PDFReader().load_data(file=source)

    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
    index.storage_context.persist(persist_dir=cache_dir)
44 | 
45 | 
def chat(file_name, index):
    """Run the interactive question/answer loop against *index*.

    Two inputs are treated as commands rather than questions:

    * ``exit`` – leave the program via ``handle_exit()``.
    * ``save`` – write the conversation so far via ``handle_save()``.

    Every other non-blank input is sent to the query engine; the prompt and
    the answer are printed and appended to the module-level ``history``.

    Args:
        file_name: Name of the PDF being discussed; used as the title of the
            saved conversation file.
        index: Loaded index object exposing ``as_query_engine``.
    """
    # The engine does not depend on the prompt, so build it once instead of
    # on every turn.
    query_engine = index.as_query_engine(response_mode="compact")

    while True:
        prompt = input("\n😎 Prompt: ")
        command = prompt.strip()

        if not command:
            # Nothing to ask: avoid a pointless (billable) LLM call.
            continue
        if command == "exit":
            handle_exit()
        if command == "save":
            handle_save(str(file_name), history)
            print(f"\n💾 Conversation saved to {file_name}.json")
            # "save" is a command, not a question: do not forward it to the
            # LLM or record it in the history.
            continue

        response = query_engine.query(prompt)
        print("\n👻 Response: " + str(response))
        history.append({"user": prompt, "response": str(response)})
59 | 
60 | 
def ask(file_name):
    """Load the persisted index for *file_name* and start the chat loop.

    The index is read from ``CACHE/<file_name>``. A Ctrl+C at any point
    (while loading or while chatting) ends the program via ``handle_exit()``.
    """
    try:
        print("👀 Loading...")
        cache_dir = Path(CACHE) / file_name
        storage_context = StorageContext.from_defaults(persist_dir=cache_dir)
        index = load_index_from_storage(
            storage_context,
            service_context=service_context,
        )

        # Start from a clean screen before showing the banner.
        cls()
        for banner_line in (
            "✅ Ready! Let's start the conversation",
            "ℹ️ Press Ctrl+C to exit",
        ):
            print(banner_line)

        chat(file_name, index)
    except KeyboardInterrupt:
        handle_exit()
72 | 
73 | 
if __name__ == "__main__":
    # Script entry point: prepare folders, pick a PDF, index it, then chat.
    initialize()
    file = select_file()
    if not file:
        # Neither a PDF in the files folder nor the default file was found.
        print("No files found")
        handle_exit()
    else:
        # Only the bare name is used downstream; the directory is re-derived
        # from FILES / CACHE.
        file_name = Path(file).name
        make_index(file_name)
        ask(file_name)
84 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.194
2 | llama_index==0.6.15
3 | openai==0.27.2
4 | python-dotenv==1.0.0
5 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import sys
 4 | 
 5 | 
# Directory whose *.pdf files are offered in the selection menu.
FILES = "./files"
# Sub-directory of FILES; main.py persists one index per PDF under it.
CACHE = f"{FILES}/.cache"

# Short aliases mapped to OpenAI model identifiers.
models = {"davinci": "text-davinci-003", "gpt-3": "gpt-3.5-turbo"}
10 | 
11 | 
def initialize():
    """Ensure the PDF folder (``FILES``) and its cache folder (``CACHE``) exist.

    Safe to call repeatedly: directories that already exist are left alone.
    """
    # exist_ok=True replaces the check-then-create sequence, which could fail
    # if the directory appeared between the check and the mkdir call.
    os.makedirs(FILES, exist_ok=True)
    os.makedirs(CACHE, exist_ok=True)
17 | 
def cls():
    """Clear the terminal using the platform's native shell command."""
    # os.name is 'nt' on Windows, where the command is `cls`; everywhere
    # else `clear` is used.
    command = 'clear'
    if os.name == 'nt':
        command = 'cls'
    os.system(command)
20 | 
21 | 
def select_file():
    """Let the user pick one of the PDFs found in ``FILES``.

    Returns:
        The absolute path of the chosen PDF; ``"file.pdf"`` when ``FILES``
        holds no PDF but a ``file.pdf`` exists in the working directory;
        ``None`` when no PDF is available at all.

    Entering ``0`` leaves the program through ``handle_exit()``. Anything that
    is not a listed number redraws the menu and asks again.
    """
    cls()
    # os.listdir returns entries in arbitrary order; sort so the numbering is
    # stable between runs.
    pdfs = sorted(name for name in os.listdir(FILES) if name.endswith(".pdf"))
    if not pdfs:
        return "file.pdf" if os.path.exists("file.pdf") else None

    # Loop instead of recursing: the result of a retry must actually be
    # returned, otherwise an invalid entry leaves the path unassigned.
    while True:
        print("📁 Select a file")
        for number, name in enumerate(pdfs, start=1):
            print(f"{number}. {name}")
        print()

        raw = input("Enter a number, or 0 to exit: ")
        try:
            selection = int(raw)
        except ValueError:
            selection = -1  # not a number: treated as an invalid choice

        if selection == 0:
            handle_exit()
        elif 1 <= selection <= len(pdfs):
            return os.path.abspath(os.path.join(FILES, pdfs[selection - 1]))

        # Invalid choice: start over with a clean screen.
        cls()
45 | 
46 | 
def handle_exit():
    """Print a farewell message and terminate the process with status 1.

    Raises:
        SystemExit: always, carrying exit code 1.
    """
    print("\nGoodbye!\n")
    raise SystemExit(1)
50 | 
def handle_save(title, history):
    """Write *history* as JSON to ``<title>.json``, replacing any existing file.

    Args:
        title: Path or name of the output file, without the ``.json`` suffix.
        history: JSON-serializable conversation record.
    """
    destination = f"{title}.json"
    with open(destination, "w") as out:
        out.write(json.dumps(history))


--------------------------------------------------------------------------------