├── .gitignore
├── LICENSE
├── README.md
├── app.py
├── ingest.py
├── pet.pdf
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 AI Anytime
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Zephyr-7B-beta-RAG-Demo
2 | Zephyr 7B beta RAG Demo inside a Gradio app powered by BGE Embeddings, ChromaDB, and Zephyr 7B Beta.
3 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | from langchain import PromptTemplate, LLMChain
  2 | from langchain.llms import CTransformers
  3 | import os
  4 | from langchain.text_splitter import RecursiveCharacterTextSplitter
  5 | from langchain.vectorstores import Chroma
  6 | from langchain.chains import RetrievalQA
  7 | from langchain.embeddings import HuggingFaceBgeEmbeddings
  8 | from io import BytesIO
  9 | from langchain.document_loaders import PyPDFLoader
 10 | import gradio as gr
 11 | 
 12 | 
 13 | local_llm = "zephyr-7b-beta.Q5_K_S.gguf"
 14 | 
 15 | config = {
 16 | 'max_new_tokens': 1024,
 17 | 'repetition_penalty': 1.1,
 18 | 'temperature': 0.1,
 19 | 'top_k': 50,
 20 | 'top_p': 0.9,
 21 | 'stream': True,
 22 | 'threads': int(os.cpu_count() / 2)
 23 | }
 24 | 
 25 | llm = CTransformers(
 26 |     model=local_llm,
 27 |     model_type="mistral",
 28 |     lib="avx2", #for CPU use
 29 |     **config
 30 | )
 31 | 
 32 | print("LLM Initialized...")
 33 | 
 34 | # Prompt template with {context} and {question} placeholders (declared as input_variables below).
 35 | prompt_template = """Use the following pieces of information to answer the user's question.
 36 | If you don't know the answer, just say that you don't know, don't try to make up an answer.
 37 | 
 38 | Context: {context}
 39 | Question: {question}
 40 | 
 41 | Only return the helpful answer below and nothing else.
 42 | Helpful answer:
 43 | """
 44 | # BGE embedding model on CPU, configured with the same values ingest.py uses to build the store.
 45 | model_name = "BAAI/bge-large-en"
 46 | model_kwargs = {'device': 'cpu'}
 47 | encode_kwargs = {'normalize_embeddings': False}
 48 | embeddings = HuggingFaceBgeEmbeddings(
 49 |     model_name=model_name,
 50 |     model_kwargs=model_kwargs,
 51 |     encode_kwargs=encode_kwargs
 52 | )
 53 | 
 54 | # Open the Chroma store that ingest.py writes to stores/pet_cosine; the retriever returns one document (k=1).
 55 | prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
 56 | load_vector_store = Chroma(persist_directory="stores/pet_cosine", embedding_function=embeddings)
 57 | retriever = load_vector_store.as_retriever(search_kwargs={"k":1})
 58 | # query = "what is the fastest speed for a greyhound dog?"
 59 | # semantic_search = retriever.get_relevant_documents(query)
 60 | # print(semantic_search)
 61 | 
 62 | print("######################################################################")
 63 | # NOTE(review): this module-level dict is not read below; get_response supplies the prompt itself.
 64 | chain_type_kwargs = {"prompt": prompt}
 65 | 
 66 | # qa = RetrievalQA.from_chain_type(
 67 | #     llm=llm,
 68 | #     chain_type="stuff",
 69 | #     retriever=retriever,
 70 | #     return_source_documents = True,
 71 | #     chain_type_kwargs= chain_type_kwargs,
 72 | #     verbose=True
 73 | # )
 74 | 
 75 | # response = qa(query)
 76 | 
 77 | # print(response)
 78 | # Example questions handed to gr.Interface(examples=...) further down.
 79 | sample_prompts = ["what is the fastest speed for a greyhound dog?", "Why should we not feed chocolates to the dogs?", "Name two factors which might contribute to why some dogs might get scared?"]
 80 | 
 81 | def get_response(input):
 82 |   query = input
 83 |   chain_type_kwargs = {"prompt": prompt}
 84 |   qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)
 85 |   response = qa(query)
 86 |   return response
 87 | 
 88 | input = gr.Text(
 89 |                 label="Prompt",
 90 |                 show_label=False,
 91 |                 max_lines=1,
 92 |                 placeholder="Enter your prompt",
 93 |                 container=False,
 94 |             )
 95 | 
 96 | iface = gr.Interface(fn=get_response, 
 97 |              inputs=input, 
 98 |              outputs="text",
 99 |              title="My Dog PetCare Bot",
100 |              description="This is a RAG implementation based on Zephyr 7B Beta LLM.",
101 |              examples=sample_prompts,
102 |              allow_screenshot=False,
103 |              allow_flagging=False
104 |              )
105 | 
106 | iface.launch()
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 |             
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/ingest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 3 | from langchain.vectorstores import Chroma
 4 | from langchain.embeddings import HuggingFaceBgeEmbeddings
 5 | from langchain.document_loaders import PyPDFLoader
 6 | 
 7 | model_name = "BAAI/bge-large-en"
 8 | model_kwargs = {'device': 'cpu'}
 9 | encode_kwargs = {'normalize_embeddings': False}
10 | embeddings = HuggingFaceBgeEmbeddings(
11 |     model_name=model_name,
12 |     model_kwargs=model_kwargs,
13 |     encode_kwargs=encode_kwargs
14 | )
15 | 
16 | loader = PyPDFLoader("pet.pdf")
17 | documents = loader.load()
18 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
19 | texts = text_splitter.split_documents(documents)
20 | 
21 | vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={"hnsw:space": "cosine"}, persist_directory="stores/pet_cosine")
22 | 
23 | print("Vector Store Created.......")


--------------------------------------------------------------------------------
/pet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Zephyr-7B-beta-RAG-Demo/efa86fa00c8a829536d7070dc8ad4970381de30d/pet.pdf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | chainlit 
2 | ctransformers
3 | torch
4 | sentence_transformers
5 | chromadb 
6 | langchain
7 | pypdf 
8 | PyPDF2
9 | gradio


--------------------------------------------------------------------------------