├── ragnarok ├── img │ └── ragnarok.png ├── requirements.in ├── requirements.txt.orig ├── ollama_test.py ├── .vscode │ └── launch.json ├── pages │ ├── 2_Free_Chat.py │ └── 1_RAGnarok_Chat.py ├── requirements.txt └── RAGnarok_Settings.py ├── launch.sh ├── LICENSE ├── .gitignore └── README.md /ragnarok/img/ragnarok.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GhostPack/RAGnarok/HEAD/ragnarok/img/ragnarok.png -------------------------------------------------------------------------------- /ragnarok/requirements.in: -------------------------------------------------------------------------------- 1 | transformers 2 | sentence-transformers 3 | huggingface-hub 4 | langchain 5 | langchain-community 6 | llama-cpp-python 7 | streamlit 8 | streamlit-extras 9 | streamlit-cookies-manager 10 | ollama -------------------------------------------------------------------------------- /ragnarok/requirements.txt.orig: -------------------------------------------------------------------------------- 1 | transformers==4.37.0 2 | sentence-transformers==2.2.2 3 | huggingface-hub==0.20.3 4 | langchain==0.1.2 5 | langchain-community==0.0.14 6 | llama-cpp-python==0.2.26 7 | streamlit==1.30.0 8 | streamlit-extras==0.3.6 9 | streamlit-cookies-manager==0.2.0 -------------------------------------------------------------------------------- /ragnarok/ollama_test.py: -------------------------------------------------------------------------------- 1 | from ollama import Client 2 | client = Client( 3 | host='http://10.0.0.11:11434', 4 | headers={} 5 | ) 6 | response = client.chat(model='llama3.2', messages=[ 7 | { 8 | 'role': 'user', 9 | 'content': 'Why is the sky blue?', 10 | }, 11 | ]) -------------------------------------------------------------------------------- /launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # create the Python virtual env and install the requirements 4 | cd ragnarok 5 | sudo apt install python3.11-venv 6 | python3 -m venv venv 7 | source venv/bin/activate 8 | pip3 install -r requirements.txt 9 | 10 | # kick off the main app 11 | streamlit run RAGnarok_Settings.py 12 | -------------------------------------------------------------------------------- /ragnarok/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | { 9 | "name": "Python: Streamlit", 10 | "type": "debugpy", 11 | "request": "launch", 12 | "module": "streamlit", 13 | "args": [ 14 | "run", 15 | "RAGnarok_Settings.py", 16 | "--server.runOnSave=true" 17 | ], 18 | "jinja": true 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, GhostPack 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | .DS_STORE 163 | 164 | ssh.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAGnarok 2 | 3 | RAGnarok is a [Retrieval-Augmented Generation](https://blogs.nvidia.com/blog/what-is-retrieval-augmented-generation/) chatbot frontend for [Nemesis](https://github.com/SpecterOps/Nemesis). It allows you to ask questions about text extracted from compatible documents processed by Nemesis. 4 | 5 | ## RAG 6 | 7 | **Short explanation:** The general idea with Retrieval-Augmented Generation (RAG) is to allow a large language model (LLM) to answer questions about documents you've indexed. 8 | 9 | **Medium explanation:** RAG involves processing and turning text inputs into fixed-length vectors via an embedding model, which are then stored in a backend vector database. Questions to the LLM are then used to look up the "most similar" chunks of text, which are then fed into the context prompt for an LLM. 10 | 11 | ![](https://miro.medium.com/v2/resize:fit:1400/format:webp/1*kSkeaXRvRzbJ9SrFZaMoOg.png) 12 | [*Source*](https://towardsdatascience.com/retrieval-augmented-generation-rag-from-theory-to-langchain-implementation-4e9bd5f6a4f2) 13 | 14 | ***Longer explanation in the rest of the section :)*** 15 | 16 | ***Even longer explanation in [this blog post](https://posts.specterops.io/summoning-ragnarok-with-your-nemesis-7c4f0577c93b).*** 17 | 18 | #### Indexing 19 | 20 | Retrieval-augmented generation is an architecture where documents being processed undergo the following steps: 21 | 22 | 1. Plaintext is extracted from any incoming documents.
23 | - Nemesis uses [Apache Tika](https://tika.apache.org/) to extract text from compatible documents. 24 | 2. The text is tokenized into chunks of up to X tokens, where X depends on the *context window* of the embedding model used. 25 | - Nemesis uses Langchain's [TokenTextSplitter](https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.TokenTextSplitter.html), a chunk size of 510 tokens, and a 15% overlap between chunks. 26 | 3. Each chunk of text is processed by an [embedding model](https://huggingface.co/spaces/mteb/leaderboard) which turns the input text into a fixed-length vector of floats. 27 | - As [Pinecone explains](https://www.pinecone.io/learn/vector-embeddings/), what's cool about embedding models is that the vector representations they produce preserve "semantic similarity", meaning that more similar chunks of text will have more similar vectors. 28 | - Nemesis currently uses the [TaylorAI/gte-tiny](https://huggingface.co/TaylorAI/gte-tiny) embedding model as it's fast, but others are possible. 29 | 4. Each vector and associated snippet of text is stored in a vector database. 30 | - Nemesis uses Elasticsearch for vector storage. 31 | 32 | #### Semantic Search 33 | 34 | This is the initial indexing process that Nemesis has been performing for a while. However, in order to complete a RAG pipeline, the next steps are: 35 | 36 | 5. Take an input prompt, such as "*What is a certificate?*", and run it through the same embedding model the files were indexed with. 37 | 6. Query the vector database (e.g., Elasticsearch) for the nearest **k** vectors + associated text chunks that are "closest" to the prompt input vector. 38 | - This will return the **k** chunks of text that are the most similar to the input query. 39 | 7. We also use Elasticsearch's traditional(-ish) BM25 text search over the text for each chunk. 40 | - These two lists of results are combined with [Reciprocal Rank Fusion](https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking), and the top results from the fused list are returned. 41 | - **Note:** steps 6 and 7 happen in the `nlp` container in Nemesis. This is exposed at http://\/nlp/hybrid_search 42 | 43 | #### Reranking 44 | 45 | We now have the **k** chunks of text most similar to our input query. If we want to get a bit fancier, we can apply what's called [reranking](https://www.pinecone.io/learn/series/rag/rerankers/). 46 | 47 | 8. With reranking, the prompt question and each retrieved text chunk are paired up (question, text) and fed into a more powerful model (well, more powerful than the embedding model) known as a reranker. The reranker generates a similarity score for the input prompt and text chunk. 48 | - RAGnarok uses an adapted version of [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) for reranking. 49 | 9. The results are then **reranked** and the top X results are selected. 50 | 51 | #### LLM Processing 52 | 53 | 10. Finally, the resulting texts are combined with a prompt to the (local) LLM. Think something along the lines of "Given these chunks of text {X}, answer this question {Y}". A minimal end-to-end sketch of this pipeline is shown below.
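
To make the flow concrete, here is a rough, self-contained sketch of steps 1-10 in Python. It is an illustration only, not RAGnarok's or Nemesis's actual code: the two sample documents are made up, a plain in-memory cosine-similarity lookup stands in for the Elasticsearch hybrid BM25 + vector search, the stock `BAAI/bge-reranker-base` model is loaded through sentence-transformers' `CrossEncoder` rather than RAGnarok's adapted reranker, and the 76-token chunk overlap is simply 15% of 510. The `TokenTextSplitter` step also assumes the `tiktoken` package is installed.

```python
# Minimal, in-memory sketch of the RAG flow described above (illustrative only).
import numpy as np
from langchain_text_splitters import TokenTextSplitter
from sentence_transformers import SentenceTransformer, CrossEncoder

# made-up sample "documents" standing in for text that Nemesis extracted with Tika
documents = {
    "pki_notes.txt": "A certificate binds a public key to an identity and is signed by a CA...",
    "kerberos.txt": "Kerberos tickets are issued by the KDC and used to authenticate to services...",
}

# steps 1-2: chunk the extracted text (~510 tokens per chunk with ~15% overlap)
splitter = TokenTextSplitter(chunk_size=510, chunk_overlap=76)
chunks = [(name, chunk) for name, text in documents.items() for chunk in splitter.split_text(text)]

# steps 3-4: embed every chunk and keep the vectors in memory (stand-in for the vector database)
embedder = SentenceTransformer("TaylorAI/gte-tiny")
chunk_vectors = embedder.encode([chunk for _, chunk in chunks], normalize_embeddings=True)

# steps 5-7: embed the question and take the k most similar chunks by cosine similarity
# (standing in for the Elasticsearch hybrid vector + BM25 search)
question = "What is a certificate?"
query_vector = embedder.encode([question], normalize_embeddings=True)[0]
k = 3
top_idx = np.argsort(chunk_vectors @ query_vector)[::-1][:k]

# steps 8-9: rerank the candidate (question, chunk) pairs with a cross-encoder reranker
reranker = CrossEncoder("BAAI/bge-reranker-base")
scores = reranker.predict([(question, chunks[i][1]) for i in top_idx])
ranked = sorted(zip(scores.tolist(), top_idx.tolist()), reverse=True)

# step 10: build the final context prompt for the LLM from the top reranked chunks
context = "\n\n".join(f"Source ({chunks[i][0]}, score {score:.2f}):\n{chunks[i][1]}" for score, i in ranked)
llm_prompt = f"Given these chunks of text:\n{context}\n\nAnswer this question: {question}"
print(llm_prompt)
```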
54 | -------------------------------------------------------------------------------- /ragnarok/pages/2_Free_Chat.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | # from transformers import AutoModelForSequenceClassification, AutoTokenizer 3 | from huggingface_hub import hf_hub_download 4 | from llama_cpp import Llama 5 | from streamlit_cookies_manager import CookieManager 6 | import time 7 | import torch 8 | import asyncio 9 | from ollama import AsyncClient 10 | 11 | 12 | cookies = CookieManager() 13 | while not cookies.ready(): 14 | time.sleep(1) 15 | 16 | if "mode" not in cookies: 17 | st.error("Your cookies are broken, please go back to the main settings page.") 18 | st.stop() 19 | 20 | if "mode_index" not in st.session_state: 21 | st.session_state["mode_index"] = 0 # Default to the first mode 22 | 23 | def get_mode_index(cookies): 24 | if "mode" in cookies: 25 | if cookies["mode"] == "Local LLM": 26 | return 0 27 | elif cookies["mode"] == "Remote Ollama Server (Local Reranker)": 28 | return 1 29 | elif cookies["mode"] == "Remote Ollama Server (No Reranker)": 30 | return 2 31 | return 0 # Default to 0 if no mode is set 32 | 33 | @st.cache_resource 34 | def get_ollama_client(ollama_url): 35 | client = AsyncClient(ollama_url) 36 | return client 37 | 38 | mode_index = get_mode_index(cookies) 39 | 40 | if mode_index == 0: 41 | if "llm_model" not in cookies and "ollama_model" not in cookies: 42 | st.error("Please select a LLM model on the main settings page.") 43 | st.stop() 44 | if "llm_temperature" not in cookies: 45 | st.error("Please select a LLM model temperature on the main settings page.") 46 | st.stop() 47 | 48 | if mode_index == 1 or mode_index == 2: 49 | if "ollama_url" not in cookies: 50 | st.error("Please select an Ollama server on the main settings page.") 51 | st.stop() 52 | if "ollama_model" not in cookies: 53 | st.error("Please select an Ollama model on the main settings page.") 54 | st.stop() 55 | 56 | if mode_index == 1 or mode_index == 2: 57 | if "ollama_url" not in cookies: 58 | st.error("Please select an Ollama server on the main settings page.") 59 | st.stop() 60 | if "ollama_model" not in cookies: 61 | st.error("Please select an Ollama model on the main settings page.") 62 | st.stop() 63 | if cookies["ollama_model"] == "": 64 | st.error("Please select an Ollama model on the main settings page.") 65 | st.stop() 66 | 67 | 68 | if mode_index == 0: 69 | llm_generation_kwargs = { 70 | "max_tokens": 512, 71 | "stream": True, 72 | "temperature": float(cookies["llm_temperature"]), 73 | "echo": False 74 | } 75 | if mode_index == 0 or mode_index == 1: 76 | # check for GPU presence 77 | if torch.cuda.is_available(): 78 | # traditional Nvidia cuda GPUs 79 | device = torch.device("cuda:0") 80 | n_gpu_layers = int(cookies["n_gpu_layers"]) 81 | elif torch.backends.mps.is_available(): 82 | # for macOS M1/M2s 83 | device = torch.device("mps") 84 | n_gpu_layers = int(cookies["n_gpu_layers"]) 85 | else: 86 | device = torch.device("cpu") 87 | n_gpu_layers = 0 88 | 89 | @st.cache_resource 90 | def get_llm(llm_model_path, n_gpu_layers): 91 | llm = Llama( 92 | model_path=llm_model_path, 93 | n_ctx=8192, 94 | n_gpu_layers=n_gpu_layers, 95 | verbose=False 96 | ) 97 | return llm 98 | if mode_index == 0: 99 | try: 100 | if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3": 101 | llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True) 102 | elif 
cookies["llm_model"] == "openchat-3.5-0106": 103 | llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True) 104 | elif cookies["llm_model"] == "Starling-LM-7B-alpha": 105 | llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True) 106 | else: 107 | llm_model = cookies["llm_model"] 108 | st.error(f"Invalid llm_model: {llm_model}") 109 | except: 110 | with st.spinner("Downloading LLM model (this will take some time)..."): 111 | if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3": 112 | llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf") 113 | elif cookies["llm_model"] == "openchat-3.5-0106": 114 | llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf") 115 | elif cookies["llm_model"] == "Starling-LM-7B-alpha": 116 | llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf") 117 | else: 118 | llm_model = cookies["llm_model"] 119 | st.error(f"Invalid llm_model_path: {llm_model}") 120 | 121 | llm = get_llm(llm_model_path, n_gpu_layers) 122 | 123 | st.title("Free Chat With Selected Model") 124 | st.warning('*WARNING: results not guaranteed to be correct!*', icon="⚠️") 125 | 126 | if "freeform_messages" not in st.session_state: 127 | st.session_state.freeform_messages = [] 128 | 129 | for message in st.session_state.freeform_messages: 130 | with st.chat_message(message["role"]): 131 | st.markdown(message["content"]) 132 | 133 | if prompt := st.chat_input(""): 134 | st.session_state.freeform_messages.append({"role": "user", "content": prompt}) 135 | with st.chat_message("user"): 136 | st.markdown(prompt) 137 | 138 | with st.chat_message("assistant"): 139 | message_placeholder = st.empty() 140 | full_response = "" 141 | 142 | if "neural-chat" in cookies["llm_model"]: 143 | single_turn_prompt = f"### System:\nYou are a helpful assistant chatbot.\n### User:\n{prompt}\n### Assistant:\n" 144 | else: 145 | single_turn_prompt = f"GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:" 146 | 147 | with st.spinner("LLM is processing the prompt..."): 148 | start = time.time() 149 | if mode_index == 0: 150 | stream = llm.create_completion(single_turn_prompt, **llm_generation_kwargs) 151 | for output in stream: 152 | full_response += (output['choices'][0]['text'] or "").split("### Assistant:\n")[-1] 153 | message_placeholder.markdown(full_response + "▌") 154 | elif mode_index == 1 or mode_index == 2: 155 | client = get_ollama_client(cookies["ollama_url"]) 156 | async def chat(): 157 | full_response = "" 158 | # TODO: remove hardcoded model 159 | message = {'role': 'user', 'content': single_turn_prompt} 160 | async for part in await AsyncClient().chat(model='dolphin3:8b', messages=[message], stream=True): 161 | print(part['message']['content'], end='', flush=True) 162 | full_response += part['message']['content'] 163 | message_placeholder.markdown(full_response + "▌") 164 | return full_response 165 | 166 | full_response = asyncio.run(chat()) 167 | 168 | 169 | end = time.time() 170 | print(f"LLM generation completed in {(end - start):.2f} seconds") 171 | 172 | st.session_state.freeform_messages.append({"role": "assistant", "content": f"{full_response}"}) 173 | -------------------------------------------------------------------------------- /ragnarok/requirements.txt: 
-------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.13 3 | # by the following command: 4 | # 5 | # pip-compile 6 | # 7 | aiohappyeyeballs==2.6.1 8 | # via aiohttp 9 | aiohttp==3.12.14 10 | # via langchain-community 11 | aiosignal==1.4.0 12 | # via aiohttp 13 | altair==5.5.0 14 | # via 15 | # altex 16 | # streamlit 17 | altex==0.2.0 18 | # via 19 | # streamlit-extras 20 | # streamlit-faker 21 | annotated-types==0.7.0 22 | # via pydantic 23 | anyio==4.9.0 24 | # via httpx 25 | attrs==25.3.0 26 | # via 27 | # aiohttp 28 | # jsonschema 29 | # referencing 30 | beautifulsoup4==4.13.4 31 | # via favicon 32 | blinker==1.9.0 33 | # via streamlit 34 | cachetools==6.1.0 35 | # via streamlit 36 | certifi==2025.7.14 37 | # via 38 | # httpcore 39 | # httpx 40 | # requests 41 | cffi==1.17.1 42 | # via cryptography 43 | charset-normalizer==3.4.2 44 | # via requests 45 | click==8.2.1 46 | # via streamlit 47 | contourpy==1.3.2 48 | # via matplotlib 49 | cryptography==45.0.5 50 | # via streamlit-cookies-manager 51 | cycler==0.12.1 52 | # via matplotlib 53 | dataclasses-json==0.6.7 54 | # via langchain-community 55 | diskcache==5.6.3 56 | # via llama-cpp-python 57 | entrypoints==0.4 58 | # via streamlit-extras 59 | faker==37.4.2 60 | # via streamlit-faker 61 | favicon==0.7.0 62 | # via markdownlit 63 | filelock==3.18.0 64 | # via 65 | # huggingface-hub 66 | # torch 67 | # transformers 68 | fonttools==4.59.0 69 | # via matplotlib 70 | frozenlist==1.7.0 71 | # via 72 | # aiohttp 73 | # aiosignal 74 | fsspec==2025.7.0 75 | # via 76 | # huggingface-hub 77 | # torch 78 | gitdb==4.0.12 79 | # via gitpython 80 | gitpython==3.1.44 81 | # via streamlit 82 | greenlet==3.2.3 83 | # via sqlalchemy 84 | h11==0.16.0 85 | # via httpcore 86 | hf-xet==1.1.5 87 | # via huggingface-hub 88 | htbuilder==0.9.0 89 | # via 90 | # markdownlit 91 | # st-annotated-text 92 | # streamlit-extras 93 | httpcore==1.0.9 94 | # via httpx 95 | httpx==0.28.1 96 | # via 97 | # langsmith 98 | # ollama 99 | httpx-sse==0.4.1 100 | # via langchain-community 101 | huggingface-hub==0.33.4 102 | # via 103 | # -r requirements.in 104 | # sentence-transformers 105 | # tokenizers 106 | # transformers 107 | idna==3.10 108 | # via 109 | # anyio 110 | # httpx 111 | # requests 112 | # yarl 113 | jinja2==3.1.6 114 | # via 115 | # altair 116 | # llama-cpp-python 117 | # pydeck 118 | # streamlit-camera-input-live 119 | # streamlit-image-coordinates 120 | # streamlit-keyup 121 | # torch 122 | joblib==1.5.1 123 | # via scikit-learn 124 | jsonpatch==1.33 125 | # via langchain-core 126 | jsonpointer==3.0.0 127 | # via jsonpatch 128 | jsonschema==4.25.0 129 | # via altair 130 | jsonschema-specifications==2025.4.1 131 | # via jsonschema 132 | kiwisolver==1.4.8 133 | # via matplotlib 134 | langchain==0.3.26 135 | # via 136 | # -r requirements.in 137 | # langchain-community 138 | langchain-community==0.3.27 139 | # via -r requirements.in 140 | langchain-core==0.3.71 141 | # via 142 | # langchain 143 | # langchain-community 144 | # langchain-text-splitters 145 | langchain-text-splitters==0.3.8 146 | # via langchain 147 | langsmith==0.4.8 148 | # via 149 | # langchain 150 | # langchain-community 151 | # langchain-core 152 | llama-cpp-python==0.3.14 153 | # via -r requirements.in 154 | lxml==6.0.0 155 | # via markdownlit 156 | markdown==3.8.2 157 | # via 158 | # markdownlit 159 | # pymdown-extensions 160 | markdownlit==0.0.7 161 | # via streamlit-extras 162 | markupsafe==3.0.2 
163 | # via jinja2 164 | marshmallow==3.26.1 165 | # via dataclasses-json 166 | matplotlib==3.10.3 167 | # via streamlit-faker 168 | mpmath==1.3.0 169 | # via sympy 170 | multidict==6.6.3 171 | # via 172 | # aiohttp 173 | # yarl 174 | mypy-extensions==1.1.0 175 | # via typing-inspect 176 | narwhals==1.48.0 177 | # via 178 | # altair 179 | # plotly 180 | networkx==3.5 181 | # via torch 182 | numpy==2.3.1 183 | # via 184 | # contourpy 185 | # langchain-community 186 | # llama-cpp-python 187 | # matplotlib 188 | # pandas 189 | # pydeck 190 | # scikit-learn 191 | # scipy 192 | # streamlit 193 | # transformers 194 | nvidia-cublas-cu12==12.6.4.1 195 | # via 196 | # nvidia-cudnn-cu12 197 | # nvidia-cusolver-cu12 198 | # torch 199 | nvidia-cuda-cupti-cu12==12.6.80 200 | # via torch 201 | nvidia-cuda-nvrtc-cu12==12.6.77 202 | # via torch 203 | nvidia-cuda-runtime-cu12==12.6.77 204 | # via torch 205 | nvidia-cudnn-cu12==9.5.1.17 206 | # via torch 207 | nvidia-cufft-cu12==11.3.0.4 208 | # via torch 209 | nvidia-cufile-cu12==1.11.1.6 210 | # via torch 211 | nvidia-curand-cu12==10.3.7.77 212 | # via torch 213 | nvidia-cusolver-cu12==11.7.1.2 214 | # via torch 215 | nvidia-cusparse-cu12==12.5.4.2 216 | # via 217 | # nvidia-cusolver-cu12 218 | # torch 219 | nvidia-cusparselt-cu12==0.6.3 220 | # via torch 221 | nvidia-nccl-cu12==2.26.2 222 | # via torch 223 | nvidia-nvjitlink-cu12==12.6.85 224 | # via 225 | # nvidia-cufft-cu12 226 | # nvidia-cusolver-cu12 227 | # nvidia-cusparse-cu12 228 | # torch 229 | nvidia-nvtx-cu12==12.6.77 230 | # via torch 231 | ollama==0.5.1 232 | # via -r requirements.in 233 | orjson==3.11.0 234 | # via langsmith 235 | packaging==25.0 236 | # via 237 | # altair 238 | # huggingface-hub 239 | # langchain-core 240 | # langsmith 241 | # marshmallow 242 | # matplotlib 243 | # plotly 244 | # streamlit 245 | # transformers 246 | pandas==2.3.1 247 | # via 248 | # altex 249 | # streamlit 250 | pillow==11.3.0 251 | # via 252 | # matplotlib 253 | # sentence-transformers 254 | # streamlit 255 | plotly==6.2.0 256 | # via streamlit-extras 257 | prometheus-client==0.22.1 258 | # via streamlit-extras 259 | propcache==0.3.2 260 | # via 261 | # aiohttp 262 | # yarl 263 | protobuf==6.31.1 264 | # via 265 | # streamlit 266 | # streamlit-extras 267 | pyarrow==21.0.0 268 | # via streamlit 269 | pycparser==2.22 270 | # via cffi 271 | pydantic==2.11.7 272 | # via 273 | # langchain 274 | # langchain-core 275 | # langsmith 276 | # ollama 277 | # pydantic-settings 278 | pydantic-core==2.33.2 279 | # via pydantic 280 | pydantic-settings==2.10.1 281 | # via langchain-community 282 | pydeck==0.9.1 283 | # via streamlit 284 | pymdown-extensions==10.16 285 | # via markdownlit 286 | pyparsing==3.2.3 287 | # via matplotlib 288 | python-dateutil==2.9.0.post0 289 | # via 290 | # matplotlib 291 | # pandas 292 | python-dotenv==1.1.1 293 | # via pydantic-settings 294 | pytz==2025.2 295 | # via pandas 296 | pyyaml==6.0.2 297 | # via 298 | # huggingface-hub 299 | # langchain 300 | # langchain-community 301 | # langchain-core 302 | # pymdown-extensions 303 | # transformers 304 | referencing==0.36.2 305 | # via 306 | # jsonschema 307 | # jsonschema-specifications 308 | regex==2024.11.6 309 | # via transformers 310 | requests==2.32.4 311 | # via 312 | # favicon 313 | # huggingface-hub 314 | # langchain 315 | # langchain-community 316 | # langsmith 317 | # requests-toolbelt 318 | # streamlit 319 | # transformers 320 | requests-toolbelt==1.0.0 321 | # via langsmith 322 | rpds-py==0.26.0 323 | # via 324 | # jsonschema 325 | # 
referencing 326 | safetensors==0.5.3 327 | # via transformers 328 | scikit-learn==1.7.1 329 | # via sentence-transformers 330 | scipy==1.16.0 331 | # via 332 | # scikit-learn 333 | # sentence-transformers 334 | sentence-transformers==5.0.0 335 | # via -r requirements.in 336 | six==1.17.0 337 | # via python-dateutil 338 | smmap==5.0.2 339 | # via gitdb 340 | sniffio==1.3.1 341 | # via anyio 342 | soupsieve==2.7 343 | # via beautifulsoup4 344 | sqlalchemy==2.0.41 345 | # via 346 | # langchain 347 | # langchain-community 348 | st-annotated-text==4.0.2 349 | # via streamlit-extras 350 | st-theme==1.2.3 351 | # via streamlit-extras 352 | streamlit==1.47.0 353 | # via 354 | # -r requirements.in 355 | # altex 356 | # markdownlit 357 | # st-theme 358 | # streamlit-avatar 359 | # streamlit-camera-input-live 360 | # streamlit-card 361 | # streamlit-cookies-manager 362 | # streamlit-embedcode 363 | # streamlit-extras 364 | # streamlit-faker 365 | # streamlit-image-coordinates 366 | # streamlit-keyup 367 | # streamlit-toggle-switch 368 | # streamlit-vertical-slider 369 | streamlit-avatar==0.1.3 370 | # via streamlit-extras 371 | streamlit-camera-input-live==0.2.0 372 | # via streamlit-extras 373 | streamlit-card==1.0.2 374 | # via streamlit-extras 375 | streamlit-cookies-manager==0.2.0 376 | # via -r requirements.in 377 | streamlit-embedcode==0.1.2 378 | # via streamlit-extras 379 | streamlit-extras==0.6.0 380 | # via 381 | # -r requirements.in 382 | # markdownlit 383 | streamlit-faker==0.0.4 384 | # via streamlit-extras 385 | streamlit-image-coordinates==0.1.9 386 | # via streamlit-extras 387 | streamlit-keyup==0.3.0 388 | # via streamlit-extras 389 | streamlit-toggle-switch==1.0.2 390 | # via streamlit-extras 391 | streamlit-vertical-slider==2.5.5 392 | # via streamlit-extras 393 | sympy==1.14.0 394 | # via torch 395 | tenacity==9.1.2 396 | # via 397 | # langchain-community 398 | # langchain-core 399 | # streamlit 400 | threadpoolctl==3.6.0 401 | # via scikit-learn 402 | tokenizers==0.21.2 403 | # via transformers 404 | toml==0.10.2 405 | # via streamlit 406 | torch==2.7.1 407 | # via sentence-transformers 408 | tornado==6.5.1 409 | # via streamlit 410 | tqdm==4.67.1 411 | # via 412 | # huggingface-hub 413 | # sentence-transformers 414 | # transformers 415 | transformers==4.53.3 416 | # via 417 | # -r requirements.in 418 | # sentence-transformers 419 | triton==3.3.1 420 | # via torch 421 | typing-extensions==4.14.1 422 | # via 423 | # altair 424 | # beautifulsoup4 425 | # huggingface-hub 426 | # langchain-core 427 | # llama-cpp-python 428 | # pydantic 429 | # pydantic-core 430 | # sentence-transformers 431 | # sqlalchemy 432 | # streamlit 433 | # torch 434 | # typing-inspect 435 | # typing-inspection 436 | typing-inspect==0.9.0 437 | # via dataclasses-json 438 | typing-inspection==0.4.1 439 | # via 440 | # pydantic 441 | # pydantic-settings 442 | tzdata==2025.2 443 | # via 444 | # faker 445 | # pandas 446 | urllib3==2.5.0 447 | # via requests 448 | validators==0.35.0 449 | # via streamlit-extras 450 | watchdog==6.0.0 451 | # via streamlit 452 | yarl==1.20.1 453 | # via aiohttp 454 | zstandard==0.23.0 455 | # via langsmith 456 | 457 | # The following packages are considered to be unsafe in a requirements file: 458 | # setuptools 459 | -------------------------------------------------------------------------------- /ragnarok/RAGnarok_Settings.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from requests.auth import HTTPBasicAuth 3 | import 
time 4 | # from langchain_community.embeddings import HuggingFaceEmbeddings 5 | # from transformers import AutoModelForSequenceClassification, AutoTokenizer 6 | import streamlit as st 7 | from streamlit_cookies_manager import CookieManager 8 | import torch 9 | import urllib3 10 | import urllib 11 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) 12 | torch.classes.__path__ = [] 13 | 14 | cookies = CookieManager() 15 | while not cookies.ready(): 16 | time.sleep(1) 17 | 18 | if torch.cuda.is_available(): 19 | device = "cuda" 20 | elif torch.backends.mps.is_available(): 21 | device = "mps" 22 | else: 23 | device = "cpu" 24 | 25 | columns = st.columns(2) 26 | with columns[0]: 27 | st.image("./img/ragnarok.png") 28 | st.markdown("#### *[RAGnarok](https://github.com/GhostPack/RAGnarok) is a Retrieval-Augmented Generation LLM ChatBot powered by a Nemesis instance.*") 29 | st.info("**NOTE**: RAGnarok does not have conversational memory, each question is independent.") 30 | st.divider() 31 | 32 | 33 | def wait_for_nemesis(nemesis_url, nemesis_user, nemesis_password, wait_timeout = 3): 34 | retries = 3 35 | success = False 36 | while retries > 0: 37 | try: 38 | if nemesis_url.startswith("https://"): 39 | result = requests.get( 40 | f"{nemesis_url}nlp/ready", 41 | auth=HTTPBasicAuth(nemesis_user, nemesis_password), 42 | timeout=wait_timeout, 43 | verify=False 44 | ) 45 | else: 46 | result = requests.get( 47 | f"{nemesis_url}nlp/ready", 48 | auth=HTTPBasicAuth(nemesis_user, nemesis_password), 49 | timeout=wait_timeout 50 | ) 51 | if result.status_code == 401: 52 | st.error(f"Invalid Nemesis credentials!") 53 | elif result.status_code != 200: 54 | st.warning(f"Error connecting to Nemesis instance {nemesis_url}: '{result.status_code}'") 55 | else: 56 | success = True 57 | retries = 0 58 | st.success("Successfully connected to Nemesis instance!") 59 | break 60 | except Exception as e: 61 | st.warning(f"Encountered an exception while trying to connect to Nemesis instance {nemesis_url}: '{e}', trying again in {wait_timeout} seconds...") 62 | time.sleep(wait_timeout) 63 | retries = retries - 1 64 | continue 65 | if not success: 66 | st.error(f"Error connecting to {nemesis_url}, please check your connection/credentials!") 67 | 68 | default_value = cookies["nemesis_url"] if "nemesis_url" in cookies else "" 69 | nemesis_url = st.text_input( 70 | label="Nemesis URL", 71 | help="The Nemesis endpoint", 72 | value=default_value 73 | ) 74 | cols = st.columns(2) 75 | with cols[0]: 76 | default_value = cookies["nemesis_username"] if "nemesis_username" in cookies else "" 77 | nemesis_username = st.text_input( 78 | label="Nemesis Username", 79 | help="This is the `basic_auth_user` value in nemesis.config.", 80 | value=default_value 81 | ) 82 | with cols[1]: 83 | default_value = cookies["nemesis_password"] if "nemesis_password" in cookies else "" 84 | nemesis_password = st.text_input( 85 | label="Nemesis Password", 86 | help="This is the `basic_auth_password` value in nemesis.config.", 87 | value=default_value 88 | ) 89 | if nemesis_url and nemesis_username and nemesis_password: 90 | if not nemesis_url.endswith("/"): 91 | nemesis_url = f"{nemesis_url}/" 92 | wait_for_nemesis(nemesis_url, nemesis_username, nemesis_password) 93 | cookies["nemesis_url"] = nemesis_url 94 | cookies["nemesis_username"] = nemesis_username 95 | cookies["nemesis_password"] = nemesis_password 96 | st.divider() 97 | 98 | if "mode" not in cookies: 99 | cookies["mode"] = "Local LLM" # Default mode 100 | 101 | if "mode_index" not 
in st.session_state: 102 | st.session_state["mode_index"] = 0 # Default to the first mode 103 | def get_mode_index(cookies): 104 | if "mode" in cookies: 105 | if cookies["mode"] == "Local LLM": 106 | return 0 107 | elif cookies["mode"] == "Remote Ollama Server (Local Reranker)": 108 | return 1 109 | elif cookies["mode"] == "Remote Ollama Server (No Reranker)": 110 | return 2 111 | return 0 # Default to 0 if no mode is set 112 | 113 | def on_change_mode_selectbox(): 114 | # Update cookies and session state 115 | cookies["mode"] = st.session_state["selected_mode"] 116 | st.session_state["mode_index"] = get_mode_index(cookies) 117 | print(f"Mode changed to: {cookies['mode']} (index: {st.session_state['mode_index']})") 118 | 119 | # Selectbox for mode selection 120 | mode = st.selectbox( 121 | label='Ragnarok Mode', 122 | options=('Local LLM', 'Remote Ollama Server (Local Reranker)', 'Remote Ollama Server (No Reranker)'), 123 | help="The mode you want to run RAGnarok in.", 124 | index=st.session_state["mode_index"], # Use session state for the index 125 | on_change=on_change_mode_selectbox, 126 | key="selected_mode" # Bind to session state 127 | ) 128 | # Update mode_index based on session state 129 | mode_index = st.session_state["mode_index"] 130 | 131 | if mode_index == 0: 132 | cols = st.columns(2) 133 | with cols[0]: 134 | default_index = 0 135 | if "llm_model" in cookies: 136 | if "neural-chat" in cookies["llm_model"].lower(): 137 | default_index = 0 138 | elif "openchat" in cookies["llm_model"].lower(): 139 | default_index = 1 140 | elif "starling" in cookies["llm_model"].lower(): 141 | default_index = 2 142 | llm_model = st.selectbox( 143 | label='LLM model to use', 144 | options=('Intel/neural-chat-7b-v3-3', 'openchat-3.5-0106', 'Starling-LM-7B-alpha'), 145 | help="The core LLM to use for chat over retrieved document snippets.", 146 | index=default_index 147 | ) 148 | cookies["llm_model"] = llm_model 149 | with cols[1]: 150 | llm_temperature_default_value = float(cookies["llm_temperature"]) if "llm_temperature" in cookies else 0.1 151 | llm_temperature = st.slider( 152 | label="LLM Temperature", 153 | min_value=0.0, 154 | max_value=1.0, 155 | value=llm_temperature_default_value, 156 | help="The temperate for the core LLM. Higher means more 'creative', lower means more repeatable." 157 | ) 158 | cookies["llm_temperature"] = llm_temperature 159 | cols = st.columns(2) 160 | with cols[0]: 161 | default_index = 0 162 | if "reranking_model" in cookies: 163 | if cookies["reranking_model"] == "Harmj0y/nemesis-reranker": 164 | default_index = 0 165 | elif cookies["reranking_model"] == "BAAI/bge-reranker-base": 166 | default_index = 1 167 | reranking_model = st.selectbox( 168 | label='Reranking model to use', 169 | options=('nemesis-reranker', 'bge-reranker-base',), 170 | help="Model to use to rerank results before sending to the LLM.", 171 | index=default_index 172 | ) 173 | if reranking_model == "nemesis-reranker": 174 | reranking_model = "Harmj0y/nemesis-reranker" 175 | elif reranking_model == "bge-reranker-base": 176 | reranking_model = "BAAI/bge-reranker-base" 177 | cookies["reranking_model"] = reranking_model 178 | with cols[1]: 179 | if device == "cuda" or device == "mps": 180 | default_n_gpu_layers = int(cookies["n_gpu_layers"]) if "n_gpu_layers" in cookies else 4 181 | n_gpu_layers = st.slider( 182 | label="Number of GPU layers to offload to the GPU", 183 | min_value=1, 184 | max_value=32, 185 | value=default_n_gpu_layers, 186 | help="Number of GPU layers to offload to the GPU. 
More _usually_ means faster generation, but may cause out-of-memory errors." 187 | ) 188 | cookies["n_gpu_layers"] = n_gpu_layers 189 | 190 | elif mode_index == 1: 191 | cols = st.columns(1) 192 | with cols[0]: 193 | ollama_url_default_value = cookies["ollama_url"] if "ollama_url" in cookies else "http://localhost:11434" 194 | ollama_url = st.text_input( 195 | label="Ollama Server URL", 196 | help="Ollama Server URL (e.g. http://localhost:11434)", 197 | value=ollama_url_default_value 198 | ) 199 | cookies["ollama_url"] = ollama_url 200 | 201 | cols = st.columns(1) 202 | with cols[0]: 203 | ollama_model_default_value = cookies["ollama_model"] if "ollama_model" in cookies else "" 204 | ollama_model = st.text_input( 205 | label="Ollama Model", 206 | help="The LLM you will use from Ollama (e.g. llama3.3:70b)", 207 | value=ollama_model_default_value 208 | ) 209 | cookies["ollama_model"] = ollama_model 210 | 211 | 212 | cols = st.columns(2) 213 | with cols[0]: 214 | default_index = 0 215 | if "reranking_model" in cookies: 216 | if cookies["reranking_model"] == "Harmj0y/nemesis-reranker": 217 | default_index = 0 218 | elif cookies["reranking_model"] == "BAAI/bge-reranker-base": 219 | default_index = 1 220 | reranking_model = st.selectbox( 221 | label='Reranking model to use', 222 | options=('nemesis-reranker', 'bge-reranker-base',), 223 | help="Model to use to rerank results before sending to the LLM.", 224 | index=default_index 225 | ) 226 | if reranking_model == "nemesis-reranker": 227 | reranking_model = "Harmj0y/nemesis-reranker" 228 | elif reranking_model == "bge-reranker-base": 229 | reranking_model = "BAAI/bge-reranker-base" 230 | cookies["reranking_model"] = reranking_model 231 | with cols[1]: 232 | if device == "cuda" or device == "mps": 233 | default_n_gpu_layers = int(cookies["n_gpu_layers"]) if "n_gpu_layers" in cookies else 4 234 | n_gpu_layers = st.slider( 235 | label="Number of GPU layers to offload to the GPU", 236 | min_value=1, 237 | max_value=32, 238 | value=default_n_gpu_layers, 239 | help="Number of GPU layers to offload to the GPU. More _usually_ means faster generation, but may cause out-of-memory errors." 240 | ) 241 | cookies["n_gpu_layers"] = n_gpu_layers 242 | 243 | elif mode_index == 2: 244 | cols = st.columns(1) 245 | with cols[0]: 246 | ollama_url_default_value = cookies["ollama_url"] if "ollama_url" in cookies else "http://localhost:11434" 247 | ollama_url = st.text_input( 248 | label="Ollama Server URL", 249 | help="Ollama Server URL (e.g. http://localhost:11434)", 250 | value=ollama_url_default_value 251 | ) 252 | cookies["ollama_url"] = ollama_url 253 | cols = st.columns(1) 254 | with cols[0]: 255 | ollama_model_default_value = cookies["ollama_model"] if "ollama_model" in cookies else "" 256 | ollama_model = st.text_input( 257 | label="Ollama Model", 258 | help="The LLM you will use from Ollama (e.g. llama3.3:70b)", 259 | value=ollama_model_default_value 260 | ) 261 | cookies["ollama_model"] = ollama_model 262 | 263 | 264 | cols = st.columns(1) 265 | with cols[0]: 266 | k_similarity_default_value = int(cookies["k_similarity"]) if "k_similarity" in cookies else 30 267 | k_similarity = st.slider( 268 | label="Initial K search", 269 | min_value=1, 270 | max_value=100, 271 | value=k_similarity_default_value, 272 | help="The number of similar indexed documents to pull from the Nemesis backend before performing reranking. More documents casts a wide net but takes more time." 
273 | ) 274 | cookies["k_similarity"] = k_similarity 275 | 276 | cols = st.columns(2) 277 | with cols[0]: 278 | min_doc_results_default_value = int(cookies["min_doc_results"]) if "min_doc_results" in cookies else 1 279 | min_doc_results = st.slider( 280 | label="Minimum number of documents to supply to the LLM", 281 | min_value=1, 282 | max_value=15, 283 | value=min_doc_results_default_value, 284 | help="The minimum number of document results to feed to the LLM's context. More means slower response generation, higher provides more context (but possibly more irrelevant information)." 285 | ) 286 | cookies["min_doc_results"] = min_doc_results 287 | with cols[1]: 288 | max_doc_results_default_value = int(cookies["max_doc_results"]) if "max_doc_results" in cookies else 5 289 | max_doc_results = st.slider( 290 | label="Maximum number of documents to supply to the LLM", 291 | min_value=1, 292 | max_value=15, 293 | value=max_doc_results_default_value, 294 | help="The maximum number of document results to feed to the LLM's context. More means slower response generation, higher provides more context (but possibly more irrelevant information)." 295 | ) 296 | cookies["max_doc_results"] = max_doc_results 297 | 298 | st.divider() 299 | 300 | if st.button("Clear Cookies?"): 301 | cookies.clear() 302 | 303 | st.divider() 304 | 305 | cookies.save() -------------------------------------------------------------------------------- /ragnarok/pages/1_RAGnarok_Chat.py: -------------------------------------------------------------------------------- 1 | import time 2 | import ntpath 3 | import os.path 4 | import requests 5 | import json 6 | from requests.auth import HTTPBasicAuth 7 | 8 | from huggingface_hub import hf_hub_download 9 | from transformers import AutoModelForSequenceClassification, AutoTokenizer 10 | from llama_cpp import Llama 11 | import numpy as np 12 | import streamlit as st 13 | from streamlit_cookies_manager import CookieManager 14 | import torch 15 | import asyncio 16 | from ollama import AsyncClient 17 | 18 | 19 | 20 | cookies = CookieManager() 21 | while not cookies.ready(): 22 | time.sleep(1) 23 | 24 | if "mode" not in cookies: 25 | st.error("Your cookies are broken, please go back to the main settings page.") 26 | st.stop() 27 | 28 | if "mode_index" not in st.session_state: 29 | st.session_state["mode_index"] = 0 # Default to the first mode 30 | def get_mode_index(cookies): 31 | if "mode" in cookies: 32 | if cookies["mode"] == "Local LLM": 33 | return 0 34 | elif cookies["mode"] == "Remote Ollama Server (Local Reranker)": 35 | return 1 36 | elif cookies["mode"] == "Remote Ollama Server (No Reranker)": 37 | return 2 38 | return 0 # Default to 0 if no mode is set 39 | 40 | mode_index = get_mode_index(cookies) 41 | 42 | if mode_index == 0: 43 | if "llm_model" not in cookies and "ollama_model" not in cookies: 44 | st.error("Please select a LLM model on the main settings page.") 45 | st.stop() 46 | if "llm_temperature" not in cookies: 47 | st.error("Please select a LLM model temperature on the main settings page.") 48 | st.stop() 49 | 50 | if mode_index == 0 or mode_index == 1: 51 | if "reranking_model" not in cookies: 52 | st.error("Please select an reranking model on the main settings page.") 53 | st.stop() 54 | if mode_index == 1 or mode_index == 2: 55 | if "ollama_url" not in cookies: 56 | st.error("Please select an Ollama server on the main settings page.") 57 | st.stop() 58 | if "ollama_model" not in cookies: 59 | st.error("Please select an Ollama model on the main settings page.") 60 | 
st.stop() 61 | if cookies["ollama_model"] == "": 62 | st.error("Please select an Ollama model on the main settings page.") 63 | st.stop() 64 | 65 | if "nemesis_url" not in cookies or "nemesis_username" not in cookies or "nemesis_password" not in cookies: 66 | st.error("Please enter Nemesis connection info on the main settings page.") 67 | st.stop() 68 | 69 | if "k_similarity" not in cookies: 70 | st.error("Please select an k_similarity on the main settings page.") 71 | st.stop() 72 | 73 | if "min_doc_results" not in cookies: 74 | st.error("Please select an min_doc_results on the main settings page.") 75 | st.stop() 76 | 77 | if "max_doc_results" not in cookies: 78 | st.error("Please select an max_doc_results on the main settings page.") 79 | st.stop() 80 | 81 | 82 | 83 | 84 | ######################################################## 85 | # 86 | # Model Delarations 87 | # 88 | ######################################################## 89 | n_gpu_layers = 0 90 | temp = "" 91 | min_doc_results = int(cookies['min_doc_results']) 92 | max_doc_results = int(cookies["max_doc_results"]) 93 | min_similarity_score = 0 94 | 95 | @st.cache_resource 96 | def get_llm(llm_model_path, n_gpu_layers): 97 | llm = Llama( 98 | model_path=llm_model_path, 99 | n_ctx=8192, 100 | n_gpu_layers=n_gpu_layers, 101 | verbose=False 102 | ) 103 | return llm 104 | 105 | @st.cache_resource 106 | def get_ollama_client(ollama_url): 107 | client = AsyncClient(ollama_url) 108 | return client 109 | 110 | # either local LLM or remote Ollama server with local reranker 111 | if mode_index == 0 or mode_index == 1: 112 | # check for GPU presence 113 | if torch.cuda.is_available(): 114 | # traditional Nvidia cuda GPUs 115 | device = torch.device("cuda:0") 116 | n_gpu_layers = int(cookies["n_gpu_layers"]) 117 | elif torch.backends.mps.is_available(): 118 | # for macOS M1/M2s 119 | device = torch.device("mps") 120 | n_gpu_layers = int(cookies["n_gpu_layers"]) 121 | else: 122 | device = torch.device("cpu") 123 | n_gpu_layers = 0 124 | 125 | @st.cache_resource 126 | def get_reranker(reranking_model, device): 127 | rerank_tokenizer = AutoTokenizer.from_pretrained(reranking_model) 128 | print(f"device: {device}") 129 | rerank_model = AutoModelForSequenceClassification.from_pretrained(reranking_model).to(device) 130 | return (rerank_tokenizer, rerank_model) 131 | temp = cookies["reranking_model"] 132 | with st.spinner(f"Downloading/loading reranking model {temp} ..."): 133 | (rerank_tokenizer, rerank_model) = get_reranker(cookies["reranking_model"], device) 134 | 135 | # only local LLM 136 | if mode_index == 0: 137 | llm_generation_kwargs = { 138 | "max_tokens": 512, 139 | "stream": True, 140 | "temperature": float(cookies["llm_temperature"]), 141 | "echo": False 142 | } 143 | try: 144 | if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3": 145 | llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True) 146 | elif cookies["llm_model"] == "openchat-3.5-0106": 147 | llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True) 148 | elif cookies["llm_model"] == "Starling-LM-7B-alpha": 149 | llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True) 150 | else: 151 | llm_model = cookies["llm_model"] 152 | st.error(f"Invalid llm_model: {llm_model}") 153 | except: 154 | with st.spinner("Downloading LLM model (this will 
take some time)..."): 155 | if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3": 156 | llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf") 157 | elif cookies["llm_model"] == "openchat-3.5-0106": 158 | llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf") 159 | elif cookies["llm_model"] == "Starling-LM-7B-alpha": 160 | llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf") 161 | else: 162 | llm_model = cookies["llm_model"] 163 | st.error(f"Invalid llm_model_path: {llm_model}") 164 | 165 | llm = get_llm(llm_model_path, n_gpu_layers) 166 | 167 | 168 | with st.sidebar: 169 | if mode_index == 0 or mode_index == 1: 170 | if torch.cuda.is_available(): 171 | st.success("Using CUDA GPU!") 172 | elif torch.backends.mps.is_available(): 173 | st.success("Using MPS GPU!") 174 | else: 175 | st.warning("No GPU detected, generation may be slow!") 176 | file_path_include = st.text_input("Enter file name/path pattern to include in the initial search:") 177 | file_path_exclude = st.text_input("Enter file name/path pattern to exclude from the initial search:") 178 | st.write("**Note**: _wildcard == \\*, use | to separate multiple terms, e.g. C:\\Temp\\\\*|\\*.pdf_") 179 | st.write("_You MUST surround any file name with \\*'s, i.e., \*A_Process_is_No_One.pdf\*_") 180 | 181 | st.title("RAGnarok Chat") 182 | st.warning('*WARNING: results not guaranteed to be correct! Verify answers with the supplied sources.*', icon="⚠️") 183 | 184 | if "private_messages" not in st.session_state: 185 | st.session_state.private_messages = [] 186 | 187 | for message in st.session_state.private_messages: 188 | with st.chat_message(message["role"]): 189 | st.markdown(message["content"]) 190 | 191 | if prompt := st.chat_input(""): 192 | st.session_state.private_messages.append({"role": "user", "content": prompt}) 193 | with st.chat_message("user"): 194 | st.markdown(prompt) 195 | 196 | with st.chat_message("assistant"): 197 | message_placeholder = st.empty() 198 | full_response = "" 199 | results = [] 200 | 201 | try: 202 | # data = { 203 | # "search_phrase": prompt, 204 | # "num_results": cookies['k_similarity'], 205 | # } 206 | 207 | # if file_path_include: 208 | # data["file_path_include"] = file_path_include 209 | # if file_path_exclude: 210 | # data["file_path_exclude"] = file_path_exclude 211 | 212 | # TODO: change this to query the local llm or remote Ollama server to get a list of search terms, then use those terms to query Nemesis 213 | 214 | client = get_ollama_client(cookies["ollama_url"]) 215 | single_turn_nlp_prompt = f"### System:\nYou are a helpful LLM who knows how to distil user queries down to an individual search term. The user is going to ask you a question, your job is to determine what the best search term is based on the users question. Only respond with the search term, nothing else. Do not preface the response with classifiers like 'search string' or 'search_term', only output the term itself. 
\n### User:\n{prompt}\n### Assistant:\n" 216 | async def nlp_term_chat(): 217 | nlp_search_term_full_response = "" 218 | message = {'role': 'user', 'content': single_turn_nlp_prompt} 219 | async for part in await client.chat(model=cookies['ollama_model'], messages=[message], stream=True): 220 | nlp_search_term_full_response += part['message']['content'] 221 | return nlp_search_term_full_response 222 | nlp_search_term = asyncio.run(nlp_term_chat()) 223 | 224 | 225 | url = "{}/hasura/v1/graphql".format(cookies["nemesis_url"]) 226 | payload = json.dumps({ 227 | "query": "\n query SearchDocuments($searchQuery: String!, $pathPattern: String!, $agentPattern: String!, $project: String, $startDate: timestamptz, $endDate: timestamptz) {\n search_documents(\n args: {\n search_query: $searchQuery,\n path_pattern: $pathPattern,\n agent_pattern: $agentPattern,\n project_name: $project,\n start_date: $startDate,\n end_date: $endDate,\n max_results: 100\n }\n ) {\n object_id\n chunk_number\n content\n file_name\n path\n extension\n project\n agent_id\n timestamp\n }\n }\n ", 228 | "variables": { 229 | "searchQuery": nlp_search_term, 230 | "pathPattern": "%", 231 | "agentPattern": "%", 232 | "project": None, 233 | "startDate": None, 234 | "endDate": None 235 | } 236 | }) 237 | # TODO: add the X-Hasura-Admin-Secret to the settings page and use that instead of hardcoding it 238 | headers = { 239 | 'Content-Type': 'application/json', 240 | 'X-Hasura-Admin-Secret': 'pass456', 241 | } 242 | 243 | response = requests.request("POST", url, headers=headers, data=payload, verify=False) 244 | if response.status_code != 200: 245 | st.error(f"Error calling Nemesis GraphQL API: {response.status_code} - {response.text}") 246 | st.stop() 247 | else: 248 | results = response.json() 249 | if len(results["data"]["search_documents"]) == 0: 250 | st.error(f"No documents found matching the search criteria: {nlp_search_term}") 251 | st.stop() 252 | else: 253 | results = results["data"]["search_documents"] 254 | print(f"Found {len(results)} documents matching the search criteria.") 255 | 256 | 257 | 258 | # url = f'{cookies["nemesis_url"]}nlp/hybrid_search' 259 | # with st.spinner("Searching for initial documents from Nemesis..."): 260 | # if url.startswith("https://"): 261 | # response = requests.get( 262 | # url, 263 | # json=data, 264 | # auth=HTTPBasicAuth(cookies["nemesis_username"], cookies["nemesis_password"]), 265 | # verify=False 266 | # ) 267 | # else: 268 | # response = requests.get( 269 | # url, 270 | # json=data, 271 | # auth=HTTPBasicAuth(cookies["nemesis_username"], cookies["nemesis_password"]) 272 | # ) 273 | # if response.status_code == 200: 274 | # results = response.json() 275 | # if "error" in results: 276 | # if results["error"] == "index_not_found_exception": 277 | # st.error(f"No documents have been indexed!") 278 | # else: 279 | # error = results["error"] 280 | # st.error(f"Error calling text search {url} with search_phrase '{prompt}' : {error}") 281 | # st.stop() 282 | # else: 283 | # st.error(f"Error calling text search {url} with search_phrase '{prompt}' : {response.status_code}") 284 | # st.stop() 285 | 286 | except Exception as e: 287 | st.error(f"Error calling text search {url} with search_phrase '{prompt}' : {e}") 288 | st.stop() 289 | 290 | documents = results 291 | if mode_index == 0 or mode_index == 1: 292 | # Reranking process 293 | pairs = [] 294 | for document in documents: 295 | pairs += [[prompt, document["content"].replace('"""', '"').replace("'''", "'")]] 296 | 297 | start =
time.time() 298 | inputs = rerank_tokenizer( 299 | pairs, 300 | padding=True, 301 | truncation=True, 302 | return_tensors="pt", 303 | max_length=512, 304 | ).to(device) 305 | print(inputs) 306 | with st.spinner("Reranking documents..."): 307 | rerank_scores = (rerank_model(**inputs, return_dict=True).logits.view(-1,).float()).tolist() 308 | end = time.time() 309 | print(f"Reranker evaluated in {(end - start):.2f} seconds") 310 | 311 | final_results = [] 312 | 313 | for i in range(len(documents)): 314 | document = documents[i] 315 | rerank_score = rerank_scores[i] if (mode_index == 0 or mode_index == 1) else 0.1 316 | originating_file_name = ntpath.basename(document["path"]) 317 | originating_object_id = document["object_id"] 318 | nemesis_url = cookies["nemesis_url"] 319 | # link to the file in Nemesis 320 | file_page_link = f"{nemesis_url}files/{originating_object_id}" 321 | # snippet_id = document["id"] 322 | # direct link to the snippet 323 | # snippet_link = f"{nemesis_url}/kibana/app/discover#/?_a=(filters:!((query:(match_phrase:(_id:'{snippet_id}')))),index:'45fbaacb-9ef9-4cd1-b837-bc8ab0448220')&_g=(time:(from:now-1y%2Fd,to:now))" 324 | 325 | final_results += [ 326 | [ 327 | rerank_score, 328 | originating_file_name, 329 | file_page_link, 330 | # snippet_link, 331 | document["content"].replace('"""', '"').replace("'''", "'") 332 | ] 333 | ] 334 | 335 | # ensure results are sorted with the highest similarity score first 336 | final_results.sort(key=lambda x: x[0], reverse=True) 337 | 338 | # start at the minimum number of documents required 339 | i_range = min_doc_results if min_doc_results < len(final_results) else len(final_results) 340 | sources = final_results[0:i_range] 341 | 342 | responses_generated = False 343 | 344 | # add in results over the minimum similarity score 345 | for result in final_results[i_range:]: 346 | if result[0] > min_similarity_score: 347 | sources.append(result) 348 | 349 | # finally, cap the final results so we don't go over the LLM context 350 | # default == 15 (15 * 510 tokens == 7650, leaving room for prompt overhead) 351 | sources = sources[:max_doc_results] 352 | 353 | print(f"\nNumber of source snippets: {len(sources)}\n") 354 | 355 | sources_formatted = [] 356 | for i in range(len(sources)): 357 | source = sources[i] 358 | sources_formatted += [f"{i+1}. [{source[1]}]({source[2]}) (score: {source[0]:.2f})"] 359 | sources_formatted_final = "\n".join(sources_formatted) 360 | 361 | ####################################################### 362 | # 363 | # LLM prompting 364 | # 365 | ####################################################### 366 | 367 | template = """ 368 | You are a helpful LLM who knows how to reason over source text blocks. Generate a coherent and informative response based on the following source blocks. 369 | 370 | Each Source Block starts with the Source Block number, followed by a Similarity Score reflecting the block's relevance to the overall prompt, followed by the originating Filename, finally followed by the source Text itself. 371 | 372 | The similarity scores represent the model's confidence in the relevance of each source block. Higher scores indicate higher perceived similarity. Utilize the information in all source blocks to enhance your answer, but if any source blocks contain contradictory information use the information from the source block with the higher Similarity Score. 373 | 374 | Only answer questions using the sources below and if you're not sure of an answer, you can say "I don't know based on the retrieved sources".
375 | """ 376 | 377 | for i in range(len(sources)): 378 | # final_result_score, originating_file_name, file_page_link, snippet_link, final_result_text = sources[i] 379 | final_result_score, originating_file_name, file_page_link, final_result_text = sources[i] 380 | 381 | template += f""" 382 | Source Block: {i+1} 383 | Similarity Score: {final_result_score} 384 | Filename: {originating_file_name} 385 | Text: 386 | \"\"\" 387 | {final_result_text} 388 | \"\"\" 389 | 390 | --- 391 | """ 392 | 393 | template += f""" 394 | Question: {prompt} 395 | """ 396 | 397 | if "neural-chat" in cookies["llm_model"]: 398 | single_turn_prompt = f"### System:\nYou are a helpful LLM who knows how to reason over source text blocks.\n### User:\n{template}\n### Assistant:\n" 399 | else: 400 | single_turn_prompt = f"GPT4 Correct User: {template}<|end_of_turn|>GPT4 Correct Assistant:" 401 | 402 | with st.spinner("LLM is processing the prompt..."): 403 | start = time.time() 404 | if mode_index == 0: 405 | stream = llm.create_completion(single_turn_prompt, **llm_generation_kwargs) 406 | for output in stream: 407 | full_response += (output['choices'][0]['text'] or "").split("### Assistant:\n")[-1] 408 | message_placeholder.markdown(full_response + "▌") 409 | else: 410 | client = get_ollama_client(cookies["ollama_url"]) 411 | async def chat(): 412 | full_response = "" 413 | message = {'role': 'user', 'content': single_turn_prompt} 414 | async for part in await AsyncClient().chat(model=cookies['ollama_model'], messages=[message], stream=True): 415 | print(part['message']['content'], end='', flush=True) 416 | full_response += part['message']['content'] 417 | return full_response 418 | full_response = asyncio.run(chat()) 419 | 420 | 421 | end = time.time() 422 | 423 | message_placeholder.markdown(f"{full_response}\n\n*Sources:*\n\nSearch Term: {nlp_search_term}\n\n{sources_formatted_final}\n\n_Generation time: {(end - start):.2f} seconds_\n") 424 | 425 | print(f"LLM generation completed in {(end - start):.2f} seconds") 426 | 427 | st.session_state.private_messages.append({"role": "assistant", "content": f"{full_response}\n\n*Sources:*\n{sources_formatted_final}\n\n_Generation time: {(end - start):.2f} seconds_\n"}) 428 | --------------------------------------------------------------------------------