├── .dockerignore ├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── config └── config.yaml ├── contract ├── README.md ├── foundry.toml ├── script │ └── deploy.s.sol ├── src │ └── GlobalVectorManager.sol └── test │ └── GlobalVectorManager.t.sol ├── dev.sh ├── doc ├── drags.JPG ├── fRag.drawio ├── system.drawio └── systemDesign.drawio ├── docker-compose.yml ├── main.py ├── requirements.txt ├── research ├── .deepeval-cache.json ├── Evulate.ipynb ├── RAGAS.ipynb ├── communication │ ├── ipfs.ipynb │ ├── kafka │ │ ├── consumer.py │ │ ├── docker-compose.yml │ │ └── producer.py │ └── mqtt │ │ ├── receive.py │ │ └── send.py ├── groq.ipynb ├── localRag.ipynb ├── ollama.ipynb ├── retriver.ipynb └── utils.ipynb ├── run.sh ├── setup.py └── src └── bayesrag ├── __init__.py ├── config.py ├── constant.py ├── data_loader.py ├── embedder.py ├── evaluator.py ├── generator.py ├── ipfs.py ├── llmEvaluator.py ├── mq.py ├── retriever.py ├── text_splitter.py ├── utils.py └── vector_db.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | artifacts/* 163 | 164 | logs 165 | 166 | qdrant_storage;C 167 | 168 | *.png 169 | 170 | data*/ 171 | 172 | tut/ 173 | 174 | research/ 175 | 176 | doc/ 177 | 178 | artifacts/* 179 | 180 | logs 181 | 182 | qdrant_* 183 | 184 | *.png 185 | 186 | data*/ 187 | 188 | tut/ 189 | 190 | downloaded_files 191 | zipdownloaded_files 192 | 193 | ipfs_data 194 | ipfs_staging -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | artifacts/* 163 | 164 | logs 165 | 166 | qdrant_* 167 | 168 | *.png 169 | 170 | data*/ 171 | 172 | tut/ 173 | 174 | downloaded_files 175 | zipdownloaded_files 176 | 177 | ipfs_data 178 | ipfs_staging
-------------------------------------------------------------------------------- /Dockerfile: --------------------------------------------------------------------------------
1 | # Use the official Python 3.10 slim image as a base image 2 | FROM python:3.10-slim 3 |
4 | # Keep Python from writing .pyc files and from buffering stdout/stderr, so logs appear immediately 5 | ENV PYTHONDONTWRITEBYTECODE=1 6 | ENV PYTHONUNBUFFERED=1 7 |
8 | WORKDIR /app 9 |
10 | # Copy only the requirements.txt file first so the dependency layer can be cached 11 | COPY requirements.txt /app/ 12 |
13 | # Install Python dependencies; this layer is rebuilt only when requirements.txt changes 14 | RUN pip install --no-cache-dir -r requirements.txt 15 |
16 | # Copy the rest of the project files 17 | COPY . /app 18 |
19 | # Build and install the package 20 | RUN python setup.py sdist bdist_wheel 21 | RUN pip install --no-cache-dir . 22 |
23 | # Expose the application port 24 | EXPOSE 8000 25 |
26 | # Define data directory and default node type 27 | ENV DATA_DIR=/data 28 | ENV NODETYPE=admin 29 | ENV QDRANT_HOST=http://localhost:6333 30 |
31 | # Command to run the application (shell form so the environment variables are expanded at runtime) 32 | CMD python main.py --data-dir "$DATA_DIR" --nodetype "$NODETYPE" 33 |
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License 2 | 3 | Copyright (c) 2025 TraqueAi 4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 |
12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Traque Ai powered by RAG 2 | ![RAG](https://github.com/user-attachments/assets/afc2009b-2c90-4c17-8a93-dc5b7eb79e5e) 3 | 4 | 5 |
6 | [Website](https://traque.cc) | [Twitter](https://twitter.com/TraqueAi) 7 | 8 |
9 | ## Features 10 | 11 |
12 | - **Decentralized RAG for Blockchain Analysis**: Uses Retrieval-Augmented Generation (RAG) to process and analyze blockchain transactions with high accuracy.
13 | - **Privacy-Preserving Storage**: Leverages IPFS for secure, decentralized data storage.
14 | - **Low-Latency Communication**: Implements MQTT for real-time data sharing between nodes.
15 | - **Immutable and Transparent Records**: Utilizes blockchain to ensure secure and verifiable transaction tracking.
16 | - **Address Clustering & Fund Flow Analysis**: Identifies laundering patterns and traces transactions across addresses.
17 | - **Graph-Based Visualization**: Displays transaction paths interactively for enhanced clarity.
18 | - **AI-Driven Anomaly Detection**: Automatically flags suspicious transactions using machine learning.
19 | - **Scalable & Collaborative Learning**: Nodes contribute knowledge to improve global blockchain analysis. 20 |
21 | ## Key Technologies 22 |
23 | - **IPFS**: Decentralized storage for secure and censorship-resistant data management.
24 | - **MQTT**: Lightweight protocol for fast, efficient communication between nodes.
25 | - **Blockchain**: Immutable ledger for recording analysis results and rewarding contributions.
26 | - **Qdrant**: Vector database for high-speed, scalable similarity search and retrieval. 27 |
28 | ## Setup and Installation 29 |
30 | ### Clone and Build 31 |
32 | ```sh 33 | git clone https://github.com/TraqueAi/Traque-AI 34 | cd Traque-AI 35 | ``` 36 |
37 | Build the package: 38 |
39 | ```sh 40 | python setup.py sdist bdist_wheel 41 | pip install . 42 | ``` 43 |
44 | ### Running in a Docker Container 45 |
46 | Run all services: 47 |
48 | ```sh 49 | docker compose up -d 50 | ``` 51 |
52 | Download the model and start the application: 53 |
54 | ```sh 55 | bash run.sh 56 | ``` 57 |
58 | ### Setting up Qdrant (Vector Database) 59 |
60 | Download the Qdrant image: 61 |
62 | ```sh 63 | docker pull qdrant/qdrant 64 | ``` 65 |
66 | Run Qdrant: 67 |
68 | ```sh 69 | docker run -d -p 6333:6333 -p 6334:6334 \ 70 | -v ./qdrant_data:/qdrant/storage \ 71 | qdrant/qdrant 72 | ``` 73 |
74 | For Windows: 75 |
76 | ```sh 77 | docker run -d --name qdrant_container -p 6333:6333 -p 6334:6334 \ 78 | -v C:/path/to/qdrant_data:/qdrant/storage \ 79 | qdrant/qdrant:latest 80 | ``` 81 |
82 | ### Setting up Dependencies 83 |
84 | **Ollama**: Install Ollama by following the instructions on Ollama's website. 85 |
86 | **Language Model**: Pull a language model from the Ollama library: 87 |
88 | ```sh 89 | ollama pull llama3:8b 90 | ``` 91 |
92 | **Text Embedding Model**: 93 |
94 | ```sh 95 | ollama pull nomic-embed-text:latest 96 | ``` 97 |
98 | Install required Python libraries: 99 |
100 | ```sh 101 | pip install -r requirements.txt 102 | ``` 103 |
104 | ## Running the Application 105 |
106 | ### Configure Node Type 107 |
108 | Pass the `--nodetype` flag to `main.py` to specify the node type: 109 |
110 | - **admin**: Institute Node (manages the global embedding)
111 | - **data**: Data Node (contributes specialized knowledge) 112 |
113 | Start the application: 114 |
115 | ```sh 116 | python main.py --data-dir data --nodetype admin 117 | ``` 118 |
119 | Replace `data` with the desired data directory. Set `--nodetype` to either `admin` or `data`. 120 |
121 | --- 122 |
123 | For more details, check the [Docs](https://github.com/TraqueAi/TraqueAI).
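
### Interactive Commands

Once a node is running, `main.py` waits for input via `wait_for_commands()` from `bayesrag.utils` (not included in this listing), so the exact prompt text may differ; the session below is an illustrative sketch of the commands the dispatch loop in `main.py` actually handles:

```text
insert ./data   # load PDFs from the given directory, split them, and upsert embeddings into Qdrant
query           # enter the interactive Q&A loop ('q' leaves it; 'Y'/'N' toggles the optional evaluation step)
send            # scroll the local Qdrant collection and publish its vectors over MQTT
quit            # stop the node and disconnect the MQTT client
```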
124 | 125 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from bayesrag.config import DATA_DIR, QDRANT_COLLECTION,ID 3 | from bayesrag.data_loader import load_directory_pdf 4 | from bayesrag.text_splitter import split_texts 5 | from bayesrag.vector_db import VectorDB 6 | from bayesrag.retriever import get_context 7 | from bayesrag.generator import generate_response 8 | from bayesrag.mq import Mqttclient 9 | from qdrant_client import QdrantClient 10 | import warnings 11 | from cryptography.utils import CryptographyDeprecationWarning 12 | 13 | warnings.filterwarnings("ignore", category=CryptographyDeprecationWarning, message="ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from this module in 48.0.0.") 14 | 15 | # Initialize session state 16 | if 'messages' not in st.session_state: 17 | st.session_state.messages = [] 18 | vectorDb=VectorDB(QDRANT_COLLECTION) 19 | 20 | # Create vector database and upsert embeddings 21 | vectorDb.create_db() 22 | def insert_data(file): 23 | with open(DATA_DIR / file.name, "wb") as f: 24 | f.write(file.getbuffer()) 25 | 26 | # Load and split documents 27 | documents = load_directory_pdf(DATA_DIR) 28 | text_chunks = split_texts(documents) 29 | vectorDb=VectorDB(QDRANT_COLLECTION) 30 | 31 | # Create vector database and upsert embeddings 32 | vectorDb.upsert_embeddings(text_chunks) 33 | 34 | def main(): 35 | st.title("Lawyer-Based Chatbot") 36 | 37 | # Sidebar for file upload and actions 38 | st.sidebar.title("Upload Document") 39 | uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf") 40 | 41 | if uploaded_file is not None: 42 | insert_data(uploaded_file) 43 | st.sidebar.success("Document uploaded and processed successfully!") 44 | 45 | # Sidebar actions 46 | with st.sidebar.expander("Actions", expanded=True): 47 | if st.button("Send Vector"): 48 | qclient = QdrantClient(url="http://localhost:6333") # Update URL if needed 49 | client = Mqttclient(replyTopic=f"USER_TOPIC-{ID}", isAdmin=False) 50 | scroll_result = qclient.scroll(collection_name=QDRANT_COLLECTION, with_vectors=True) 51 | client.send_vector(scroll_result) 52 | st.sidebar.success("Vector sent successfully.") 53 | 54 | st.header("Chat with your documents") 55 | 56 | for message in st.session_state.messages: 57 | with st.chat_message(message["role"]): 58 | st.write(message["content"]) 59 | 60 | # Chat input 61 | user_input = st.chat_input("Type your question here...") 62 | 63 | if user_input: 64 | st.session_state.messages.append({"role": "user", "content": user_input}) 65 | with st.chat_message("user"): 66 | st.write(user_input) 67 | 68 | result = get_context(user_input) 69 | 70 | with st.chat_message("assistant"): 71 | response_container = st.empty() 72 | response_text = "" 73 | for response_part in generate_response(user_input, result): 74 | response_text += response_part 75 | response_container.markdown(response_text) 76 | 77 | st.session_state.messages.append({"role": "assistant", "content": response_text}) 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TraqueAi/TraqueAI/57c030675723c3f3bb5096dfd0480b53f5f76e8c/config/config.yaml 
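A quick way to try the Streamlit chatbot defined in `app.py` above: note that `streamlit` is imported by `app.py` but is not listed in `requirements.txt`, so it has to be installed separately, and Qdrant plus the MQTT broker are assumed to be reachable at the addresses configured in `bayesrag.config`:

```sh
pip install streamlit
streamlit run app.py
```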
-------------------------------------------------------------------------------- /contract/README.md: -------------------------------------------------------------------------------- 1 | ## Foundry 2 | 3 | **Foundry is a blazing fast, portable and modular toolkit for Ethereum application development written in Rust.** 4 | 5 | Foundry consists of: 6 | 7 | - **Forge**: Ethereum testing framework (like Truffle, Hardhat and DappTools). 8 | - **Cast**: Swiss army knife for interacting with EVM smart contracts, sending transactions and getting chain data. 9 | - **Anvil**: Local Ethereum node, akin to Ganache, Hardhat Network. 10 | - **Chisel**: Fast, utilitarian, and verbose solidity REPL. 11 | 12 | ## Documentation 13 | 14 | https://book.getfoundry.sh/ 15 | 16 | ## Usage 17 | 18 | ### Build 19 | 20 | ```shell 21 | $ forge build 22 | ``` 23 | 24 | ### Test 25 | 26 | ```shell 27 | $ forge test 28 | ``` 29 | 30 | ### Format 31 | 32 | ```shell 33 | $ forge fmt 34 | ``` 35 | 36 | ### Gas Snapshots 37 | 38 | ```shell 39 | $ forge snapshot 40 | ``` 41 | 42 | ### Anvil 43 | 44 | ```shell 45 | $ anvil 46 | ``` 47 | 48 | ### Deploy 49 | 50 | ```shell 51 | $ forge script script/Counter.s.sol:CounterScript --rpc-url --private-key 52 | ``` 53 | 54 | ### Cast 55 | 56 | ```shell 57 | $ cast 58 | ``` 59 | 60 | ### Help 61 | 62 | ```shell 63 | $ forge --help 64 | $ anvil --help 65 | $ cast --help 66 | ``` 67 | -------------------------------------------------------------------------------- /contract/foundry.toml: -------------------------------------------------------------------------------- 1 | [profile.default] 2 | src = "src" 3 | out = "out" 4 | libs = ["lib"] 5 | 6 | # See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options 7 | -------------------------------------------------------------------------------- /contract/script/deploy.s.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | import "forge-std/Script.sol"; 5 | import "../src/GlobalVectorManager.sol"; 6 | 7 | contract Deploy is Script { 8 | function run() external { 9 | vm.startBroadcast(); 10 | new GlobalVectorManager(); 11 | vm.stopBroadcast(); 12 | } 13 | } -------------------------------------------------------------------------------- /contract/src/GlobalVectorManager.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | contract GlobalVectorManager { 5 | struct VectorData { 6 | string ipfsHash; 7 | address uploader; 8 | uint256 timestamp; 9 | bool verified; 10 | } 11 | 12 | mapping(uint256 => VectorData) public vectors; 13 | uint256 public vectorCount; 14 | address public admin; 15 | mapping(address => bool) public isAdmin; 16 | mapping(address => bool) public isDataNode; 17 | address[] public dataNodes; 18 | address[] public admins; 19 | 20 | event VectorUploaded(uint256 indexed vectorId, string ipfsHash, address indexed uploader, uint256 timestamp); 21 | event VectorVerified(uint256 indexed vectorId, bool verified); 22 | event AdminJoined(address indexed admin); 23 | event DataNodeJoined(address indexed dataNode); 24 | event IncentivePaid(address indexed dataNode, uint256 amount); 25 | 26 | modifier onlyAdmin() { 27 | require(isAdmin[msg.sender], "Not an admin"); 28 | _; 29 | } 30 | 31 | constructor() { 32 | admin = msg.sender; 33 | isAdmin[admin] = true; 34 | admins.push(admin); 35 
| emit AdminJoined(admin); 36 | } 37 | 38 | function joinAsDataNode() public { 39 | require(!isDataNode[msg.sender], "Already a data node"); 40 | isDataNode[msg.sender] = true; 41 | dataNodes.push(msg.sender); 42 | emit DataNodeJoined(msg.sender); 43 | } 44 | 45 | function uploadVector(string memory ipfsHash) public { 46 | require(isDataNode[msg.sender], "Not a data node"); 47 | vectorCount++; 48 | vectors[vectorCount] = VectorData(ipfsHash, msg.sender, block.timestamp, false); 49 | emit VectorUploaded(vectorCount, ipfsHash, msg.sender, block.timestamp); 50 | } 51 | 52 | function joinAsAdmin() public { 53 | require(!isAdmin[msg.sender], "Already an admin"); 54 | isAdmin[msg.sender] = true; 55 | admins.push(msg.sender); 56 | emit AdminJoined(msg.sender); 57 | } 58 | 59 | function verifyVector(uint256 vectorId, bool isVerified) public onlyAdmin { 60 | VectorData storage vector = vectors[vectorId]; 61 | vector.verified = isVerified; 62 | emit VectorVerified(vectorId, isVerified); 63 | 64 | if (isVerified) { 65 | uint256 adminCount = 0; 66 | for (uint256 i = 0; i < admins.length; i++) { 67 | if (isAdmin[admins[i]]) { 68 | adminCount++; 69 | } 70 | } 71 | if (adminCount * 2 >= admins.length) { 72 | // Merge vector logic here 73 | // Incentive logic for data node 74 | emit IncentivePaid(vector.uploader, 1 ether); // Example incentive 75 | } 76 | } 77 | } 78 | 79 | function getVector(uint256 vectorId) public view returns (string memory, address, uint256, bool) { 80 | VectorData memory vector = vectors[vectorId]; 81 | return (vector.ipfsHash, vector.uploader, vector.timestamp, vector.verified); 82 | } 83 | 84 | function leaveContract() public { 85 | require(isDataNode[msg.sender] || isAdmin[msg.sender], "Not a member"); 86 | if (isDataNode[msg.sender]) { 87 | isDataNode[msg.sender] = false; 88 | // Remove from dataNodes array logic here 89 | } else { 90 | isAdmin[msg.sender] = false; 91 | // Remove from admins array logic here 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /contract/test/GlobalVectorManager.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | import "forge-std/Test.sol"; 5 | import "../src/GlobalVectorManager.sol"; 6 | 7 | contract GlobalVectorManagerTest is Test { 8 | GlobalVectorManager public manager; 9 | address public admin; 10 | address public dataNode; 11 | 12 | function setUp() public { 13 | admin = address(this); 14 | dataNode = address(0x2); 15 | manager = new GlobalVectorManager(); 16 | 17 | // Admin joins the contract 18 | manager.joinAsAdmin(); 19 | 20 | // Data node joins the contract 21 | vm.prank(dataNode); 22 | manager.joinAsDataNode(); 23 | } 24 | 25 | function testUploadVectorByDataNode() public { 26 | // Data node uploads a vector 27 | vm.prank(dataNode); 28 | manager.uploadVector("QmHashDataNode"); 29 | 30 | (string memory ipfsHash, address uploader, uint256 timestamp, bool verified) = manager.getVector(1); 31 | assertEq(ipfsHash, "QmHashDataNode"); 32 | assertEq(uploader, dataNode); 33 | assertTrue(timestamp > 0); 34 | assertFalse(verified); 35 | } 36 | 37 | function testVerifyVectorByAdmin() public { 38 | // Data node uploads a vector 39 | vm.prank(dataNode); 40 | manager.uploadVector("QmHashDataNode"); 41 | 42 | // Admin verifies the vector 43 | manager.verifyVector(1, true); 44 | (, , , bool isVerified) = manager.getVector(1); 45 | assertTrue(isVerified); 46 | } 47 | 48 | function 
testIncentivePaidToDataNode() public { 49 | // Data node uploads a vector 50 | vm.prank(dataNode); 51 | manager.uploadVector("QmHashDataNode"); 52 | 53 | // Admin verifies the vector 54 | manager.verifyVector(1, true); 55 | 56 | // Check if incentive was paid (this would require additional logic to track incentives) 57 | // For example, you could add a mapping to track incentives in the contract and assert here. 58 | } 59 | }
-------------------------------------------------------------------------------- /dev.sh: --------------------------------------------------------------------------------
1 | 2 | docker run -d -p 6333:6333 -p 6334:6334 -v C:/Users/faiza/Music/llmResearch/rag/qdrant_data:/qdrant/storage qdrant/qdrant:latest 3 | 4 | ipfs daemon 5 |
-------------------------------------------------------------------------------- /doc/drags.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TraqueAi/TraqueAI/57c030675723c3f3bb5096dfd0480b53f5f76e8c/doc/drags.JPG
-------------------------------------------------------------------------------- /doc/system.drawio: -------------------------------------------------------------------------------- (draw.io diagram XML not preserved in this dump)
-------------------------------------------------------------------------------- /doc/systemDesign.drawio: -------------------------------------------------------------------------------- (draw.io diagram XML not preserved in this dump)
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | qdrant: 3 |
image: qdrant/qdrant:latest 4 | container_name: qdrantt 5 | ports: 6 | - "6333:6333" 7 | - "6334:6334" 8 | volumes: 9 | - ./qdrant_storage:/qdrant/storage 10 | networks: 11 | - app-network 12 | 13 | ollamaa: 14 | image: ollama/ollama:latest 15 | container_name: ollamaa 16 | volumes: 17 | - ollama:/root/.ollama 18 | ports: 19 | - "11434:11434" 20 | networks: 21 | - app-network 22 | entrypoint: ["bash","run.sh"] 23 | 24 | bayesrag: 25 | build: . 26 | container_name: bayesrag_cont 27 | stdin_open: true 28 | tty: true 29 | depends_on: 30 | - qdrant 31 | - ollamaa 32 | environment: 33 | - DATA_DIR=/data 34 | - NODETYPE=admin 35 | - QDRANT_HOST=http://qdrant:6333 36 | volumes: 37 | - ./data:/data 38 | networks: 39 | - app-network 40 | command: ["python", "main.py", "--data-dir", "/data", "--nodetype", "admin"] 41 | 42 | volumes: 43 | ollama: 44 | 45 | networks: 46 | app-network: 47 | driver: bridge 48 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from bayesrag.data_loader import load_directory_pdf 4 | from bayesrag.text_splitter import split_texts 5 | from bayesrag.vector_db import VectorDB 6 | from bayesrag.retriever import get_context 7 | from bayesrag.generator import generate_response,classify_query 8 | from bayesrag.utils import ClassificationResult 9 | from bayesrag.evaluator import deepEvalutor 10 | from bayesrag.config import QDRANT_HOST,QDRANT_COLLECTION,REPLAY_TOPIC 11 | 12 | from bayesrag.mq import Mqttclient 13 | from qdrant_client import QdrantClient 14 | from bayesrag.utils import wait_for_commands 15 | from loguru import logger 16 | 17 | 18 | 19 | 20 | 21 | logger.debug(f"QDRANT_HOST: {QDRANT_HOST}", ) 22 | 23 | def query(): 24 | user_query = input("Enter your query or type 'q' to quit: ") 25 | 26 | while user_query.lower() != "q": 27 | relevant_context = get_context(user_query) 28 | print("-" * 100) 29 | # TODO 30 | # Query classification 31 | # result = classify_query(user_query) 32 | 33 | # if result == ClassificationResult.YES: 34 | # relevant_context = get_context(user_query,collection_name=QDRANT_COLLECTION) 35 | 36 | # print("-" * 100) 37 | # else: 38 | # relevant_context = None 39 | response_text = "" 40 | for text in generate_response(user_query, relevant_context): 41 | print(text, end=" ") 42 | response_text += text 43 | # Prompt the user to decide if evaluation is needed 44 | is_evaluator = input("\nDo you need evaluation? (Y/N): ").strip().lower() 45 | 46 | if is_evaluator == 'y': 47 | evaluator: dict = deepEvalutor(user_query,response_text,[relevant_context]) # Assuming this function returns a dictionary with score and reason 48 | print(f"Score: {evaluator['score']}\nReason: {evaluator['reason']}") 49 | elif is_evaluator == 'n': 50 | print("Evaluation skipped.") 51 | else: 52 | print("Invalid input. 
Please enter 'Y' or 'N'.") 53 | user_query = input("\nEnter your query or type 'q' to quit: ") 54 | 55 | def insertData(DATA_DIR,QDRANT_COLLECTION): 56 | # Load and split documents 57 | documents = load_directory_pdf(DATA_DIR) 58 | text_chunks = split_texts(documents) 59 | 60 | # Create vector database and upsert embeddings 61 | vectorDb=VectorDB(QDRANT_COLLECTION) 62 | vectorDb.create_db() 63 | vectorDb.upsert_embeddings(text_chunks) 64 | 65 | 66 | def main(): 67 | parser = argparse.ArgumentParser(description="Run the BayesRAG query system.") 68 | parser.add_argument('--data-dir', type=str, help="Directory containing the PDF documents.") 69 | parser.add_argument("--nodetype",type=str,help="Node Type") 70 | args = parser.parse_args() 71 | 72 | 73 | if args.nodetype is not None and args.nodetype.lower() == "admin": 74 | client=Mqttclient(replyTopic=REPLAY_TOPIC,isAdmin=True) 75 | else: 76 | client=Mqttclient(replyTopic=REPLAY_TOPIC,isAdmin=False) 77 | 78 | qclient = QdrantClient(url=QDRANT_HOST) 79 | if args.data_dir: 80 | insertData(args.data_dir,QDRANT_COLLECTION) 81 | logger.info("PreProcess completed successfully.") 82 | 83 | while True: 84 | command = wait_for_commands() 85 | if command == 'quit': 86 | logger.warning('Quitting') 87 | break 88 | 89 | elif command =='query': 90 | query() 91 | 92 | elif command == 'send': 93 | scroll_result=qclient.scroll(collection_name=QDRANT_COLLECTION,with_vectors=True) 94 | client.send_vector(scroll_result) 95 | elif command.startswith('insert '): 96 | data_location = command.split(' ', 1)[1] 97 | insertData(data_location,QDRANT_COLLECTION) 98 | logger.info("insertData completed successfully.") 99 | 100 | 101 | client.stop() 102 | logger.info("MQTT client stopped.") 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.2.15 2 | paho-mqtt==2.1.0 3 | openai==1.42.0 4 | qdrant-client==1.11.1 5 | loguru==0.7.2 6 | python-dotenv==1.0.1 7 | ragas 8 | datasets==2.21.0 9 | ollama==0.3.2 10 | ipfshttpclient==0.7.0 11 | pypdf==4.3.1 12 | deepeval -------------------------------------------------------------------------------- /research/.deepeval-cache.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_cases_lookup_map": { 3 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 4 | "cached_metrics_data": [ 5 | { 6 | "metric_metadata": { 7 | "metric": "Answer Relevancy", 8 | "threshold": 0.7, 9 | "success": true, 10 | "score": 1.0, 11 | "reason": "The score is 1.00 because the response perfectly addressed the query without any irrelevant information.", 12 | "strictMode": false, 13 | "evaluationModel": "gpt-4", 14 | "evaluationCost": 0, 15 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost.\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 16 | }, 17 | "metric_configuration": { 18 | "threshold": 0.7, 19 | "evaluation_model": "gpt-4", 20 | "strict_mode": false, 21 | "include_reason": true 22 | } 23 | } 24 | ] 25 | }, 26 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": \"We offer a 40-day full refund at no extra cost\", 
\"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 27 | "cached_metrics_data": [ 28 | { 29 | "metric_metadata": { 30 | "metric": "Answer Relevancy", 31 | "threshold": 0.7, 32 | "success": true, 33 | "score": 1.0, 34 | "reason": "The score is 1.00 because the response accurately addresses the concern about the fit of the shoes.", 35 | "strictMode": false, 36 | "evaluationModel": "gpt-4", 37 | "evaluationCost": 0, 38 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 39 | }, 40 | "metric_configuration": { 41 | "threshold": 0.7, 42 | "evaluation_model": "gpt-4", 43 | "strict_mode": false, 44 | "include_reason": true 45 | } 46 | } 47 | ] 48 | }, 49 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": [\"We offer a 40-day full refund at no extra cost\"], \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 50 | "cached_metrics_data": [ 51 | { 52 | "metric_metadata": { 53 | "metric": "Answer Relevancy", 54 | "threshold": 0.7, 55 | "success": true, 56 | "score": 1.0, 57 | "reason": "The score is 1.00 because the response completely and accurately addressed the concern raised in the question.", 58 | "strictMode": false, 59 | "evaluationModel": "gpt-4", 60 | "evaluationCost": 0, 61 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost.\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 62 | }, 63 | "metric_configuration": { 64 | "threshold": 0.7, 65 | "evaluation_model": "gpt-4", 66 | "strict_mode": false, 67 | "include_reason": true 68 | } 69 | } 70 | ] 71 | }, 72 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": [\"We offer a 40-day full refund at no extra cost\"]}": { 73 | "cached_metrics_data": [ 74 | { 75 | "metric_metadata": { 76 | "metric": "Answer Relevancy", 77 | "threshold": 0.7, 78 | "success": false, 79 | "score": 0.0, 80 | "reason": "The score is 0.00 because the actual output provided statements that were completely unrelated to the input question, discussing shoe refunds instead of addressing the issue of poorly fitting shoes.", 81 | "strictMode": false, 82 | "evaluationModel": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", 83 | "evaluationCost": 0, 84 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund\",\n \"at no extra cost\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"no\",\n \"reason\": \"The statements provided are about shoe refunds and are not related to the input question, which asks about what to do if shoes don't fit.\"\n }\n]" 85 | }, 86 | "metric_configuration": { 87 | "threshold": 0.7, 88 | "evaluation_model": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", 89 | "strict_mode": false, 90 | "include_reason": true 91 | } 92 | } 93 | ] 94 | }, 95 | "{\"actual_output\": \"We offer a 300-day full refund at no extra cost.\", \"context\": null, \"expected_output\": \"You are eligible for a 30 day full refund at no extra cost.\", \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": [\"All customers are eligible for a 30 day full refund at no extra cost.\"]}": { 96 | "cached_metrics_data": [ 97 | { 98 | 
"metric_metadata": { 99 | "metric": "Contextual Precision", 100 | "threshold": 0.7, 101 | "success": true, 102 | "score": 1.0, 103 | "reason": "The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking.", 104 | "strictMode": false, 105 | "evaluationModel": "gpt-4", 106 | "evaluationCost": 0, 107 | "verboseLogs": "Verdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": \"The text 'All customers are eligible for a 30 day full refund at no extra cost.' directly addresses the concern about shoes not fitting.\"\n }\n]" 108 | }, 109 | "metric_configuration": { 110 | "threshold": 0.7, 111 | "evaluation_model": "gpt-4", 112 | "strict_mode": false, 113 | "include_reason": true 114 | } 115 | } 116 | ] 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /research/RAGAS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "os.environ[\"OPENAI_API_KEY\"]=\"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\"" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "application/vnd.jupyter.widget-view+json": { 29 | "model_id": "088582f0c86c42d28f180e08019fb5c6", 30 | "version_major": 2, 31 | "version_minor": 0 32 | }, 33 | "text/plain": [ 34 | "Evaluating: 0%| | 0/2 [00:00\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | "
 | question | answer | contexts | faithfulness
0 | When was the first super bowl? | The first superbowl was held on Jan 15, 1967 | [The First AFL–NFL World Championship Game was... | 0.0
1 | Who won the most super bowls? | The most super bowls have been won by The New ... | [The Green Bay Packers...Green Bay, Wisconsin.... | 0.0
\n", 84 | "" 85 | ], 86 | "text/plain": [ 87 | " question \\\n", 88 | "0 When was the first super bowl? \n", 89 | "1 Who won the most super bowls? \n", 90 | "\n", 91 | " answer \\\n", 92 | "0 The first superbowl was held on Jan 15, 1967 \n", 93 | "1 The most super bowls have been won by The New ... \n", 94 | "\n", 95 | " contexts faithfulness \n", 96 | "0 [The First AFL–NFL World Championship Game was... 0.0 \n", 97 | "1 [The Green Bay Packers...Green Bay, Wisconsin.... 0.0 " 98 | ] 99 | }, 100 | "execution_count": 2, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "from datasets import Dataset \n", 107 | "from ragas.metrics import faithfulness\n", 108 | "from ragas import evaluate\n", 109 | "\n", 110 | "data_samples = {\n", 111 | " 'question': ['When was the first super bowl?', 'Who won the most super bowls?'],\n", 112 | " 'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],\n", 113 | " 'contexts' : [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], \n", 114 | " ['The Green Bay Packers...Green Bay, Wisconsin.','The Packers compete...Football Conference']],\n", 115 | "}\n", 116 | "dataset = Dataset.from_dict(data_samples)\n", 117 | "score = evaluate(dataset,metrics=[faithfulness])\n", 118 | "score.to_pandas()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "application/vnd.jupyter.widget-view+json": { 129 | "model_id": "aac45b3caa9f4031b7253e47a5eaba12", 130 | "version_major": 2, 131 | "version_minor": 0 132 | }, 133 | "text/plain": [ 134 | "Evaluating: 0%| | 0/2 [00:00\n", 144 | "\n", 157 | "\n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
 | question | answer | contexts | answer_relevancy
0 | When was the first super bowl? | The first superbowl was held on Jan 15, 1967 | [The First AFL–NFL World Championship Game was... | 0.975320
1 | Who won the most super bowls? | The most super bowls have been won by The New ... | [The Green Bay Packers...Green Bay, Wisconsin.... | 0.943043
\n", 184 | "" 185 | ], 186 | "text/plain": [ 187 | " question \\\n", 188 | "0 When was the first super bowl? \n", 189 | "1 Who won the most super bowls? \n", 190 | "\n", 191 | " answer \\\n", 192 | "0 The first superbowl was held on Jan 15, 1967 \n", 193 | "1 The most super bowls have been won by The New ... \n", 194 | "\n", 195 | " contexts answer_relevancy \n", 196 | "0 [The First AFL–NFL World Championship Game was... 0.975320 \n", 197 | "1 [The Green Bay Packers...Green Bay, Wisconsin.... 0.943043 " 198 | ] 199 | }, 200 | "execution_count": 3, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "from datasets import Dataset \n", 207 | "from ragas.metrics import answer_relevancy\n", 208 | "from ragas import evaluate\n", 209 | "\n", 210 | "data_samples = {\n", 211 | " 'question': ['When was the first super bowl?', 'Who won the most super bowls?'],\n", 212 | " 'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],\n", 213 | " 'contexts' : [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], \n", 214 | " ['The Green Bay Packers...Green Bay, Wisconsin.','The Packers compete...Football Conference']],\n", 215 | "}\n", 216 | "dataset = Dataset.from_dict(data_samples)\n", 217 | "score = evaluate(dataset,metrics=[answer_relevancy])\n", 218 | "score.to_pandas()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "env", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.12.2" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /research/communication/kafka/consumer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kafka import KafkaConsumer 3 | 4 | KAFKA_BOOTSTRAP_SERVERS = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:29092") 5 | KAFKA_TOPIC_TEST = os.environ.get("KAFKA_TOPIC_TEST", "test") 6 | KAFKA_API_VERSION = os.environ.get("KAFKA_API_VERSION", "7.3.1") 7 | 8 | consumer = KafkaConsumer( 9 | KAFKA_TOPIC_TEST, 10 | bootstrap_servers=[KAFKA_BOOTSTRAP_SERVERS], 11 | api_version=KAFKA_API_VERSION, 12 | auto_offset_reset="earliest", 13 | enable_auto_commit=True, 14 | ) 15 | 16 | for message in consumer: 17 | print(message.value.decode("utf-8")) 18 | -------------------------------------------------------------------------------- /research/communication/kafka/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:latest 5 | container_name: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | ZOOKEEPER_TICK_TIME: 2000 9 | ports: 10 | - 22181:2181 11 | restart: on-failure 12 | kafka: 13 | image: confluentinc/cp-kafka:latest 14 | container_name: kafka 15 | depends_on: 16 | - zookeeper 17 | ports: 18 | - 
29092:29092 19 | environment: 20 | KAFKA_BROKER_ID: 1 21 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 22 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092 23 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 24 | KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT 25 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 26 | restart: on-failure 27 | -------------------------------------------------------------------------------- /research/communication/kafka/producer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import json 5 | from kafka import KafkaProducer 6 | 7 | KAFKA_BOOTSTRAP_SERVERS = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:29092") 8 | KAFKA_TOPIC_TEST = os.environ.get("KAFKA_TOPIC_TEST", "test") 9 | KAFKA_API_VERSION = os.environ.get("KAFKA_API_VERSION", "7.3.1") 10 | 11 | producer = KafkaProducer( 12 | bootstrap_servers=[KAFKA_BOOTSTRAP_SERVERS], 13 | api_version=KAFKA_API_VERSION, 14 | ) 15 | i = 0 16 | while i <= 30: 17 | producer.send( 18 | KAFKA_TOPIC_TEST, 19 | json.dumps({"message": f"Hello, Kafka! - test {i}"}).encode("utf-8"), 20 | ) 21 | i += 1 22 | time.sleep(random.randint(1, 5)) 23 | producer.flush() 24 | -------------------------------------------------------------------------------- /research/communication/mqtt/receive.py: -------------------------------------------------------------------------------- 1 | import pika 2 | import threading 3 | import time 4 | 5 | # Function to send messages 6 | def send_message(): 7 | while True: 8 | time.sleep(10) # Wait for 10 seconds 9 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 10 | channel = connection.channel() 11 | channel.queue_declare(queue='hello') 12 | message = 'Hello World!' 13 | channel.basic_publish(exchange='', routing_key='hello', body=message) 14 | print(f" [x] Sent '{message}'") 15 | connection.close() 16 | 17 | # Function to receive messages 18 | def receive_message(): 19 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 20 | channel = connection.channel() 21 | channel.queue_declare(queue='hello') 22 | 23 | def callback(ch, method, properties, body): 24 | print(f" [x] Received {body}") 25 | 26 | channel.basic_consume(queue='hello', on_message_callback=callback, auto_ack=True) 27 | print(' [*] Waiting for messages. To exit press CTRL+C') 28 | channel.start_consuming() 29 | 30 | # Start threads for sending and receiving messages 31 | send_thread = threading.Thread(target=send_message) 32 | receive_thread = threading.Thread(target=receive_message) 33 | 34 | send_thread.start() 35 | receive_thread.start() 36 | 37 | # Keep the main thread running, otherwise signals are ignored. 38 | send_thread.join() 39 | receive_thread.join() 40 | -------------------------------------------------------------------------------- /research/communication/mqtt/send.py: -------------------------------------------------------------------------------- 1 | import pika 2 | import threading 3 | import time 4 | 5 | # Function to send messages 6 | def send_message(): 7 | while True: 8 | time.sleep(10) # Wait for 10 seconds 9 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 10 | channel = connection.channel() 11 | channel.queue_declare(queue='hello') 12 | message = 'Hello World!' 
13 | channel.basic_publish(exchange='', routing_key='hello', body=message) 14 | print(f" [x] Sent '{message}'") 15 | connection.close() 16 | 17 | # Function to receive messages 18 | def receive_message(): 19 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 20 | channel = connection.channel() 21 | channel.queue_declare(queue='hello') 22 | 23 | def callback(ch, method, properties, body): 24 | print(f" [x] Received {body}") 25 | 26 | channel.basic_consume(queue='hello', on_message_callback=callback, auto_ack=True) 27 | print(' [*] Waiting for messages. To exit press CTRL+C') 28 | channel.start_consuming() 29 | 30 | # Start threads for sending and receiving messages 31 | send_thread = threading.Thread(target=send_message) 32 | receive_thread = threading.Thread(target=receive_message) 33 | 34 | send_thread.start() 35 | receive_thread.start() 36 | 37 | # Keep the main thread running, otherwise signals are ignored. 38 | send_thread.join() 39 | receive_thread.join() 40 | -------------------------------------------------------------------------------- /research/groq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Fast language models, also known as fasttext or fasttext-based language models, have gained significant attention in recent years due to their ability to process and analyze large amounts of text data quickly and efficiently. Here are some reasons why fast language models are important:\n", 13 | "\n", 14 | "1. **Speed and Scalability**: Fast language models can process text data at incredible speeds, often orders of magnitude faster than traditional language models. This is particularly important for large-scale NLP applications that require real-time processing, such as chatbots, virtual assistants, and social media analytics.\n", 15 | "2. **Large-scale Text Analysis**: Fast language models can analyze massive amounts of text data quickly, making them ideal for applications that involve large-scale text processing, such as:\n", 16 | "\t* Sentiment analysis on social media platforms\n", 17 | "\t* Topic modeling on vast datasets\n", 18 | "\t* Named entity recognition on large documents\n", 19 | "3. **Real-time Processing**: Fast language models can process text data in real-time, enabling applications that require immediate results, such as:\n", 20 | "\t* Sentiment analysis for customer service chatbots\n", 21 | "\t* Automatic language translation for real-time conversations\n", 22 | "\t* Text summarization for news articles\n", 23 | "4. **Cloud and Edge Computing**: Fast language models are well-suited for cloud and edge computing applications, as they can be deployed on distributed architectures and process data in a decentralized manner. This enables secure, efficient, and scalable NLP processing.\n", 24 | "5. **Improved Accuracy**: Fast language models can achieve similar or even better accuracy compared to traditional language models, despite their speed advantages. This is due to their ability to leverage advanced techniques, such as:\n", 25 | "\t* Subword modeling\n", 26 | "\t* Character-level modeling\n", 27 | "\t* Attention mechanisms\n", 28 | "6. 
**Low Resource Requirements**: Fast language models often require less computational resources and memory compared to traditional language models, making them suitable for resource-constrained devices, such as:\n", 29 | "\t* Mobile devices\n", 30 | "\t* IoT devices\n", 31 | "\t* Edge devices\n", 32 | "7. **Domain Adaptation**: Fast language models can adapt to new domains and datasets quickly, enabling applications that require context-specific language understanding, such as:\n", 33 | "\t* Product recommendation systems\n", 34 | "\t* Medical diagnosis assistance\n", 35 | "\t* Financial report analysis\n", 36 | "8. **Explainability and Interpretability**: Fast language models often provide better explainability and interpretability due to their ability to generate feature importance scores, attention weights, and other transparency-enhancing techniques.\n", 37 | "\n", 38 | "In summary, fast language models offer a unique combination of speed, scalability, accuracy, and resource efficiency, making them an indispensable tool for various NLP applications. As the demand for fast and efficient language processing continues to grow, fast language models will play a crucial role in driving innovation and improving the performance of various NLP-based applications.\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "import os\n", 44 | "\n", 45 | "from groq import Groq\n", 46 | "\n", 47 | "client = Groq(\n", 48 | " # This is the default and can be omitted\n", 49 | " api_key=os.environ.get(\"GROQ_API_KEY\",\"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\"),\n", 50 | ")\n", 51 | "\n", 52 | "chat_completion = client.chat.completions.create(\n", 53 | " messages=[\n", 54 | " {\n", 55 | " \"role\": \"system\",\n", 56 | " \"content\": \"you are a helpful assistant.\"\n", 57 | " },\n", 58 | " {\n", 59 | " \"role\": \"user\",\n", 60 | " \"content\": \"Explain the importance of fast language models\",\n", 61 | " }\n", 62 | " ],\n", 63 | " model=\"llama3-8b-8192\",\n", 64 | ")\n", 65 | "\n", 66 | "print(chat_completion.choices[0].message.content)" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "env", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.12.2" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /research/ollama.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# from litellm import completion\n", 10 | "\n", 11 | "# response = completion(\n", 12 | "# model='llama3:8b', \n", 13 | "# messages=[{ \"content\": \"respond in 20 words. 
who are you?\",\"role\": \"user\"}], \n", 14 | "# api_base=\"http://localhost:11434\"\n", 15 | "# )\n", 16 | "# print(response)\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 29, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "{\"results\": \"no\"}\n" 29 | ] 30 | }, 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "{'results': 'no'}" 35 | ] 36 | }, 37 | "execution_count": 29, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "import ollama\n", 44 | "\n", 45 | "user_query = \"hey\"\n", 46 | "system_prompt = \"\"\"\n", 47 | "You are a Lawyer. Classify the following question related to Law and always give a response in JSON format as {\"results\": \"yes/no\"} without giving any reason in the response.\n", 48 | "\"\"\"\n", 49 | "\n", 50 | "response = ollama.chat(\n", 51 | " model='llama3:8b', \n", 52 | " messages=[\n", 53 | " {\"role\": \"system\", \"content\": system_prompt},\n", 54 | " {\"role\": \"user\", \"content\": user_query}\n", 55 | " ],\n", 56 | " format=\"json\",\n", 57 | ")\n", 58 | "response_json=response['message']['content']\n", 59 | "\n", 60 | "print(response_json)\n", 61 | "import json\n", 62 | "json.loads(response_json)\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "The sky appears blue because of a phenomenon called Rayleigh scattering, named after the British physicist Lord Rayleigh. Here's what happens:\n", 75 | "\n", 76 | "1. **Sunlight enters Earth's atmosphere**: When sunlight enters our atmosphere, it contains all the colors of the visible spectrum, including red, orange, yellow, green, blue, indigo, and violet.\n", 77 | "2. **Light interacts with tiny molecules**: The light then encounters tiny molecules of gases like nitrogen (N2) and oxygen (O2). These molecules are much smaller than the wavelength of light.\n", 78 | "3. **Rayleigh scattering occurs**: When the light interacts with these small molecules, it scatters in all directions. This scattering is more pronounced for shorter wavelengths (like blue and violet) than longer wavelengths (like red and orange).\n", 79 | "4. **Blue light is scattered more**: The shorter wavelengths of blue light are scattered more than the longer wavelengths of red light. This is because the smaller molecules can scatter the shorter wavelengths more efficiently.\n", 80 | "5. **Our eyes perceive the scattered light**: As we look at the sky, our eyes see the scattered blue light as a blue color. This is why the sky typically appears blue during the daytime when the sun is overhead.\n", 81 | "\n", 82 | "Here are some interesting facts to note:\n", 83 | "\n", 84 | "* The exact shade of blue can vary depending on atmospheric conditions, like pollution levels or dust particles in the air.\n", 85 | "* During sunrise and sunset, the sky can take on hues of red and orange because the light has to travel longer distances through the atmosphere, scattering more blue light along the way.\n", 86 | "* The same Rayleigh scattering effect is responsible for the blue color of many natural bodies of water, like oceans and lakes.\n", 87 | "\n", 88 | "Now, go ahead and gaze up at that beautiful blue sky – it's all about the science!" 
89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "import ollama\n", 94 | "response = ollama.chat(model='llama3:8b', messages=[\n", 95 | " {\n", 96 | " 'role': 'user',\n", 97 | " 'content': 'Why is the sky blue?',\n", 98 | " },\n", 99 | " \n", 100 | "],stream=True)\n", 101 | "\n", 102 | "for chunk in response:\n", 103 | " print(chunk['message']['content'], end='', flush=True)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "The sky appears blue because of a phenomenon called Rayleigh scattering, named after the British physicist Lord Rayleigh, who first described it in the late 19th century.\n", 116 | "\n", 117 | "Here's what happens:\n", 118 | "\n", 119 | "1. **Sunlight enters Earth's atmosphere**: When sunlight enters our atmosphere, it contains all the colors of the visible spectrum (red, orange, yellow, green, blue, indigo, and violet).\n", 120 | "2. **Tiny molecules scatter light**: The tiny molecules of gases like nitrogen (N2) and oxygen (O2), as well as aerosols like water vapor, carbon dioxide, and pollutants, are present in the atmosphere. These molecules scatter the shorter, blue wavelengths of light more than the longer, red wavelengths.\n", 121 | "3. **Blue light is scattered in all directions**: As a result of this scattering, the blue light is dispersed throughout the atmosphere, reaching our eyes from all directions.\n", 122 | "4. **Our eyes perceive the blue color**: Since we see the blue light from all angles, it appears to us as a uniform blue color, which is why the sky looks blue.\n", 123 | "\n", 124 | "It's important to note that:\n", 125 | "\n", 126 | "* The exact shade of blue can vary depending on atmospheric conditions, such as pollution levels, dust, and water vapor.\n", 127 | "* During sunrise and sunset, when the sun's rays travel longer distances through the atmosphere, they scatter more red light, giving the sky its characteristic orange or reddish hue.\n", 128 | "* At higher altitudes, where there is less atmospheric scattering, the sky can appear darker blue or even black.\n", 129 | "\n", 130 | "Now, go outside and appreciate that beautiful blue sky!" 
131 | ]
132 | }
133 | ],
134 | "source": [
135 | "import ollama\n",
136 | "\n",
137 | "# NOTE: keep the system and user messages as separate dicts;\n",
138 | "# a single dict with a duplicated 'role' key would silently\n",
139 | "# drop the system prompt before the request is sent.\n",
140 | "response = ollama.chat(model='llama3:8b', messages=[\n",
141 | "    {'role': 'system', 'content': 'you are bot'},\n",
142 | "    {'role': 'user', 'content': 'Why is the sky blue?'},\n",
143 | "], stream=True)\n",
144 | "\n",
145 | "for chunk in response:\n",
146 | "    print(chunk['message']['content'], end='', flush=True)"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 6,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "data": {
156 | "text/plain": [
157 | "{'embedding': [0.5969330072402954,\n",
158 | "  0.40240398049354553,\n",
[... additional embedding values omitted ...]
897 | " -1.3058867454528809,\n", 898 | " 0.24302329123020172,\n", 899 | " 0.2049877941608429,\n", 900 | " 0.32993578910827637,\n", 901 | " 0.014989953488111496,\n", 902 | " -0.45766833424568176,\n", 903 | " 0.29031941294670105,\n", 904 | " 0.5337725877761841,\n", 905 | " 1.4808156490325928,\n", 906 | " -0.36382490396499634,\n", 907 | " 0.44398123025894165,\n", 908 | " 0.22713124752044678,\n", 909 | " -0.1587594598531723,\n", 910 | " 0.3459387421607971,\n", 911 | " 0.41898661851882935,\n", 912 | " 0.6941905617713928,\n", 913 | " 0.43946877121925354,\n", 914 | " -0.022126808762550354,\n", 915 | " 2.0339138507843018,\n", 916 | " -0.050843410193920135,\n", 917 | " 0.45677250623703003,\n", 918 | " -0.21609695255756378,\n", 919 | " 0.34072253108024597,\n", 920 | " 0.7042638659477234,\n", 921 | " -1.1494847536087036,\n", 922 | " -0.14919373393058777,\n", 923 | " -0.5456319451332092,\n", 924 | " 0.4699326455593109]}" 925 | ] 926 | }, 927 | "execution_count": 6, 928 | "metadata": {}, 929 | "output_type": "execute_result" 930 | } 931 | ], 932 | "source": [ 933 | "import ollama\n", 934 | "\n", 935 | "ollama.embeddings(model='nomic-embed-text', prompt='The sky is blue because of rayleigh scattering')" 936 | ] 937 | } 938 | ], 939 | "metadata": { 940 | "kernelspec": { 941 | "display_name": "env", 942 | "language": "python", 943 | "name": "python3" 944 | }, 945 | "language_info": { 946 | "codemirror_mode": { 947 | "name": "ipython", 948 | "version": 3 949 | }, 950 | "file_extension": ".py", 951 | "mimetype": "text/x-python", 952 | "name": "python", 953 | "nbconvert_exporter": "python", 954 | "pygments_lexer": "ipython3", 955 | "version": "3.12.2" 956 | } 957 | }, 958 | "nbformat": 4, 959 | "nbformat_minor": 2 960 | } 961 | -------------------------------------------------------------------------------- /research/retriver.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "# OpenAI\n", 12 | "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", 13 | "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 20, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "from langchain_community.document_loaders import PyPDFLoader\n", 23 | "from langchain_community.vectorstores import FAISS\n", 24 | "from langchain_openai import OpenAIEmbeddings\n", 25 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 26 | "\n", 27 | "loader = PyPDFLoader('./data/H-1B Filing Instructions.pdf')\n", 28 | "documents = loader.load()\n", 29 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)\n", 30 | "docs = text_splitter.split_documents(documents)\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 21, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "61" 42 | ] 43 | }, 44 | "execution_count": 21, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "len(docs)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 22, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "[Document(page_content='1 \\n H-1B FILING INSTRUCTIONS AND GENERAL INFORMATION \\nWhat is an H -1B Petition? \\nH-1B is a nonimmigrant status that permits professional employment. 
The employer must file an H -1B \\npetition with the U .S. Citizenship and Immigration Services (US CIS) and obtain a pproval for it; the “alien \\nbeneficiary” must obtain H -1B status either by applying for an H -1B visa abroad and entering the U.S. in H -1B \\nstatus, or by changing to H -1B status from another nonimmigrant status in the U.S. H-1B employment may not \\nbegin until the office of International Student and Scholar Services ( ISSS ) informs the hiring department \\nthat all necessary approvals have been received. \\n \\nRutgers policy restricts H -1B sponsorship to full time faculty appointments only. For teaching faculty, \\n“Assistant Professor” is the lowest range title for which Rutgers will sponsor an H -1B. For research faculty, the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 62 | " Document(page_content='lowest range H -1B title is generally “Research Associate” unless very specific exceptional criteria are met; Details \\nof Rutgers International hir ing policy is available at: \\nhttps://policies.rutgers.edu/view -policies/human -resources -hr-%E2%80%93 -section -60 \\nSummary of Requirements and Forms \\nWhen filing an H -1B petition, there are three USCIS forms to complete for all applicants : I-129 petition, the \\nH Classification Supplement to For m I-129, and the H1B Data Collection and Filing Fee Exemption Supplement . \\nThere is also an optional form (I -907) if the petition will be filed with a request for “Premium Processing.” \\n(“Premium Processing” is expedited pr ocessing by USCIS and requires an extra filing fee of $1, 410.) All of t he \\nactual USCIS H1B forms will be completed by ISSS and submitted to USCIS, along with all the required \\nsupporting documentation , in a packet . ISSS obtains all the required information needed for completing the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 63 | " Document(page_content='USCIS forms from the RUTGERS INTERNAL H -1B REQUEST Packet, which has already been completed and \\nsubmitted to ISSS by the hiring unit. \\n \\nThere is a required USCIS form (I-539) if the alien is already in the U.S. and has visa depend ents who are \\nalso already physically in the U.S. This form needs to be completed and signed by the dependent. ISSS is not \\nresponsible for reviewing the form I-539, although we will enclose the completed form in the H1B petitio n packet \\nand submit it to USCI S. The I -539 form to use should always be downloaded from the USCIS website at: \\nwww.uscis.gov . Detailed information on filing this form and its fees is also available on this website. \\n \\nIn addition, there are supporting documents that must be attached to the application in duplicate. Please \\nrefer to Page 14 for a detailed list of required documentation. \\n \\nThere is also a regulatory requirement that a \"Public Access File\" (PAF) be set up and maintained by the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 64 | " Document(page_content='employer. ISSS creates , update s, and maintain s a PAF for every H1B worker from the day an LCA is submitted \\nto Department of Labor (DOL) to the day 12 months after the approved H1B end date, while making it available \\nfor public inspection upon request and for potential audits by various government agencies. \\n \\nThere are two sets of filing fees for each petition . Each USCIS fee requires a separate check made \\npayable to “ U.S. Dept. 
of Homeland Security.” The Address in Cornerstone is USCIS , California Service \\nCenter , Laguna Niguel, CA 926 77. USCIS Filing fees vary depending on the specific types of petitions . \\n \\nUSCIS Fee Schedule \\n$460 Fee for Form I -129 ( must be paid by hiring unit under all circumstances ) \\n$500 “Anti-Fraud” Fee ( must be paid by hiring unit for all new pet itions , i.e. all new H1B cases with Rutgers ) \\n$370 Required ONLY IF I-539 is being submitted for dependents ( This fee may be paid by hiring unit or alien)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 65 | " Document(page_content='$85 Biometrics fee required for each dependent included on the Form I -539. \\n$1440 Optional Premiu m Processing (PP) fee (must be paid by dept. unless needed strictly for alien’s personal \\nreasons . If the H1B beneficiary pays this fee, a written statement must be submitted to ISSS confirming that the \\nPP fee is paid by the H1B beneficiary because the requ est is for his/her personal benefits. ) \\n \\nRutgers visa fee schedule Please click on the hyperlink for e ach specific fee requirement. \\nRutgers fees are paid via a journal entry. Please complete the Transmittal Form for Visa Processing Fees .', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 66 | " Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 67 | " Document(page_content='____currently overseas and expected to apply for H -1B visa at a U.S. consulate abroad \\n____currently in the U.S. with visa dependents who have or will require H -4 status \\n\\uf0b7 a list of every period the foreign national (FN) has ever held J -1 or J -2 status in any J category (Note: the \\n1-129 form asks for documentation of any period(s) of time the FN spent in J-1 or J -2 status, so please \\nask the FN for such document s in the form of copies of DS -2019s, IAP -66s, or J -1/J-2 visa in passport.) \\nThe Next Step – Receive an H -1B Packet via Email \\nAssuming we have no questions about the information you pr ovide in your request for an H -1B packet, \\nwe will e -mail you a complete set of instructions and forms for you to read or complete. 
\\n \\nTime Frames to Bear in Mind \\nAfter receiving a fully completed request packet from the hiring department and p rior to f iling the H -1 petition with', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 68 | " Document(page_content='USCIS, our office’ s pre -processing will take about 4 weeks (to submit the LCA to the DOL, obtain additional \\nuniversity clearance, completing actual USCIS petition forms, and thoroughly review, organize and prepare the \\npacket for submission). ISSS will then proceed with filing the H -1B petition with the USCIS. Normally, it take s \\nabout 8+ months for USCIS to adjudicate regular processing cases but it guarantees a 15 -day adjudication time \\nfor “Premium Processing “petitions . The tur n-around times at USCIS will be listed at \\nhttps://egov.uscis.gov/cris/processTimesDisplayInit.do (scroll down to Service Center; select CSC -California \\nService Center, then click on proc essing dates). While Premium Processing will decrease the turn -around time \\nat USCIS to about 2-3 weeks, the turn -around time for pre -processing at our office is determined only by the order \\nin which the paperwork is received from the requesting department s. We therefore advise departments to submit', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 69 | " Document(page_content=\"complete H -1B packets to ISSS as early as 6 months prior to the intended starting date of the H -1B petition, in \\norder to avoid both the USCIS $1,410 Premium Processing fee and the ISSS late fee of $200 . There is also an \\noption for expedited processing service at ISSS , which requires a fee of $300. ISSS tries to process the request \\nwithin 2 -3 weeks if everything is in order . Please see our Late Fee Schedule . \\n \\nH-1B P ETITION FOR FACULTY APPOINTMENT \\n(INSTRUCTION PACKET FOR EMPLOYING DEPARTMENTS ) \\nTable of Contents \\nDepartments' Legal Responsibilities……………………………………………………………………… ..…3 \\nIntroduction to Each of the Required Forms in this Packet ………………………. ……………….... .........4 \\n Form #1: Worksheet for Labor Condition Application ( LCA) …………………………………….……. 5-6 \\n Form #2: “Deemed Export Certification for H -1B Petitions” ………………………………………….... 7-8 \\n Form #3: H-1B Scholar Information Shee t (Required)………………. …………………………… ….... 9-10\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 70 | " Document(page_content='Form #4: H-1B Department Certification Form ……………………………………………………… ..….11 \\n Form #5: Memorandum Explaining the Actual wage…………… ……………………………………… .12 \\n Form #6: Form I -539 ( Required for Visa Dependents only )…………………………… …….………....13 \\n \\nList of Required Supporting Documents ………………………………………….. …………… …………… 14 \\nSample Letter in Support of H -1B Petition…………………………………………………… ……………… 15 \\nChart of H1B Visa Process ……………………………………………………………… …………………… .16', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 71 | " Document(page_content='3 \\n \\n \\nDepa rtments\\' Legal Responsibilities in Filing H -1B Petitions \\nDepartments filing H -1B petitions assume significant liability on behalf of Rutgers , and are thus responsible for \\nunderstanding and strictly adhering to certain requirements. Please read the list of \"Departments\\' Legal \\nResponsibilities\" thoroughly before proceeding with the application materials. 
For most H -1B matters, the actual \\nresponsibility for complying with these federal regulations rests with hiring units , since it is there that H -1B \\nemployees\\' files are initiated and maintained. Because H -1B regulations are complex, and because an \\nemployer\\'s failure to comply with these regulations can result in significant penalties, we advise you to read the \\nresponsibilities listed below with utmost attentio n, to fill out forms exactly as specified on the attached \\ninstructions, and to follow the guidelines noted throughout this packet with care.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 72 | " Document(page_content=\"1. Departments must submit in a timely fashion the complete H -1B request packet (approved LCA \\nworksheet, fi ling fees, and the required number of photocopies of all accompanying supporting \\ndocuments) to ISSS —even if the department is requesting and paying for PP service, The USCIS PP Fee \\nexpedites processing at the USCIS but not within the University itself. \\n Departments must allow an absolute minimum of four weeks for ISSS to process an H -1B petition packet \\nbefore it is ready to submit to USCIS. Once the petition is submitted, the amount of time it will take USCIS to \\nadjudicate it depends upon the type of ap plication being filed. For P P service petitions, an approval comes in \\nabout 3 -4 weeks. Departments NOT filing via P P service should check with ISSS for appropriate timeframes, as \\nthey will vary based on a number of different factors . \\n \\n2. The terms o f an H -1B worker's employment may not be changed in ANY way other than routine salary\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 73 | " Document(page_content='increases during the period of the H -1B approval . Any changes in the terms of employment automatically \\ncancel the validity of the H -1B approval, and the employee thereby becomes ineligible to continue working legally \\nat Rutgers. Examples of prohibited changes are: job title, work -site, or job responsibilities. If a department \\ncontemplates any of these changes for a current H -1B employee, please contact ISSS to discuss. \\n \\n3. It is of utmost importance that departments inform ISSS if H-1B employment is terminated for any \\nreason before the end -date requested on form I -129. If an H -1B employee is terminated by Rutgers prior to \\nthe ending date requested on the I -129 petition , the employer (via ISSS ) is required by federal regulation to notify \\nUSCIS. In addition, the employing unit is liable for transportation costs to the foreign country in which the \\nemployee most recently resided (unless the employee leaves by choice).', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 74 | " Document(page_content='4. ISSS will provide the hiring unit and the FN with a copy of the completed I-129 petition form , \\nDOL -certified LCA as soon as it is available and also notify the AAUP of the LCA filing. \\n \\n5. Departments are responsible for ensuring that H -1B employees going on Rutgers payroll for the first \\ntime attend a \"check -in/orientation session \" at ISSS as soon as possible after the employee has arrived \\nat Rutgers . The workshop is offered weekly on Thursday at 2:45 p.m. and requires adv ance sign -up by emailing \\nISSS at gan tonatos@global.rutgers.edu. For employees whose H -1B status is being extended, the workshop is \\nnot necessary. \\n \\n6. 
Federal regulations change regularly and ISSS routinely updates the H -1B instruction packet. Please \\nread all of ISSS ’ H-1B instructi ons each time you file an H -1B petition. Procedures and forms may be', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 75 | " Document(page_content='different from those you used the last time you filed an H -1B petition. H-1B employment may not begin until ISSS informs the department that all necessary approvals have \\nbeen received. “Employment” refers not only to being on Payroll, but also to perfo rming services for \\nwhich an individual would normally be compensated.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 76 | " Document(page_content='4 \\n \\n Introduction to Each of the Required Form s in the H1B Packet \\n \\nForm #1 : Worksheet for La bor Condition Application (LCA ) \\n The LCA is the form used to obtain clearance from the U.S. Department of Labor (DOL) to proceed with filing the \\nH-1B petition. An LCA approval from DOL gives us the \"go ahead\" to file the actual H -1B petition with the USCIS . \\n Extreme care must be taken in completing the LCA Worksheet accurately per the attached instructions; incorrect \\ninformation could result in serious liability for Rutgers. \\nProcedures for the form : \\n\\uf0a8 Dept. completes “Worksheet for Labor Condition Application ”; \\n\\uf0a8 Dept .sends the Worksheet to the Dean\\'s office for Dean\\'s review and si gnature ; \\n\\uf0a8 Department retrieves those documents from the Dean’s office, then email the Worksheet to Academic Labor \\nRelations (ALR) for review and approval : OALR@oq.rutgers.edu .', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 77 | " Document(page_content='\\uf0a8 ALR reviews materials, signs Worksheet for approval and faxes it back to hiring department; \\n\\uf0a8 Department attaches approved worksheet to the rest of the H -1B request packet and submits it to ISSS ; \\n\\uf0a8 ISSS files the LCA application with the DOL and obtains certificatio n in about 7 -10 days . \\n\\uf0a8 \\nForm #2: Deemed Export Certification for H -1B Petitions \\nThis is a formal certification of employer stating that the FN ’s work is /is not “deemed” to be an “export” by \\nCommerce Dept. This form must be first signed by fac ulty sponsor and chair or director; original plus \\ndocumentation must be sent to the Export Compliance Office , who will then send the certifi cation to ISSS . \\n \\nForm #3 : H-1B Scholar Information Sheet \\nThis form elicits information from the FN needed by the de partment in order to complete the H1B Request Form. \\nDept . asks the H -1B worker to complete the form & then uses the information to complete the Internal H1B \\nRequest Form.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 78 | " Document(page_content='Form #4: H -1B Department Certification \\nThis form is related to the DO L’s LCA regulat ions. Before ISSS can submit the LCA for certification from the DOL \\nand USCIS, the hiring department must certify the 9 statements specified on the Certification form. Department \\nhas this form signed by the direct supervisor and by Chair or Director of the hiring unit and then submit it to ISSS \\nwith the rest of the require d documents. \\n \\nForm # 5: Memorandum Explaining the Actual Wage \\nThis is a document required by the DOL H1B regulations. 
It must be completed by the hiring unit and submitted \\nto ISSS with the other required documents. It will be kept in the Public Access File (PAF) at ISSS . \\n \\nForm # 6: I-539 (Required ONLY for visa dependents of the H -1B who are already physically in the U.S .) \\nI-539 is a generic form used for change of nonimmigrant status or extension of status. In this context, the I -539', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 79 | " Document(page_content='is used ONLY for the visa dependents of H1B employees already in the U.S. who need to change or extend \\ndependent H4 status along with the H1B employee. (The change or extension of status for the employee \\nhim/herself is included in the I -129 petition itself.) Please refer to the I -539 filing and fees instructions that can be \\nfound at www.uscis.gov . No I-539 is needed for the H -1B employee, nor is it needed if the employee \\'s visa \\ndependents are not in the U.S. at the time of filing the I -129 Petition, nor if the employee’s dependent(s) have their \\nown independent non -immigrant status. \\n \\nProcedures for the form : \\n\\uf0a8 H-1B employee has his or her visa dependent(s) complete and sign the form; \\n\\uf0a8 H1B employee returns completed I -539 to the department with filing fees and supporting document s (original \\nplus one copy), as described on the attached \"Instructions for Completing Form I -539\"', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 80 | " Document(page_content='\\uf0a8 Dept. submits I -539 and supporting documents to ISSS along with the rest of the H -1B petition packet', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 81 | " Document(page_content='5 \\n \\nWORKSHEET FOR LABOR CONDITION APPLICATION (LCA) \\n (Please follow instructions provided on the next page ) \\n1. Name of Employee (LAS T, First, Middle): …………………………………………………………………….. \\n 2. Rate of Pay per Year: $ …………………………… (This is the person’s actual salary; must be equal to or higher than minimum annual sa lary in AAUP \\nContract for this job title.) \\n \\n 3. Check Here to Confirm this is a Full-Time Position _____ (Note: Rutgers does not support the filing of H -1B petitions for part -time positions. \\nPlease refer to the University Policy on Hiring Foreign Nationals) \\n \\n 4. Period of the Proposed H -1B Employment: From* ………./………./……… To ….../……… …/……….. (MM/DD/YYYY) \\n (*“From” date is the date on which H -1B status should become effective, which may or may not be the same as the appointment start date.)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 82 | " Document(page_content='5. The Standard Occupational Classification (SOC) Code: See instructions for completing the L CA Worksheet and provide the SOC code selected and \\nits description) \\n \\n SOC code: …………………………………… SOC code description: ……………………………………………………………. \\n6. Job Title / Department: ………………………………………………. /................................................. ..................... \\n 7. Job Address: List ALL locations where the foreign national will work (actual address and county) --but do NOT list multiple NB/Piscataway campus \\nlocations (Specify only one primary campus location): \\n \\n……………………………………………………………………………… …………………………….…. \\n \\n \\n 8. Prevailing Wage per Year: $............................ (Minimum annual salary for this title as it appears in AAUP Contra ct) \\n 9. 
Prevailing Wage Source: \\uf0a0 Collective Bargaining Unit (AAUP) …………………. \\uf0a0 Other \\n 10. Prepared by: Name (print): …………………………………………………… Title: ……………………………………………………………………….', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 83 | " Document(page_content='Phone: ……………………………………………. Email: …………………………………………………………………… \\n Signature: ……………………………………. Date: ………………………………..…………………. \\n 11. Approval by Dean/Director: \\n Name printed: …………………………………………….. Signature: …………………………………. Date: ………………………………….. \\n \\nAfter completing ITEM 11 above, email tis form to ACADEMIC LABOR RELATIONS (ALR) for review and approval at : \\noalr@oq.rutgers.edu \\n (If appointment has a 1 -year term but department is requesting the H -1B approval for more than one year, also email to ALR a copy of the official \\n1-year appointment letter or form.) \\nTHIS FORM WILL BE emailed BACK TO ORIGINATING DEPARTMENT AFTER APPROVED BY ALR (SEE APPROVAL BELOW), and should then be \\nsubmitted to the Rutgers Global --ISSS with the rest of the H -1B completed petition packet.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 84 | " Document(page_content='ALR approva l: …………………………… ……………………………….. Date : …………………………………………….', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 85 | " Document(page_content='6 \\n WORKSHEET FOR LABOR CONDITION APPLICATION (LCA) \\n(This is instruction for completing the LCA Worksheet on previous page.) \\n \\nPlease follow these line -by-line instructions carefully. The department assumes a serious lia bility on behalf of \\nRutgers if the LCA Worksheet is not completed with accurate information as outlined below. \\n1. Enter the employee’s (or prospective employee’s) name; include full middle name (if known). \\n2. “Rate of Pay per Year :” enter the actual annual salary to be paid to the employee \\n3. “Full-Time Position? ” Verify that this is a full -time position by checking “yes.” \\n4. “Period of Employment :” For the begin date, enter the date you want this H -1B authorization to start (which \\nmight be different from the actual employment starting date if the individual began working in this position in \\na different status or is now extending H -1B status). If the person is co ntinuing an appointment already begun', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 86 | " Document(page_content='under a different visa status (such as F -1 on OPT or J -1), show the date immediately following the date on \\nwhich that current visa status will expire. Remember that we cannot file LCA’s or H -1B petitions more than \\nsix months before the requested begin date. \\n5. “The Standard Occupational Classification (SOC) . Please refer to the SOC code manual at : \\nhttps://www.bls.gov/soc/2018/soc_2 018_manual.pdf :” for teaching positions , use SOC codes within \\n“25-0000 Education, Training, and Library Occupations. ” For research positions , look up and select the \\ncode most appropriate to the academic discipline. \\n6. “Job Title :” enter the official Rutgers payroll title followed by the name of the department, e.g. Assistant \\nProfessor/ Chemistry or Research Associate/Physics. Note: any change in title, job responsibilities or \\ndepartment may require filing a new LCA and a new H -1B petition. 
Please check with ISSS before', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 87 | " Document(page_content=\"initiating any changes to determine if new forms will be required. \\n7. “Job Address(es) :” provide a complete address ( including the lab or office number and County ) of the \\nlocation(s) where th e actual work will be performed. Note: any change in the location of the work -site \\nafter filing this LCA -- even a short -term, temporary change of more than five days -- may require the \\nfiling of a completely new LCA. \\n8. “Prevailing Wage per Year :” enter the dollar figure on the AAUP contract (applicable for the “begin date” \\nshown in #4 above) which represents the minimum annual salary for this job title. (If you have ANY questions \\nabout this figure, please call your dean's office or ISSS for assistance. \\n9. “Prevailing Wage Source :” for all AAUP positions, check “Collective Bargaining Unit (AAUP).” \\n10-11. Complete as appropriate, and have Dean or Director Sign.\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 88 | " Document(page_content='\\uf0a8 Follow instructions on bottom of the Worksheet itself in order to ob tain approval from Academic Labor \\n Relations (ALR) prior to submitting it to ISSS with the rest of the H -1B packet. \\n\\uf0a8 ISSS then generates (online) an actual LCA and submits it to Department of Labor ( DOL ). Upon DOL’s \\ncertification, ISSS will be able to fi le the H1B petition packet to USCIS for adjudication.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 89 | " Document(page_content='7 \\n \\n \\n \\nForm # 2: DEEMED EXPORT CERTIFICATION FOR H -1B PETITIONS \\n(This page is for information and processing instructions . The f ollowing page is the form itself) \\n \\nDeemed Export Certification for H -1B Petitions \\n \\nThe Form I -129 issued by USCIS is a petition for a non -immigrant alien to come to work in the United States \\ntemporarily in a specialty occupation. This fo rm requires a “deemed export certification.” This means that \\nRutgers must certify whet her or not the beneficiary of the visa petition will have access to export controlled \\ninformation or technology through his or her work at Rutgers. The certification is based on knowledge as of the \\ntime of the application \\n \\nThe certification reads as follo ws: \\n \\nWith respect to technology or technical data the petitioner will release or otherwise provide access to the \\nbeneficiary, the petitioner certifies that it has reviewed the Export Administration Regulations (EAR) and the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 90 | " Document(page_content='International Traffic in Arms Re gulations and has determined that: \\n \\n1. A license is not required from either US Department of Commerce or the US Department of State to \\nrelease such technology or technical data to the foreign person; or \\n \\n2. A license is required from the US Department of Commer ce or the US Department of State to release \\nsuch technology or technical data to the beneficiary and the petitioner will prevent access to the controlled \\ntechnology or technical data by the beneficiary until and unless the petitioner has received the requi red \\nlicense or other authorization to release to the beneficiary. 
\\nThis formal certification is made as part of the visa pe tition process conducted by the office of International \\nStudent and Scholar Services upon advice from the Office of General Counsel. W hile the department does not \\nmake the certification, the certification cannot be completed without information from the department. The visa', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 91 | " Document(page_content='petition cannot be processed without the certification so it is very important that department provide the \\nnecessa ry information as promptly as possible. \\n \\nPlease note that the certification is made under penalty of perjury. Therefore the information provided \\nmust be as complete and accurate as possible. Both the information we require and the certification \\nstatement appear on page 2 of this document. \\n \\n \\nPROCESSING INSTRUCTIONS : \\n \\n1. The fully executed copy of the form on the next page and all necessary documentation related to this form \\nshould be emailed to: \\nRobert Phillips , Export Compliance Officer ; export -support@rutgers.edu ; \\nAdministrative Services Building III, Cook Campus . \\n2. A single photocopy of the fully executed form (form ONLY —no documentation ) must be included \\nalong with the rest of the H -1B packet submit ted to : ISSS , 30 College Ave., New Brunswick', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 92 | " Document(page_content='Please note: All questions about this form & relevant documentation should be directed to Robert Phillips . \\n His phone : 848-932-4522. His office location : Knightsbridge Rd, 2nd Floor East, Piscataway, NJ 08854)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 93 | " Document(page_content='8 \\n DEEMED EXPORT CERTIFICATION FOR H -1B PETITIONS \\n \\nThe following needed information can be provided by dep artment /center administrators: \\n \\n1. A general description of the beneficiary’s duties \\n2. Name and contact information for faculty sponsor and Chair of Department \\n3. Name and contact information for beneficiary \\n4. A copy of any & all grants, contracts & awards to which FN is assigned as of date of visa petition if any \\n5. A copy of the beneficiary’s C.V. \\nThe following information (as well as any unanswered question s above) should be provided by the \\nbeneficiary’s faculty sponsor and/or department chair or center director: \\n \\n To the extent known at time of visa petitions, will the beneficiary be: \\n \\n\\uf0b7 Yes No Working on any grant, co ntract or award containing publication restrictions \\n \\n\\uf0b7 Yes No Working on any grant, contract or award restricting participation of foreign nationals', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 94 | " Document(page_content='\\uf0b7 Yes No Working on corporate sponsore d contracts with access to company proprietary information \\n \\n\\uf0b7 Yes No Having access to technology or equipment designed or developed with military or space \\napplications \\n \\n\\uf0b7 Yes No Working on high-tech or experimental equipment (e.g. high speed computers, lasers, satellites) \\n \\nIf the answer to any of the above is yes, please explain. \\n \\nThe following certification must be signed by the beneficiary’s faculty sponsor and the chair of the \\nrelevant department or director of the relevant center. 
\\nI certify under penalty of perjury that, to the best of my knowledge, the information herein provided is true and \\naccurate as it pertains to the H -1B petition for Name of beneficiary :_______________________ _ \\nAnd that, with respect to the beneficiary’s expected duties at Rutgers : \\n\\uf0a0 A license is n ot required from either the US Department of Commerce or the US Department of State to allow', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 95 | " Document(page_content='beneficiary access to the technology or technical data he/she will use or be exposed to while working for Rutgers. \\n\\uf0a0 A license is required from the US Department of Commerce and/or the US Department of State in order for \\nbeneficiary to access certain export controlled technology or technical data beneficiary will use or be expo sed to while \\nworking for Rutgers. Please note, if a license is required, beneficiary may have no access to said export controlled \\nmaterials until and unless a proper license is in place. \\n \\n \\nFaculty Sponsor Name _______________ Center Direct or or Dept. Chair Name _____ __________ \\n \\nSignature ________ Date: ______ Signature ____________________ Date: __ ______', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 96 | " Document(page_content='9 \\n Form # 3: H-1B Scholar Information Sheet (page 1 of 2) \\n(Provides hiring unit information it needs from the employee in order to complete forms in the packet) \\nTO BE COMPLETED BY THE EMPLOYEE NAMED IN THE H -1B PETITION \\n \\nCURRENT EMAIL OF THE SCHOLAR: _______________________________________________________ \\n \\n1. Family name (exactly as it appears i n passport) _______________________________________________ \\n \\n2. Given name (first name exactly as it appears in passport)________________________________________ \\n \\n3. Middle name (if applicable and only if listed in passport)________________________________ _________ \\n \\n4. Gender : Male ______ Female ______ \\n \\n5. All other names used_____________________________________________________________ \\n \\n6. Date of Birth (mm/dd/yyyy) ______________ \\n \\n7. USCIS A# (if any) _____________________________________________________ \\n \\n8. Country of Birth _______________________________________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 97 | " Document(page_content='9. Province or State or City of Birth ____________________________________________________ \\n \\n10. Country of Citizenship __________________________________________________________ _ \\n \\n11. Do you and any dependents filing with you have a valid passport? Yes____ No____ \\n If you answer “no” please provide on a separate sheet of paper an explanation and/or proof that an \\n extension has been applied for. \\n \\n12. Are appli cations for dependents being filed with this petition? Yes____ No____ \\n If you answer “yes” please indicate how many dependents are included___________ \\n \\n13. Are you or your dependents currently in U.S. immigration removal proceedings? Yes_ ___ No____ \\n If you answer “yes” please provide an explanation on a separate sheet of paper. \\n \\n14. Has a U.S. 
immigrant petition ever been filed for any person in this petition, including dependents?', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 98 | " Document(page_content='Yes____ No____ If you answe r “yes” please provide an explanation on a separate sheet of paper. \\n \\n15. Have you ever been given any H status of any kind (including H -4) before? Yes____ No____ \\n If you answer “yes” please indicate all the dates on a separate sheet of paper. \\n \\n16. Have you ever been denied H status? Yes____ No____ \\n If you answer “yes” please explain on a separate sheet of paper. \\n \\n17. Please provide a list of every period during which you have ever held J -1 or J -2 status in any J category. \\n(Note: because the H -1B petition form asks for documentation of all J status periods, we will need you to \\nprovide documentation in the form of copies of DS -2019s, IAP -66s, or J -1/J-2 visa in passport . \\n \\n(Continued on page 1 3)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 99 | " Document(page_content='10 \\n Form # 3: H-1B Scholar Information Sheet (page 2 of 2) \\n(Complete ONLY ONE Section Below : Section A or Section B, but not both). \\n \\nSECTION A H-1B STATUS: Complete #19 - #27 ONLY IF you are currently in the U.S. and do NOT \\nintend to leave the U.S. before beginning employment at Rutge rs. \\n \\n18. To help us file your H -1B petition in the most appropriate way for your situation, please list approximate dates \\n and destinations of all your planned travel outside the U.S. in the next 12 months \\n \\n _________________________________________ ____________________________________ \\n \\n19. Date of Most Recent Arrival in the U.S., if applicable. __________________________________________ \\n \\n20. I-94# (from the most recent arrival/departure document)_________________________________________ \\n \\n21. Current Nonimmigrant Status in the U.S. :________ (Note: if currently in H -1B status, you are eligible for', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 100 | " Document(page_content='Rutgers H -1B sponsorship only if you can document with copies of your 3 most recent pay stubs that you are still \\nemployed in your current H -1B job at the time Rutgers files its H -1B petition for you. ) \\n \\n22. Date Status Expires, if applicable (F -1 and J -1 visa holders: put “D/S”) __________________________ \\n \\n23. Passport Number________________________________ \\n \\n24. Date passport issued (mm/dd/yyyy)________ _________ 25. Date passport expires: _________________ \\n \\n26. Current U.S. address ____________________________________________________________________ \\n \\nSECTION B H-1B VISA: Complete #2 7 and #2 8 ONLY IF you will be visiting a U.S. consulate abroad \\nand appl ying for an H -1B visa prior to beginning employment at Rutgers. (Note for Canadian Citizens only: \\nunless you are changing to H -1B status within the U.S. , please complete #2 7 and # 89 even though you will NOT \\nneed to visit a U.S. embassy )', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 101 | " Document(page_content='27. The U.S. cons ulate or U.S. immigration inspection facility you will visit to obtain your U.S. H -1B visa. \\n \\n Office Address (City):_________________ Country of Citizenship or Nationality : ___________________ \\n \\n28. Your foreign address (your permanent addres s outside the U.S.) Please provide complete address. 
\\n Street # & Name : ________________ Apt. \\uf0a0 Ste.\\uf0a0 Flr.\\uf0a0 City or Town: ______________ \\n \\n State or Province : __________________ Postal Code : _________ Country: __________ ________ \\n \\nBy signing below, I certify that all of the above information is correct to the best of my knowledge AND that neither \\nI nor any dependents are currently subject to regulation 212(e) which subjects certain J visa holders to a 2 -year \\nhome residence requirement. (NOTE: If you ARE currently subject to this requirement, do not sign this \\nform, but contact your department at Rutgers immediately.)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 102 | " Document(page_content='Signature Date \\n \\nPLEASE RETURN THE COMPLETED FORM TO YOUR DEPARTMENT ADMINISTRATOR AT RUTGERS \\n(If you have questions concerning this form please email Ruimin Zhang at rzhang@global.rutgers.edu . Questions \\nregarding your appointment or visa eligibility should be directed to the hiring unit, however. )', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 103 | " Document(page_content='11 \\n \\n \\nForm # 4: H-1B Department Certification From \\n \\nBefore ISSS can submit the LCA for certification from the U.S. D OL and USCIS adjudication, \\nThe hiring unit must certify the following statements and complete the required information. \\n \\nI certify that: \\n\\uf0b7 The salary being paid to the above named employee is at least the actual wage being paid to all other \\nindividuals with similar experience and qualifications for the specific employment in question or the \\nprevailing wage level for the occupation in the area of employment (regional average), whichever is \\nhigher. \\n\\uf0b7 Fringe benefits offered to this employee are equivalent to that offered to other U.S. workers in the same \\nclassification. \\n\\uf0b7 Employing this person will not adversely affect the working conditions of U.S. workers similarly employed. \\n\\uf0b7 There is no strike, lockout, or work stopp age due to labor dispute in this occupation.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 104 | " Document(page_content=\"\\uf0b7 We agree to comply fully with the terms of the Labor Condition Application stated above for the duration \\nof the alien's employment in H -1B status at Rutgers \\n\\uf0b7 We fully understand that any willful violation conne cted with providing inaccurate information in the LCA \\nmay incur severe penalties that have a long -range impact at Rutgers to include fines and legal \\nprosecution. \\n\\uf0b7 As required by the US Citizenship and Immigration Services, we agree to pay the reasonable co st of \\nreturn transportation to the alien's home country if s/he is dismissed before the end of the authorized \\nperiod of H -1B employment. \\n\\uf0b7 We have contacted the Rutgers Export Compliance Manager and have the Deemed Export Certification \\nForm signed and we will comply with all Licensure Requirements for research activities. 
\\n \\n \\n Certified by : \\n \\n\\uf0b7 Direct Supervisor of Hiring Unit: \\n \\n _____________________________________________________________ ___________________\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 105 | " Document(page_content='(Name printed ) (Signature) (Date) \\n \\n\\uf0b7 Chair or Director of Hiring Unit : \\n \\n ________________________________________________________________________________ \\n (Name printed) (Signature) (Date) \\n \\n \\n\\uf0b7 Contact Person of Hiring Unit : \\n \\n \\n Name: ______________ _________________ Phone #: _________________ \\n \\n \\n Email: _______________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 106 | " Document(page_content='12 \\n Form # 5: Memorandum Explaining the Actual Wage \\n \\n(Required For THE PUBLIC ACCESS FILE to be set up and maintained at ISSS ) \\n \\nDepartment of Labor (D OL) regulations are design ed to protect U.S. workers. As such, the DOL wants to ensure \\nthat U.S. workers are not being displaced by H -1B employees. The requirements to provide an “explanation of the \\nactual wage” are to document that employers are not using the H -1B program to hire foreign workers at salaries \\nlower than those a U.S. worker would expect for a similar position with similar requirements and responsibilities. \\n Please note that the DOL reserves the right to conduct employer audits of Public Access Files. \\n \\n \\nName of the H -1B employee ______________________________________ \\n \\nName of Hiring Department or Center_______________________________ \\n \\nActual wage being paid to the H -1B employee ___________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 107 | " Document(page_content='(Exactly as actual wage is noted on the LCA Worksheet after sig ned by the VPAA’s office) \\n \\nThe following explanations must be included below or on additional pages: \\n \\n_____An explanation of how the “prevailing wage” was determined. At Rutgers, “prevailing wage” is the minimum \\nannual salary for the job title as it appea rs in AAUP contract, so this documentation should consist of a copy of the \\napplicable page of the AAUP contract with the “prevailing wage” (minimum salary) highlighted for this specific job \\ntitle and term of appointment (CY or AY) \\n \\n_____An explanation of h ow the “actual wage” (actual salary) for the H -1B employee was determined. \\nRegulations provide guidance on this requirement as follows: ) A full, clear explanation of the system that the \\nemployer used to set the \"actual wage\" the employer has paid or will pay workers in the occupation [job title] for', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 108 | " Document(page_content=\"which the H -1B nonimmigrant is sought, including any periodic increases which the system may provide -- e.g., \\nmemorandum summarizing the system or a copy of the employer's pay system or scale (payroll records are not \\nrequired, although they shall be made available to the Department in an enforcement action). \\nPlease note : \\n \\n\\uf0b7 If U.S. employees are paid more than the H -1B employee is , you must provide a detailed explanation and \\njustification as to why this is the case, bearing in mind the DOL’s reason for seeking this information (see top \\nof page). \\n\\uf0b7 Any records documenting wages/salary should not violate the privacy of any employees. 
As such, if you use \\nactual copies of other employees’ pay records, you should blac k out the employees’ names and SSN.\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 109 | " Document(page_content='13 \\n \\n \\n \\nForm #6: I -539, Application to Extend/Change Nonimmigrant Status \\n \\n(This form is required ONLY if visa dependent (s) is (are) physically in the U.S. at the time of filing the H 1B \\npetition. If the alien has a spouse and/or children already in the U.S. in dependent nonimmigrant status, the \\nfollowing should be submitted to ISSS by the hiring unit along with the rest of the H -1B petition packet. (Please \\nsubmit one original and one photocopy of each item only.) \\n \\nIMPORTANT NOTE : our office is not responsible for reviewing this form for the dependent(s), although we will \\nenclose the form in the H1B petition packet to be submitted to USCIS, so the H1B beneficiary’s dependent(s) \\nmust rea d and follow the filing instructions closely and make sure the form is fully completed and signed by the \\ndependent in blue ink. The dependent(s) must complete the I -539 in his/her/their name(s). The H -1B principal', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 110 | " Document(page_content='alien is not the applicant on form I -539; the applicant(s) is (are) the dependent(s). \\n \\nFORM I -539 and Instructions CAN BE OBTAINED ON THE USCIS WEB SITE AT : www.uscis.gov \\n \\n• Click on “Forms” tab at the top of the page \\n• Scroll down to Form I -539 \\n \\nThe following ite ms constitute a completed I -539 packet for dependents: \\n \\n• Form I -539, filled out by and in the name of the spouse, or, if there is no spouse , in the name of the first \\n Visa dependent (original plus one copy); \\n \\n• A completed \"I -539A if there is more than one visa dependent (original plus one copy); \\n \\n• Two copies of all Forms I -94 of all visa dependents (the most recent I-94 admission number/record, which \\nis proof of legal visitor status, can be downloaded from https://i94.cbp.dhs.gov/I94/#/home. \\n \\n• Two cop ies* of marriage license (for spouse only) and birth certificates (for children only); and', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 111 | " Document(page_content='• A check to \"Dept . of Homeland Security\" for $ 370. 00 and a check of $85 for each dependent include on \\n the I-539. \\n \\nProof of relationships: \\nIf the marria ge license and/or birth certificate(s) are in a foreign language, a certified translation must be \\nattached. \\n \\nA certified translation is one on which the translator has written, \"I certify that I am competent in both the English \\nand _____ languages and th at this is a true and accurate translation of the attached document.\" The translator \\nthen signs and dates this statement in the presence of a notary public, who then notarizes the signature.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 112 | " Document(page_content='14 \\n \\nREQUIRED SUPPORTING DOCUMENTS FOR H -1B PETITIONS \\n(The l isted documents must be submitted by the department to ISSS with the completed Internal H1B Request \\nForm as a package.) \\n1. Strong letter of support from department chair (See sample letter on next Page) . Address letter to: \\n U.S. 
Citizenship and Immigra tion Services, California Service Center, Laguna Niguel, CA 92607 \\n Letter should include at least the following : \\n(a) Title of position, exact salary as specified on AAUP contract, and inclusive appointment dates for the current \\nH-1B petition (letter must state that the appointment is temporary); \\n(b) Position description in general terms, e.g., responsibilities include biomedical research and writing papers , the \\nspecific academic credentials required for it, and an explanation as to why these specific cred entials are required; \\n(c) Explanation of how the individual is considered to have a \"specialty occupation.\" (This is the language of the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 113 | " Document(page_content=\"regulations.) The explanation should be more than a perfunctory statement, and can address the way in which the \\nunique talents of the individual will help the department to meet its specific needs; \\n(d) Original signature of department chair (on at least one copy of the letter). \\n2. Individual's C.V./Resume. \\n3. Photocopies of the highest degree diploma: \\nIf the di ploma is from a foreign university, you must obtain a “credential evaluation” which certifies it is \\nequivalent to a U.S. Ph.D. (USCIS frequently returns H -1B petitions submitted with a foreign degree but without \\na credential evaluation, and this can delay the petition approval process by several weeks or more. Following are \\nlinks for 3 Credit Evaluation Services : \\n http://www.evaluationservice.net/ http://naces.org/ https://www.wes.org/ \\nNote: if the Ph.D. diploma has not yet been awarded , you may submit a certified copy of the Master's diploma\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 114 | " Document(page_content=\"PLUS a letter with original signature from the registrar at the Ph.D. -granting instit ution stating that all degree \\nrequirements have been completed and indicating the expected date of conferral of the Ph.D. degree. \\n4. Photocopy of Dean’s letter offering the position to the H1B worker . \\n5. Official job description from the Dean’s office , Director, or HR \\n6. If the individual is already in the U.S ., attach 2 photocopies of the forms in the individual's possession --this \\nshould be at least a little white card in the passport or a computer -generated I -94 printout which can be downloaded \\nat www.cbp.gov/I94 ); (b) the individual’s passport picture and expiration page (these are usually on the same page \\nbut may be separate) plus 2 copies of items listed under the one applicable bullet below : \\n\\uf0a8 If individual is currently in F -l student status , attach photocopy of front and back of his/her Form I -20 and, if on\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 115 | " Document(page_content='authorized Practical Training, a photocopy of the Employment Authorization Document (EAD, small ID -sized card) \\n\\uf0a8 If FN is currently in J -1 status , attach photocopies of all DS -2019 forms in his/her possession and a copy of the \\nJ-1 visa stamp from the passport. Also, if the J -1 is subject to the 2 -year home country residence, provide a copy \\nof the waiver of this requirement from the USCIS. (If wai ver is required but not yet obtained, contact our office \\nimmediately, as obtaining a waiver of the 2 -year requirement can be a lengthy process.) 
\\n\\uf0a8 If the individual is currently in H -1B status with another employer , attach a copy of his/her current & all pre vious \\nH1B Approval s (Form I -797) AND copies of his/her most recent 3 pay stubs (as proof that s/he is still employed.) \\n\\uf0a8 If the individual is currently in H -4 status, attach a copy of the H -1B principal’s documents as listed immediately', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 116 | " Document(page_content='above (under “If the individual is currently in H -1B status with another employer”) plus a copy of the marriage \\nlicense documenting the alien and his/her H -1B spouse are legal spouses, copies of all H -4 Notices of Approval \\nissued by USCIS to the H -4 individual and dependent(s) passport bio data page showing the expiration date \\n6. If H1B status is being requested for a period beyond the employment end date stated in the appointment letter , \\na statement from the Chair confirming that funding is guaranteed to be available for thi s position for the entire \\nperiod requested, and confirming that the department is aware it must notify ISSS if the H1B employee leaves the \\nposition before the end date requested on the petition.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 117 | " Document(page_content='15 \\n SAMPLE LETTER IN SUPPORT OF H1B PETITION \\n (Please Print out on Department letterhead and Sign in Blue Ink) \\nUSCIS, California Service Center \\nAttn: Cap Exempt H -1B Processing Unit \\n24000 Avila Road, Room 2312 \\nLaguna Niguel, CA 92677 \\n \\nRe: Dr. Doe’s H -1B Petition \\n \\nTo Whom It May Concern: \\n \\nThis letter is submitted in support of the H1B petition of Rutgers University for Dr. Doe, who has \\nbeen hired as a (official RU job title) in (name of hiring dept.) on a temporary basis. The intended \\nperiod of H1B employment is from (intended H1B start date ) to (intended H1B end date) , with an annual \\nsalary of $ annual salary . \\n \\nRutgers, an innovative and rapidly growing public university, is the largest institution in New Jersey \\nand one of the oldest schools in the nation. Rutgers’ hundreds of undergraduate and graduate programs \\nof study provide something for everyone, including the natural, physical and social sciences, the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 118 | " Document(page_content='liberal, fine and performing arts, business, engineering and everything in between. Rutgers is also \\nknown for its highly varied graduat e and undergraduate research activities, which encompass everything \\nfrom cancer prevention to evolutionary studies to studies in preschool education. \\n \\nDr. Doe’s responsibilities will include teaching of both undergraduate and graduate courses , as well \\nas conducting research in xx. Because of Dr. Doe’s broad background in research and teaching in the \\narea of xx, we believe he will make outstanding contributions to the Department of Y, enhancing Rutgers \\nreputation as a leader in this area . \\n \\nDr. Doe is most highly qualified for a xx position at Rutgers. He /She received his /her Ph.D. from \\nxx Institute in year. He/She was employed as a xx at ___XX___University for the past three years , \\nwhere he /she worked closely with leading scientists in the field of XX and YY . Dr. 
Doe also xx students', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 119 | " Document(page_content='YY courses and received outstanding evaluations. He/She has written numerous articles in professional \\njournals and has been cited by other scholars in the field. Dr. Doe has presented his /he research \\nwork at two major conferences in the United Sates and has been invited to speak at xxx \\n \\n In conclusion, Dr. Joe will be an asset to the Department of Y . Rutgers will benefit significantly \\nby having Dr. Joe continue h er/his research at Rutgers on an H -1B1 visa. Please contact me if you \\nrequire further information. \\n \\nWe intend to employ Dr. Joe for an initial period of number of years in the position offered. Our \\ndepartment will bear responsibilities for reasonable costs of return transportation abroad of Dr. \\nJoe should he /she be dismiss ed from employment before the expiration of the H1B petition. Thank you \\nfor your assistance and cooperation in processing this request. \\n \\nSincerely, \\nSignature of Hiring official (Department Chair or Dean)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 120 | " Document(page_content='16 \\n \\n \\n \\n \\n \\n \\n \\n \\n. \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n o Request H -1B \\nPacket from \\nISSS \\no Gather \\nsupporting \\ndocuments \\no Submit \\nsupporting \\ndocuments & \\n1-pg \\nworksheet \\nfor LCA and \\nfiling fees to \\nISSS Hiring \\nDepartment Int’l Student & Scholar \\nServices \\no Review dept request for eligibility, create \\nH1B file and enter data in ISSS data base \\no File LCA with Dept. of Labor (needs 7-10 \\ndays processing time) \\no Review supporting documents submitted \\nby hiring unit \\no Review and sign DOL certified LCA & send \\nnotification with copy of LCA to Union \\no Complete all USCIS forms \\no Assemble and submit petition packe t to \\nUSCIS on behalf of dept. \\no Send copies of processed USCIS forms and \\nLCA to dept. /FN with handling \\ninstructions \\no Set up Public Access File (PAF) for each \\nH1B FN \\no Follow up/monitor case status with USCIS \\nand respond to Request for More \\nEvidence when require d \\no Notify dept. and/or H1B FN upon', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 15}),\n", 121 | " Document(page_content=\"receiving Receipt Notice or Approval \\nNotice \\n o Maintain forms and instructions on \\nISSS website \\no Revise process and forms periodically \\nand as required by USCIS and other \\ngovernment agencies \\no Schedule \\nH1B FN \\ncheck -in \\nand \\norientation \\nsession with \\nISSS H1B Visa Process \\no Conduct weekly new H1B check -in & \\norientation \\no Maintain and update PAF files and stay \\nprepared for Public Inspection \\nand/or DOL/USCIS audits \\no Report any changes to H -1B \\nFN's terms of employment \\nto USCIS if applicable. May \\nrequire filing an amende d \\npetition \\no As per the DOL \\nrequirement, all H -1B FNs \\nPAFs must be maintained \\nfor 1 year beyond the \\ntermination of their \\nemployment. ISSS will take \\nover this responsibility. \\n o Dept. 
to \\nnotify ISSS of \\nany changes \\nto H -1B FN's \\nterms of \\nemployment \\nincluding \\nextensions, \\npromotions , \\nand early \\nterminations\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 15})]" 122 | ] 123 | }, 124 | "execution_count": 22, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "docs" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 23, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "61\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "from langchain_community.vectorstores import FAISS\n", 148 | "\n", 149 | "embeddings = OpenAIEmbeddings()\n", 150 | "db = FAISS.from_documents(docs, embeddings)\n", 151 | "print(db.index.ntotal)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 24, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "db.save_local(\"db\")" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 27, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "query = \"give me Email ISSS\"\n", 170 | "docs = db.similarity_search(query)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 30, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "[Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 182 | " Document(page_content='4. ISSS will provide the hiring unit and the FN with a copy of the completed I-129 petition form , \\nDOL -certified LCA as soon as it is available and also notify the AAUP of the LCA filing. \\n \\n5. Departments are responsible for ensuring that H -1B employees going on Rutgers payroll for the first \\ntime attend a \"check -in/orientation session \" at ISSS as soon as possible after the employee has arrived \\nat Rutgers . The workshop is offered weekly on Thursday at 2:45 p.m. and requires adv ance sign -up by emailing \\nISSS at gan tonatos@global.rutgers.edu. For employees whose H -1B status is being extended, the workshop is \\nnot necessary. \\n \\n6. Federal regulations change regularly and ISSS routinely updates the H -1B instruction packet. Please \\nread all of ISSS ’ H-1B instructi ons each time you file an H -1B petition. 
Procedures and forms may be', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 183 | " Document(page_content=\"complete H -1B packets to ISSS as early as 6 months prior to the intended starting date of the H -1B petition, in \\norder to avoid both the USCIS $1,410 Premium Processing fee and the ISSS late fee of $200 . There is also an \\noption for expedited processing service at ISSS , which requires a fee of $300. ISSS tries to process the request \\nwithin 2 -3 weeks if everything is in order . Please see our Late Fee Schedule . \\n \\nH-1B P ETITION FOR FACULTY APPOINTMENT \\n(INSTRUCTION PACKET FOR EMPLOYING DEPARTMENTS ) \\nTable of Contents \\nDepartments' Legal Responsibilities……………………………………………………………………… ..…3 \\nIntroduction to Each of the Required Forms in this Packet ………………………. ……………….... .........4 \\n Form #1: Worksheet for Labor Condition Application ( LCA) …………………………………….……. 5-6 \\n Form #2: “Deemed Export Certification for H -1B Petitions” ………………………………………….... 7-8 \\n Form #3: H-1B Scholar Information Shee t (Required)………………. …………………………… ….... 9-10\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 184 | " Document(page_content='\\uf0a8 Follow instructions on bottom of the Worksheet itself in order to ob tain approval from Academic Labor \\n Relations (ALR) prior to submitting it to ISSS with the rest of the H -1B packet. \\n\\uf0a8 ISSS then generates (online) an actual LCA and submits it to Department of Labor ( DOL ). Upon DOL’s \\ncertification, ISSS will be able to fi le the H1B petition packet to USCIS for adjudication.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5})]" 185 | ] 186 | }, 187 | "execution_count": 30, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "docs" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 29, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? 
____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1})" 205 | ] 206 | }, 207 | "execution_count": 29, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "docs[0]" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "env", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.12.2" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 2 245 | } 246 | -------------------------------------------------------------------------------- /research/utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "\u001b[32m2024-07-08 20:00:02.456\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[34m\u001b[1mThat's it, beautiful and simple logging!\u001b[0m\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from loguru import logger\n", 18 | "\n", 19 | "logger.debug(\"That's it, beautiful and simple logging!\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 5, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "\u001b[32m2024-07-08 20:00:27.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mFaijan\u001b[0m\n", 32 | "\u001b[32m2024-07-08 20:00:27.033\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[33m\u001b[1mThis is a warning message\u001b[0m\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "logger.info(\"Faijan\")\n", 38 | "\n", 39 | "logger.warning(\"This is a warning message\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Collecting loguru\n", 52 | " Downloading loguru-0.7.2-py3-none-any.whl.metadata (23 kB)\n", 53 | "Requirement already satisfied: colorama>=0.3.4 in c:\\users\\faiza\\music\\llmresearch\\rag\\env\\lib\\site-packages (from loguru) (0.4.6)\n", 54 | "Collecting win32-setctime>=1.0.0 (from loguru)\n", 55 | " Downloading win32_setctime-1.1.0-py3-none-any.whl.metadata (2.3 kB)\n", 56 | "Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n", 57 | " ---------------------------------------- 0.0/62.5 kB ? 
eta -:--:--\n", 58 | " ---------------------------------------- 62.5/62.5 kB 1.7 MB/s eta 0:00:00\n", 59 | "Downloading win32_setctime-1.1.0-py3-none-any.whl (3.6 kB)\n", 60 | "Installing collected packages: win32-setctime, loguru\n", 61 | "Successfully installed loguru-0.7.2 win32-setctime-1.1.0\n" 62 | ] 63 | }, 64 | { 65 | "name": "stderr", 66 | "output_type": "stream", 67 | "text": [ 68 | "\n", 69 | "[notice] A new release of pip is available: 24.0 -> 24.1.2\n", 70 | "[notice] To update, run: python.exe -m pip install --upgrade pip\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "! pip install loguru" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import queue\n", 85 | "\n", 86 | "# Create a queue\n", 87 | "\n", 88 | "q = queue.Queue()\n", 89 | "\n", 90 | "# Adding elements to the queue\n", 91 | "q.put(1)\n", 92 | "\n", 93 | "q.put(2)\n", 94 | "\n", 95 | "q.put(1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 9, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "2" 114 | ] 115 | }, 116 | "execution_count": 9, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "q.qsize()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 11, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "deque([1, 2])" 134 | ] 135 | }, 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "q.queue" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "2" 154 | ] 155 | }, 156 | "execution_count": 6, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "q.get_nowait()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "True" 174 | ] 175 | }, 176 | "execution_count": 7, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "q.empty()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "env", 196 | "language": "python", 197 | "name": "python3" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 3 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython3", 209 | "version": "3.12.2" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Pull the model (only needs to be done once) 4 | docker exec -it ollamaa ollama pull llama3:8b 5 | 6 | # Run the command or script you need 7 | docker exec -it ollamaa ollama run nomic-embed-text 8 | 9 | # Attach to the bayesrag 
container 10 | docker attach bayesrag_cont 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | __version__ = "0.0.0" 7 | 8 | REPO_NAME = "bayesrag" 9 | AUTHOR_USER_NAME = "faizack" 10 | SRC_REPO = "bayesrag" 11 | AUTHOR_EMAIL = "faizack619@gmail.com" 12 | 13 | setuptools.setup( 14 | name=SRC_REPO, 15 | version=__version__, 16 | author=AUTHOR_USER_NAME, 17 | author_email=AUTHOR_EMAIL, 18 | description="A python package for Local RAG app using Local LLM and local vector db", 19 | long_description=long_description, 20 | long_description_content_type="text/markdown", 21 | url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}", 22 | project_urls={ 23 | "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues", 24 | }, 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src") 27 | ) 28 | -------------------------------------------------------------------------------- /src/bayesrag/__init__.py: -------------------------------------------------------------------------------- 1 | # import os 2 | # import sys 3 | # import logging 4 | 5 | # logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]" 6 | 7 | # log_dir = "logs" 8 | # log_filepath = os.path.join(log_dir,"running_logs.log") 9 | # os.makedirs(log_dir, exist_ok=True) 10 | 11 | 12 | # logging.basicConfig( 13 | # level= logging.INFO, 14 | # format= logging_str, 15 | 16 | # handlers=[ 17 | # logging.FileHandler(log_filepath), 18 | # logging.StreamHandler(sys.stdout) 19 | # ] 20 | # ) 21 | 22 | # logger = logging.getLogger("RAGLogger") -------------------------------------------------------------------------------- /src/bayesrag/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | from dotenv import load_dotenv 4 | import uuid 5 | load_dotenv() 6 | 7 | DATA_DIR = Path(os.getenv("DATA_DIR", "./data")) 8 | OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "http://localhost:1234/v1") 9 | OPENAI_API_KEY = "lm-studio" 10 | QDRANT_HOST = os.getenv("QDRANT_HOST", "http://localhost:6333") 11 | ID=uuid.uuid4() 12 | # QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", f"law_doc-{ID}") 13 | 14 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 15 | QDRANT_COLLECTION=f"law_doc-{ID}" 16 | 17 | 18 | # IPFS Config 19 | 20 | IPFS_RETRY_LIMIT = 3 21 | IPFS_RETRY_DELAY = 5 22 | 23 | ## Directory for IPFS 24 | directory_path = f"./qdrant_data/collections/global_data" 25 | zip_file_path = "./qdrant_data_backup.zip" 26 | 27 | download_path = "./zipdownloaded_files/qdrant_data_backup.zip" 28 | extract_to_path = "./qdrant_data/collections/global_data" -------------------------------------------------------------------------------- /src/bayesrag/constant.py: -------------------------------------------------------------------------------- 1 | SEND_TOPIC="RAG/Query1" 2 | RECEVICE_TOPIC="RAG/#" 3 | 4 | # Aggregations Topic 5 | AGG_SEND_TOPIC="AGG/SEND" 6 | AGG_RECEIVE_TOPIC="AGG/#" -------------------------------------------------------------------------------- /src/bayesrag/data_loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from langchain_community.document_loaders import PyPDFDirectoryLoader,PyPDFLoader 3 | 4 | def 
load_directory_pdf(directory_path: Path):
5 |     loader = PyPDFDirectoryLoader(directory_path)
6 |     docs = loader.load()
7 |     return docs
8 | 
9 | 
--------------------------------------------------------------------------------
/src/bayesrag/embedder.py:
--------------------------------------------------------------------------------
1 | import ollama
2 | 
3 | def get_embedding(text, model='nomic-embed-text'):
4 |     return ollama.embeddings(model=model, prompt=text)['embedding']
5 | 
6 | if __name__ == '__main__':
7 |     text = "Hello, I am learning OpenAI's LLM"
8 |     embedding = get_embedding(text)
9 |     print(embedding)
--------------------------------------------------------------------------------
/src/bayesrag/evaluator.py:
--------------------------------------------------------------------------------
1 | # from datasets import Dataset
2 | # from ragas.metrics import context_precision, answer_relevancy
3 | # from ragas import evaluate
4 | 
5 | # def evaluate_response(user_query, llm_response, context):
6 | #     data_samples = {
7 | #         'question': [user_query],
8 | #         'answer': [llm_response],
9 | #         'contexts': [[context]],
10 | #     }
11 | #     dataset = Dataset.from_dict(data_samples)
12 | #     # if ground_truth:
13 | #     #     data_samples['ground_truth'] = [ground_truth]
14 | 
15 | #     # metrics = [context_precision] if ground_truth else [answer_relevancy]
16 | #     metrics = [answer_relevancy]
17 | #     score = evaluate(dataset, metrics=metrics)
18 | #     return score.to_pandas()
19 | 
20 | 
21 | from deepeval.metrics import ContextualRelevancyMetric
22 | from deepeval.test_case import LLMTestCase
23 | from bayesrag.llmEvaluator import customLM
24 | 
25 | from deepeval import evaluate
26 | 
27 | def deepEvalutor(user_query: str, generated_response: str, context: list[str]):
28 |     # Evaluate the output using the Contextual Relevancy metric
29 |     metric = ContextualRelevancyMetric(
30 |         threshold=0.7,
31 |         model=customLM(),
32 |         include_reason=True
33 |     )
34 | 
35 |     test_case = LLMTestCase(
36 |         input=user_query,
37 |         actual_output=generated_response,
38 |         retrieval_context=context
39 |     )
40 |     evaluation = metric.measure(test_case)
41 | 
42 |     result = evaluate([test_case], [metric])
43 |     score, reason = result.test_results[0].metrics_data[0].score, result.test_results[0].metrics_data[0].reason
44 | 
45 |     evaluation_results = {
46 |         "score": score,
47 |         "reason": reason
48 |     }
49 | 
50 |     return evaluation_results
--------------------------------------------------------------------------------
/src/bayesrag/generator.py:
--------------------------------------------------------------------------------
1 | from bayesrag.utils import ClassificationResult
2 | import ollama
3 | 
4 | import json
5 | 
6 | # from openai import OpenAI
7 | # from bayesrag.config import OPENAI_BASE_URL, OPENAI_API_KEY
8 | # client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
9 | 
10 | 
11 | def generate_response(user_query, context, model='llama3:8b'):
12 |     prompt_template = f"""You are a lawyer. Respond only to law-related questions. Do not rely on your own knowledge; use only the context below to answer.
13 | 
14 |     Here is the question: {user_query}
15 | 
16 |     Additional context to support the answer: {context}
17 |     """
18 | 
19 |     system_prompt = "You are a helpful assistant that handles user queries and provides answers using the given context without external information."
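    # Note (a sketch of the intent here, assuming the standard `ollama` Python
    # client): `ollama.chat` takes `messages` as a list of {"role", "content"}
    # dicts, one dict per role, and `stream=True` makes it return an iterator
    # of partial chunks rather than a single completed response.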
20 | 
21 |     messages = [
22 |         # One message per role: a single dict with duplicate 'role'/'content'
23 |         # keys would keep only the last pair and silently drop the system prompt.
24 |         {"role": "system", "content": system_prompt},
25 |         {"role": "user", "content": prompt_template},
26 |     ]
27 |     response = ollama.chat(model=model, messages=messages, stream=True)
28 | 
29 | 
30 |     for chunk in response:
31 |         if chunk['message']['content'] is not None:
32 |             yield chunk['message']['content']
33 | 
34 | 
35 | def classify_query(user_query) -> ClassificationResult:
36 |     """
37 |     Classifies a user query related to law and returns a ClassificationResult enum.
38 | 
39 |     Args:
40 |         user_query: The user's question.
41 | 
42 |     Returns:
43 |         ClassificationResult.YES if the query is classified as a law-related question,
44 |         ClassificationResult.NO if it's not a law-related question,
45 |         ClassificationResult.ERROR if there's an error parsing the response.
46 |     """
47 | 
48 |     system_prompt = """
49 |     You are a Lawyer. Classify whether the following question is related to Law, and always give a response in JSON format as {"results": "yes/no"} without giving any reason in the response.
50 |     """
51 | 
52 |     response = ollama.chat(
53 |         model='llama3:8b',
54 |         messages=[
55 |             {"role": "system", "content": system_prompt},
56 |             {"role": "user", "content": user_query}
57 |         ],
58 |         format="json",
59 |     )
60 | 
61 |     # Parse the response into JSON
62 |     response_json = response['message']['content']
63 |     print(response_json)
64 |     try:
65 |         response_dict = json.loads(response_json)
66 |         result = response_dict["results"].lower()
67 |         if result == "yes":
68 |             print("Yes, the query is related to law")
69 |             return ClassificationResult.YES
70 |         elif result == "no":
71 |             print("No, the query is not related to law")
72 |             return ClassificationResult.NO
73 |         else:
74 |             print("Unexpected result in classification")
75 |             return ClassificationResult.ERROR  # Handle unexpected results
76 |     except json.JSONDecodeError:
77 |         print(f"Error parsing JSON response: {response_json}")
78 |         return ClassificationResult.ERROR
79 | 
80 | 
81 | 
82 | if __name__ == "__main__":
83 | 
84 |     user_query = "how to get h1b visa"
85 | 
86 | 
87 |     result = classify_query(user_query)
88 | 
89 |     if result == ClassificationResult.NO:
90 |         for text in generate_response(user_query, None):
91 |             print(text, end="")
92 |     else:
93 |         print("Query is law-related: find the answer from the vector database")
--------------------------------------------------------------------------------
/src/bayesrag/ipfs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | import ipfshttpclient
4 | import time
5 | from loguru import logger
6 | from bayesrag.config import download_path, extract_to_path, directory_path, zip_file_path, IPFS_RETRY_LIMIT, IPFS_RETRY_DELAY
7 | 
8 | 
9 | class IPFSManager:
10 |     def __init__(self, ipfs_address=None, retry_limit=IPFS_RETRY_LIMIT, retry_delay=IPFS_RETRY_DELAY):
11 | 
12 |         self.ipfs_address = ipfs_address or os.getenv('IPFS_ADDRESS', '/ip4/127.0.0.1/tcp/5001/http')
13 |         self.retry_limit = retry_limit
14 |         self.retry_delay = retry_delay
15 |         self.client = self._connect_to_ipfs(self.ipfs_address)
16 | 
17 |     def _connect_to_ipfs(self, ipfs_address):
18 |         """Connect to the IPFS client with retry logic."""
19 |         for attempt in range(self.retry_limit):
20 |             try:
21 |                 ipfs_conn_obj = ipfshttpclient.connect(ipfs_address)
22 |                 logger.info("Connected to IPFS")
23 |                 return ipfs_conn_obj
24 |             except Exception as e:
25 |                 logger.error(f"Error during IPFS connection attempt {attempt + 1}/{self.retry_limit}: {e}")
26 |                 if attempt < self.retry_limit - 1:
27 |                     time.sleep(self.retry_delay)
28 |                 else:
29 |                     logger.critical("Unable to connect to IPFS after
multiple attempts.") 30 | raise 31 | 32 | def _validate_path(self, path, path_type="directory"): 33 | """Validate the existence of a file or directory.""" 34 | if path_type == "directory" and not os.path.isdir(path): 35 | logger.error(f"Directory {path} does not exist or is not accessible.") 36 | raise FileNotFoundError(f"Directory {path} not found.") 37 | elif path_type == "file" and not os.path.isfile(path): 38 | logger.error(f"File {path} does not exist or is not accessible.") 39 | raise FileNotFoundError(f"File {path} not found.") 40 | 41 | def _zip_directory(self, directory_path, zip_file_path): 42 | """Compress a directory into a zip file.""" 43 | self._validate_path(directory_path, "directory") 44 | 45 | logger.info(f"Creating zip file at: {zip_file_path}") 46 | try: 47 | with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf: 48 | for root, _, files in os.walk(directory_path): 49 | for file in files: 50 | file_path = os.path.join(root, file) 51 | zipf.write(file_path, os.path.relpath(file_path, directory_path)) 52 | logger.info(f"Zip file created successfully: {zip_file_path}") 53 | return zip_file_path 54 | except Exception as e: 55 | logger.error(f"Error while zipping directory: {e}") 56 | raise 57 | 58 | def _unzip_file(self, zip_file_path, extract_to_path): 59 | """Extract a zip file to a specific directory.""" 60 | self._validate_path(zip_file_path, "file") 61 | 62 | logger.info(f"Unzipping file: {zip_file_path} to {extract_to_path}") 63 | try: 64 | with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: 65 | zip_ref.extractall(extract_to_path) 66 | logger.info(f"Unzipped file successfully to: {extract_to_path}") 67 | except Exception as e: 68 | logger.error(f"Error while unzipping file: {e}") 69 | raise 70 | 71 | def upload_directory(self, directory_path=directory_path): 72 | """Automatically zip and upload a directory to IPFS.""" 73 | logger.info(f"Zipping and uploading directory: {directory_path}") 74 | try: 75 | zip_file = self._zip_directory(directory_path, zip_file_path) 76 | return self.upload_file(zip_file) 77 | except Exception as e: 78 | logger.error(f"Error during upload: {e}") 79 | return None 80 | 81 | def upload_file(self, file_path): 82 | """Upload a file to IPFS.""" 83 | self._validate_path(file_path, "file") 84 | 85 | for attempt in range(self.retry_limit): 86 | try: 87 | logger.info(f"Uploading file: {file_path}") 88 | result = self.client.add(file_path) 89 | logger.info(f"Upload successful. 
IPFS result: {result}") 90 | return result 91 | except Exception as e: 92 | logger.error(f"Error during file upload attempt {attempt + 1}/{self.retry_limit}: {e}") 93 | if attempt < self.retry_limit - 1: 94 | time.sleep(self.retry_delay) 95 | else: 96 | logger.critical("Failed to upload file after multiple attempts.") 97 | return None 98 | 99 | def download_and_extract(self, file_hash, download_path, extract_to_path): 100 | """Download a zip file from IPFS and extract its contents.""" 101 | logger.info(f"Starting download of IPFS hash: {file_hash}") 102 | 103 | os.makedirs(os.path.dirname(download_path), exist_ok=True) 104 | 105 | for attempt in range(self.retry_limit): 106 | try: 107 | file_content = self.client.cat(file_hash) 108 | with open(download_path, 'wb') as file: 109 | file.write(file_content) 110 | logger.info(f"Downloaded and saved file: {download_path}") 111 | 112 | # Unzip the downloaded file 113 | self._unzip_file(download_path, extract_to_path) 114 | return 115 | except Exception as e: 116 | logger.error(f"Error during download attempt {attempt + 1}/{self.retry_limit}: {e}") 117 | if attempt < self.retry_limit - 1: 118 | time.sleep(self.retry_delay) 119 | else: 120 | logger.critical("Failed to download and extract file after multiple attempts.") 121 | return None 122 | 123 | 124 | # Usage example (to be run only in a non-production environment): 125 | if __name__ == "__main__": 126 | # Initialize IPFSManager with a retry limit and delay 127 | ipfs_manager = IPFSManager() 128 | 129 | # Zip and upload directory 130 | upload_result = ipfs_manager.upload_directory(directory_path) 131 | 132 | # If upload was successful, download and extract the file 133 | if upload_result: 134 | file_hash = upload_result['Hash'] 135 | ipfs_manager.download_and_extract(file_hash, download_path, extract_to_path) 136 | else: 137 | logger.critical("Upload failed, no file to download.") 138 | -------------------------------------------------------------------------------- /src/bayesrag/llmEvaluator.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from deepeval.models import DeepEvalBaseLLM 4 | from ollama import Client 5 | from ollama import ChatResponse 6 | from deepeval import evaluate 7 | 8 | 9 | 10 | class customLM(DeepEvalBaseLLM): 11 | def __init__(self, url="http://localhost:11434",model="llama3:8b"): 12 | self.model = Client(host=url) 13 | self.model_name = model 14 | 15 | def load_model(self, *args, **kwargs) -> Client: 16 | return self.model 17 | 18 | def generate(self, prompt: str) -> str: 19 | client = self.load_model() 20 | completion:ChatResponse = client.chat( 21 | model=self.model_name, 22 | messages=[ 23 | {"role": "system", "content": "Your helpful AI for Evaluation"}, 24 | {"role": "user", "content": prompt} 25 | ], 26 | # format="json" 27 | ) 28 | return completion['message']["content"] 29 | 30 | async def a_generate(self, prompt: str) -> str: 31 | # Use asyncio.to_thread to run the blocking generate method in a separate thread 32 | return self.generate(prompt=prompt) 33 | 34 | def get_model_name(self): 35 | return self.model_name 36 | 37 | from deepeval.metrics import ContextualRelevancyMetric 38 | from deepeval.test_case import LLMTestCase 39 | # from bayesrag.llmEvaluator import customLM 40 | 41 | def deepEvalutor(user_query: str,generated_response:str,context: list[str]): 42 | # Evaluate the output using Contextual Relevancy Metric 43 | metric = ContextualRelevancyMetric( 44 | threshold=0.7, 45 | model=customLM(), 46 | 
include_reason=True 47 | ) 48 | 49 | test_case = LLMTestCase( 50 | input=user_query, 51 | actual_output=generated_response, 52 | retrieval_context=context 53 | ) 54 | # metric.measure(test_case) is not needed here; evaluate() below runs the metric once 55 | 56 | result = evaluate([test_case], [metric]) 57 | score, reason = result.test_results[0].metrics_data[0].score, result.test_results[0].metrics_data[0].reason 58 | 59 | evaluation_results = { 60 | "score": score, 61 | "reason": reason 62 | } 63 | 64 | return evaluation_results 65 | 66 | 67 | if __name__ == "__main__": 68 | 69 | c = customLM() 70 | print(c.generate("Hey")) 71 | 72 | -------------------------------------------------------------------------------- /src/bayesrag/mq.py: -------------------------------------------------------------------------------- 1 | import paho.mqtt.client as mqtt 2 | import time 3 | import json 4 | 5 | from bayesrag.retriever import get_context 6 | from bayesrag.constant import RECEVICE_TOPIC, AGG_RECEIVE_TOPIC, AGG_SEND_TOPIC 7 | 8 | from bayesrag.ipfs import IPFSManager 9 | import queue 10 | from loguru import logger 11 | 12 | 13 | class Mqttclient: 14 | def __init__(self, broker_address="mqtt.eclipseprojects.io", broker_port=1883, replyTopic="USER_TOPIC-", isAdmin=False, collection_name=None): 15 | self.broker_address = broker_address 16 | self.broker_port = broker_port 17 | self.replyTopic = replyTopic # Topic to which the response will be sent. 18 | self.ADMIN_NODE = isAdmin 19 | self.collection_name = collection_name # Qdrant collection this node works with (passed by callers) 20 | self.ipfs = IPFSManager() 21 | self.reply_queue = queue.Queue() 22 | self.client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2) 23 | self.client.on_connect = self.on_connect 24 | self.client.on_message = self.on_message 25 | self.client.connect(self.broker_address, self.broker_port) 26 | self.client.loop_start() 27 | 28 | def on_connect(self, client, userdata, flags, reason_code, properties): 29 | print(f"Connected with result code {reason_code}") 30 | client.subscribe(RECEVICE_TOPIC) 31 | client.subscribe(self.replyTopic) 32 | if self.ADMIN_NODE: 33 | client.subscribe(AGG_RECEIVE_TOPIC) 34 | 35 | 36 | def subscribe(self, topic): 37 | self.client.subscribe(topic) 38 | 39 | # The callback for when a PUBLISH message is received from the server. 40 | def on_message(self, client, userdata, msg): 41 | print(msg.topic + " " + str(msg.payload)) 42 | if msg.topic == self.replyTopic: 43 | self.handle_reply(msg.payload) 44 | elif msg.topic == AGG_SEND_TOPIC: 45 | self.handle_vector_Message(msg.payload) 46 | else: 47 | self.handle_message(msg.payload) # Call the function to handle the received message. 48 | 49 | def handle_reply(self, data): 50 | # Compare each node's response and keep the one with the highest accuracy 51 | data = json.loads(data) 52 | logger.warning(f"Queue size {self.reply_queue.qsize()}") 53 | # Process the reply here. 
54 | # TODO: needs a triggered method so the reply can be forwarded to the requester with the response information 55 | self.reply_queue.put(data) 56 | 57 | def handle_vector_Message(self, data): 58 | data = json.loads(data) 59 | print("Handle vector message") 60 | print("-" * 100) 61 | source_embedding = [self.deserialize_record(record) for record in data.get("data")] 62 | logger.info(f"Received vector message: {source_embedding}") 63 | # Process the vector message 64 | from bayesrag.vector_db import VectorDB 65 | vectorDb = VectorDB() 66 | vectorDb.merge_embeddings(source_embedding) 67 | 68 | directory_path = f"./qdrant_data/collections/{vectorDb.collection_name}" 69 | 70 | # Upload to IPFS 71 | self.ipfs.upload_directory(directory_path) 72 | 73 | 74 | def deserialize_record(self, record): 75 | # Convert each record back to the original format 76 | from qdrant_client.models import Record 77 | return Record(id=record['id'], payload=record['payload'], vector=record['vector']) 78 | 79 | 80 | def serialize_record(self, record): 81 | # Convert each record to a serializable format (dict) 82 | return { 83 | 'id': record.id, 84 | 'payload': record.payload, 85 | 'vector': record.vector 86 | } 87 | 88 | 89 | def handle_message(self, data): 90 | data = json.loads(data) 91 | replayTopic = data.get('replay_topic') 92 | query = data.get('query') 93 | context, score = get_context(query) 94 | 95 | if context: 96 | data = {"context": context, "score": score} 97 | self.send_message(replayTopic, data) 98 | 99 | def send_message(self, send_topic, payload: dict): 100 | payload = json.dumps(payload) # Convert the payload to a JSON string before sending it. 101 | self.client.publish(send_topic, payload, qos=2) 102 | logger.info(f"Sent message: {payload}") 103 | 104 | def send_vector(self, scroll_result): 105 | Vect_Data = [self.serialize_record(record) for record in scroll_result[0]] 106 | data = {"data": Vect_Data} 107 | payload = json.dumps(data) 108 | self.client.publish(AGG_SEND_TOPIC, payload) 109 | logger.info("Vector sent to admin") 110 | 111 | 112 | def stop(self): 113 | self.client.loop_stop() 114 | self.client.disconnect() 115 | 116 | 117 | 118 | 119 | if __name__ == "__main__": 120 | import uuid 121 | import argparse 122 | 123 | parser = argparse.ArgumentParser(description="Get node type information to send the vector to the admin node") 124 | parser.add_argument("--collectionName", type=str, required=True, help="Name of the Vector DB collection") 125 | parser.add_argument("--nodetype", type=str, help="Node type") 126 | args = parser.parse_args() 127 | 128 | 129 | ID = uuid.uuid4() 130 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 131 | collections = args.collectionName 132 | logger.info(f"Collection Name: {collections}") 133 | 134 | if args.nodetype: 135 | client = Mqttclient(collection_name=collections, replyTopic=REPLAY_TOPIC, isAdmin=True) 136 | else: 137 | client = Mqttclient(collection_name=collections, replyTopic=REPLAY_TOPIC, isAdmin=False) 138 | QDRANT_HOST = "http://localhost:6333" # Local Qdrant 139 | from qdrant_client import QdrantClient 140 | 141 | qclient = QdrantClient(url=QDRANT_HOST) 142 | 143 | 144 | ## TODO: 145 | # Needs functions to quit, send the vector, and insert new data, keyed on commands like quit, send, and insert (with a data location) 146 | from bayesrag.utils import wait_for_commands 147 | while True: 148 | command = wait_for_commands() 149 | if command == 'quit': 150 | break 151 | elif command == 'send': 152 | scroll_result = qclient.scroll(collection_name=collections, with_vectors=True) 153 | client.send_vector(scroll_result) 154 | elif command.startswith('insert '): 155 | data_location = 
command.split(' ', 1)[1] 156 | logger.warning(f"'insert' is not implemented yet; requested data location: {data_location}") # TODO: implement Mqttclient.insert_new_data 157 | 158 | client.stop() 159 | logger.info("MQTT client stopped.") 160 | 161 | -------------------------------------------------------------------------------- /src/bayesrag/retriever.py: -------------------------------------------------------------------------------- 1 | from bayesrag.embedder import get_embedding 2 | from bayesrag.vector_db import VectorDB 3 | # from bayesrag.config import QDRANT_COLLECTION 4 | # from bayesrag.constant import SEND_TOPIC 5 | import time 6 | import queue 7 | import uuid 8 | from loguru import logger 9 | 10 | 11 | def get_context(query): 12 | qclient = VectorDB() 13 | 14 | query_embedding = get_embedding(query) 15 | results, score = qclient.search_vector(query_embedding) 16 | 17 | logger.debug(f"Score: {score}") 18 | if score is None: 19 | logger.info("No results found") 20 | return None, None 21 | if score > 0.60: 22 | logger.info(f"Found in local VectorDB as the score is higher than 60%. Score: {score}") 23 | return results, score 24 | 25 | else: 26 | logger.debug("No relevant context found in local VectorDB; ask the LLM instead.") 27 | return None, None # No relevant context found in the local VectorDB 28 | 29 | 30 | # return qclient.search_vector(query_embedding) 31 | 32 | 33 | # def get_Relavant_Context_from_network(query,client,REPLAY_TOPIC,collection_name): 34 | 35 | # query_embedding = get_embedding(query) 36 | # qclient=VectorDB(collection_name) 37 | 38 | # results = qclient.search( 39 | # collection_name=collection_name, 40 | # query_vector=query_embedding, 41 | # limit=1, 42 | # ) 43 | 44 | # logger.debug("Score: ", results[0].score) 45 | # if results[0].score > 0.001: 46 | # logger.debug("No relevant context found in local VectorDB, sending query to network") 47 | # data = { 48 | # "replay_topic": REPLAY_TOPIC, 49 | # "query": query, 50 | # } 51 | # # TODO: send request to other node to get relevant information 52 | # client.send_message(SEND_TOPIC, data) 53 | # count=0 54 | # reply=None 55 | # # Check the reply queue for a response 56 | # while count<3: 57 | # try: 58 | # reply = client.reply_queue.get_nowait()# Wait for a reply for 10 seconds 59 | # logger.critical(f"Received reply from another node: {reply}") 60 | # if reply!=None: 61 | # return reply 62 | # # Process the reply as needed 63 | # except queue.Empty: 64 | # logger.warning("No reply received in the last 10 seconds") 65 | # count+=1 66 | # time.sleep(10) 67 | 68 | 69 | # return reply # TODO: return appropriate message or None for no relevant context found in local VectorDB 70 | 71 | # else: 72 | # print("Found in local VectorDb") 73 | # return results[0].payload["data"], results[0].score 74 | 75 | 76 | if __name__ == '__main__': 77 | user_query = input("Enter your query or type 'q' to quit: ") 78 | from bayesrag.mq import Mqttclient 79 | ID = uuid.uuid4() 80 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 81 | collection_name = "law_docs_global" 82 | client = Mqttclient(collection_name=collection_name, replyTopic=REPLAY_TOPIC) 83 | while user_query.lower() != "q": 84 | relevant_context, score = get_context(user_query) 85 | user_query = input("\nEnter your query or type 'q' to quit: ") 86 | 87 | -------------------------------------------------------------------------------- /src/bayesrag/text_splitter.py: -------------------------------------------------------------------------------- 1 | from langchain.text_splitter import RecursiveCharacterTextSplitter 2 | 3 | def split_texts(documents): 4 | text_splitter = 
RecursiveCharacterTextSplitter( 5 | chunk_size=2000, 6 | chunk_overlap=100, 7 | length_function=len, 8 | is_separator_regex=False, 9 | ) 10 | texts = text_splitter.split_documents(documents) 11 | return texts 12 | -------------------------------------------------------------------------------- /src/bayesrag/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from loguru import logger 3 | class ClassificationResult(Enum): 4 | """ 5 | Possible outcomes of classifying a user query. 6 | 7 | Members: 8 | YES - the query is classified as a law-related question, 9 | NO - the query is not a law-related question, 10 | ERROR - the classification response could not be parsed. 11 | """ 12 | 13 | YES = "yes" 14 | NO = "no" 15 | ERROR = "error" 16 | 17 | def display_commands(): 18 | logger.info("Available commands:") 19 | print("1. 'quit' - Exit the application") 20 | print("2. 'query' - Query the vector DB with the LLM") 21 | print("3. 'send' - Send vector data") 22 | print("4. 'insert <data_location>' - Insert new data from the specified location") 23 | 24 | def wait_for_commands(): 25 | 26 | 27 | while True: 28 | display_commands() 29 | command = input("\nEnter your command: ").strip().lower() 30 | 31 | if command == 'quit': 32 | return 'quit' 33 | elif command == 'query': 34 | return 'query' 35 | elif command == 'send': 36 | return 'send' 37 | elif command.startswith('insert '): 38 | return command 39 | else: 40 | logger.warning("Invalid command. Please enter a valid command.") -------------------------------------------------------------------------------- /src/bayesrag/vector_db.py: -------------------------------------------------------------------------------- 1 | from qdrant_client import QdrantClient, models 2 | from bayesrag.embedder import get_embedding 3 | from loguru import logger 4 | from bayesrag.config import QDRANT_HOST, QDRANT_COLLECTION 5 | class VectorDB: 6 | def __init__(self, collection_name=QDRANT_COLLECTION, qdrant_host=QDRANT_HOST): 7 | self.collection_name = collection_name 8 | self.qclient = QdrantClient(url=qdrant_host) 9 | 10 | def create_db(self): 11 | if self.qclient.collection_exists(collection_name=self.collection_name): 12 | logger.debug(f"Vector DB already exists: {self.collection_name}") 13 | else: 14 | self.qclient.create_collection( 15 | collection_name=self.collection_name, 16 | vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE), 17 | ) 18 | logger.info(f"Vector DB successfully created: {self.collection_name}") 19 | 20 | def search_vector(self, query_embedding, limit=1): 21 | results = self.qclient.search( 22 | collection_name=self.collection_name, 23 | query_vector=query_embedding, 24 | limit=limit, 25 | ) 26 | if results: 27 | top_result = results[0] 28 | return top_result.payload["data"], top_result.score 29 | else: 30 | logger.warning("No results found") 31 | return None, None 32 | 33 | def upsert_embeddings(self, chunks): 34 | logger.info("Upserting embeddings into Vector DB...") 35 | for i, chunk in enumerate(chunks): 36 | embedding = get_embedding(chunk.page_content) 37 | self.qclient.upsert( 38 | collection_name=self.collection_name, 39 | points=[ 40 | models.PointStruct( 41 | id=i, vector=embedding, payload={"data": chunk.page_content, "metadata": chunk.metadata} 42 | ), 43 | ], 44 | ) 45 | logger.info("Embeddings created successfully") 46 | 47 | def merge_embeddings(self, source_points): 48 | for point in source_points: 49 | self.qclient.upsert( 50 | 
collection_name=self.collection_name, 51 | points=[ 52 | models.PointStruct( 53 | id=point.id, 54 | vector=point.vector, 55 | payload=point.payload 56 | ), 57 | ], 58 | ) 59 | logger.info("Merged embeddings successfully") 60 | --------------------------------------------------------------------------------
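A minimal usage sketch (not one of the repository files above) of how the pieces fit together: build the Qdrant collection with VectorDB, index chunks produced by split_texts, and answer queries through get_context. It assumes a Qdrant instance is reachable at the configured QDRANT_HOST and that document chunks already exist; the query string and score formatting are illustrative only.

from bayesrag.vector_db import VectorDB
from bayesrag.retriever import get_context

# Create the collection (no-op if it already exists) and index pre-split chunks.
db = VectorDB()  # defaults to QDRANT_COLLECTION / QDRANT_HOST from bayesrag.config
db.create_db()
# db.upsert_embeddings(chunks)  # `chunks` are the LangChain Documents returned by split_texts()

# get_context returns (payload, score) when the top hit scores above 0.60, otherwise (None, None).
context, score = get_context("What notice period does the lease require?")  # illustrative query
if context:
    print(f"score={score:.2f}: {context[:200]}")
else:
    print("No local context found; fall back to the LLM or query the MQTT network.")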