├── .dockerignore ├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── config └── config.yaml ├── contract ├── README.md ├── foundry.toml ├── script │ └── deploy.s.sol ├── src │ └── GlobalVectorManager.sol └── test │ └── GlobalVectorManager.t.sol ├── dev.sh ├── doc ├── drags.JPG ├── fRag.drawio ├── system.drawio └── systemDesign.drawio ├── docker-compose.yml ├── main.py ├── requirements.txt ├── research ├── .deepeval-cache.json ├── Evulate.ipynb ├── RAGAS.ipynb ├── communication │ ├── ipfs.ipynb │ ├── kafka │ │ ├── consumer.py │ │ ├── docker-compose.yml │ │ └── producer.py │ └── mqtt │ │ ├── receive.py │ │ └── send.py ├── groq.ipynb ├── localRag.ipynb ├── ollama.ipynb ├── retriver.ipynb └── utils.ipynb ├── run.sh ├── setup.py └── src └── bayesrag ├── __init__.py ├── config.py ├── constant.py ├── data_loader.py ├── embedder.py ├── evaluator.py ├── generator.py ├── ipfs.py ├── llmEvaluator.py ├── mq.py ├── retriever.py ├── text_splitter.py ├── utils.py └── vector_db.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | artifacts/* 163 | 164 | logs 165 | 166 | qdrant_storage;C 167 | 168 | *.png 169 | 170 | data*/ 171 | 172 | tut/ 173 | 174 | research/ 175 | 176 | doc/ 177 | 178 | artifacts/* 179 | 180 | logs 181 | 182 | qdrant_* 183 | 184 | *.png 185 | 186 | data*/ 187 | 188 | tut/ 189 | 190 | downloaded_files 191 | zipdownloaded_files 192 | 193 | ipfs_data 194 | ipfs_staging -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | artifacts/* 163 | 164 | logs 165 | 166 | qdrant_* 167 | 168 | *.png 169 | 170 | data*/ 171 | 172 | tut/ 173 | 174 | downloaded_files 175 | zipdownloaded_files 176 | 177 | ipfs_data 178 | ipfs_staging
-------------------------------------------------------------------------------- /Dockerfile: --------------------------------------------------------------------------------
1 | # Use the official Python 3.10 slim image as a base image 2 | FROM python:3.10-slim 3 |
4 | # Keep Python from writing .pyc files and from buffering stdout/stderr, so logs appear immediately 5 | ENV PYTHONDONTWRITEBYTECODE=1 6 | ENV PYTHONUNBUFFERED=1 7 |
8 | WORKDIR /app 9 |
10 | # Copy only the requirements.txt file first so the dependency layer can be cached 11 | COPY requirements.txt /app/ 12 |
13 | # Install Python dependencies; this layer is rebuilt only when requirements.txt changes 14 | RUN pip install --no-cache-dir -r requirements.txt 15 |
16 | # Copy the rest of the project files 17 | COPY . /app 18 |
19 | # Build and install the package 20 | RUN python setup.py sdist bdist_wheel 21 | RUN pip install --no-cache-dir . 22 |
23 | # Expose the application port 24 | EXPOSE 8000 25 |
26 | # Define data directory and default node type 27 | ENV DATA_DIR=/data 28 | ENV NODETYPE=admin 29 | ENV QDRANT_HOST=http://localhost:6333 30 |
31 | # Command to run the application (shell form so the environment variables are expanded at runtime) 32 | CMD python main.py --data-dir "$DATA_DIR" --nodetype "$NODETYPE" 33 |
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License 2 | 3 | Copyright (c) 2025 TraqueAi 4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 |
12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Traque Ai powered by RAG 2 | ![RAG](https://github.com/user-attachments/assets/afc2009b-2c90-4c17-8a93-dc5b7eb79e5e) 3 | 4 | 5 |
6 | [Website](https://traque.cc) | [Twitter](https://twitter.com/TraqueAi) 7 | 8 |
9 | ## Features 10 | 11 |
12 | - **Decentralized RAG for Blockchain Analysis**: Uses Retrieval-Augmented Generation (RAG) to process and analyze blockchain transactions with high accuracy.
13 | - **Privacy-Preserving Storage**: Leverages IPFS for secure, decentralized data storage.
14 | - **Low-Latency Communication**: Implements MQTT for real-time data sharing between nodes.
15 | - **Immutable and Transparent Records**: Utilizes blockchain to ensure secure and verifiable transaction tracking.
16 | - **Address Clustering & Fund Flow Analysis**: Identifies laundering patterns and traces transactions across addresses.
17 | - **Graph-Based Visualization**: Displays transaction paths interactively for enhanced clarity.
18 | - **AI-Driven Anomaly Detection**: Automatically flags suspicious transactions using machine learning.
19 | - **Scalable & Collaborative Learning**: Nodes contribute knowledge to improve global blockchain analysis. 20 |
21 | ## Key Technologies 22 |
23 | - **IPFS**: Decentralized storage for secure and censorship-resistant data management.
24 | - **MQTT**: Lightweight protocol for fast, efficient communication between nodes.
25 | - **Blockchain**: Immutable ledger for recording analysis results and rewarding contributions.
26 | - **Qdrant**: Vector database for high-speed, scalable similarity search and retrieval. 27 |
28 | ## Setup and Installation 29 |
30 | ### Clone and Build 31 |
32 | ```sh 33 | git clone https://github.com/TraqueAi/Traque-AI 34 | cd Traque-AI 35 | ``` 36 |
37 | Build the package: 38 |
39 | ```sh 40 | python setup.py sdist bdist_wheel 41 | pip install . 42 | ``` 43 |
44 | ### Running in a Docker Container 45 |
46 | Run all services: 47 |
48 | ```sh 49 | docker compose up -d 50 | ``` 51 |
52 | Download the model and start the application: 53 |
54 | ```sh 55 | bash run.sh 56 | ``` 57 |
58 | ### Setting up Qdrant (Vector Database) 59 |
60 | Download the Qdrant image: 61 |
62 | ```sh 63 | docker pull qdrant/qdrant 64 | ``` 65 |
66 | Run Qdrant: 67 |
68 | ```sh 69 | docker run -d -p 6333:6333 -p 6334:6334 \ 70 | -v ./qdrant_data:/qdrant/storage \ 71 | qdrant/qdrant 72 | ``` 73 |
74 | For Windows: 75 |
76 | ```sh 77 | docker run -d --name qdrant_container -p 6333:6333 -p 6334:6334 \ 78 | -v C:/path/to/qdrant_data:/qdrant/storage \ 79 | qdrant/qdrant:latest 80 | ``` 81 |
82 | ### Setting up Dependencies 83 |
84 | **Ollama**: Install Ollama by following the instructions on Ollama's website. 85 |
86 | **Language Model**: Pull a language model from the Ollama library: 87 |
88 | ```sh 89 | ollama pull llama3:8b 90 | ``` 91 |
92 | **Text Embedding Model**: 93 |
94 | ```sh 95 | ollama pull nomic-embed-text:latest 96 | ``` 97 |
98 | Install required Python libraries: 99 |
100 | ```sh 101 | pip install -r requirements.txt 102 | ``` 103 |
104 | ## Running the Application 105 |
106 | ### Configure Node Type 107 |
108 | Pass the `--nodetype` flag to `main.py` to specify the node type: 109 |
110 | - **admin**: Institute Node (manages the global embedding)
111 | - **data**: Data Node (contributes specialized knowledge) 112 |
113 | Start the application: 114 |
115 | ```sh 116 | python main.py --data-dir data --nodetype admin 117 | ``` 118 |
119 | Replace `data` with the desired data directory. Set `--nodetype` to either `admin` or `data`. 120 |
121 | --- 122 |
123 | For more details, check the [Docs](https://github.com/TraqueAi/TraqueAI).
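
### Interactive Commands

Once a node is running, `main.py` waits for input via `wait_for_commands()` from `bayesrag.utils` (not included in this listing), so the exact prompt text may differ; the session below is an illustrative sketch of the commands the dispatch loop in `main.py` actually handles:

```text
insert ./data   # load PDFs from the given directory, split them, and upsert embeddings into Qdrant
query           # enter the interactive Q&A loop ('q' leaves it; 'Y'/'N' toggles the optional evaluation step)
send            # scroll the local Qdrant collection and publish its vectors over MQTT
quit            # stop the node and disconnect the MQTT client
```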
124 | 125 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from bayesrag.config import DATA_DIR, QDRANT_COLLECTION,ID 3 | from bayesrag.data_loader import load_directory_pdf 4 | from bayesrag.text_splitter import split_texts 5 | from bayesrag.vector_db import VectorDB 6 | from bayesrag.retriever import get_context 7 | from bayesrag.generator import generate_response 8 | from bayesrag.mq import Mqttclient 9 | from qdrant_client import QdrantClient 10 | import warnings 11 | from cryptography.utils import CryptographyDeprecationWarning 12 | 13 | warnings.filterwarnings("ignore", category=CryptographyDeprecationWarning, message="ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from this module in 48.0.0.") 14 | 15 | # Initialize session state 16 | if 'messages' not in st.session_state: 17 | st.session_state.messages = [] 18 | vectorDb=VectorDB(QDRANT_COLLECTION) 19 | 20 | # Create vector database and upsert embeddings 21 | vectorDb.create_db() 22 | def insert_data(file): 23 | with open(DATA_DIR / file.name, "wb") as f: 24 | f.write(file.getbuffer()) 25 | 26 | # Load and split documents 27 | documents = load_directory_pdf(DATA_DIR) 28 | text_chunks = split_texts(documents) 29 | vectorDb=VectorDB(QDRANT_COLLECTION) 30 | 31 | # Create vector database and upsert embeddings 32 | vectorDb.upsert_embeddings(text_chunks) 33 | 34 | def main(): 35 | st.title("Lawyer-Based Chatbot") 36 | 37 | # Sidebar for file upload and actions 38 | st.sidebar.title("Upload Document") 39 | uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf") 40 | 41 | if uploaded_file is not None: 42 | insert_data(uploaded_file) 43 | st.sidebar.success("Document uploaded and processed successfully!") 44 | 45 | # Sidebar actions 46 | with st.sidebar.expander("Actions", expanded=True): 47 | if st.button("Send Vector"): 48 | qclient = QdrantClient(url="http://localhost:6333") # Update URL if needed 49 | client = Mqttclient(replyTopic=f"USER_TOPIC-{ID}", isAdmin=False) 50 | scroll_result = qclient.scroll(collection_name=QDRANT_COLLECTION, with_vectors=True) 51 | client.send_vector(scroll_result) 52 | st.sidebar.success("Vector sent successfully.") 53 | 54 | st.header("Chat with your documents") 55 | 56 | for message in st.session_state.messages: 57 | with st.chat_message(message["role"]): 58 | st.write(message["content"]) 59 | 60 | # Chat input 61 | user_input = st.chat_input("Type your question here...") 62 | 63 | if user_input: 64 | st.session_state.messages.append({"role": "user", "content": user_input}) 65 | with st.chat_message("user"): 66 | st.write(user_input) 67 | 68 | result = get_context(user_input) 69 | 70 | with st.chat_message("assistant"): 71 | response_container = st.empty() 72 | response_text = "" 73 | for response_part in generate_response(user_input, result): 74 | response_text += response_part 75 | response_container.markdown(response_text) 76 | 77 | st.session_state.messages.append({"role": "assistant", "content": response_text}) 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TraqueAi/TraqueAI/57c030675723c3f3bb5096dfd0480b53f5f76e8c/config/config.yaml 
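A quick way to try the Streamlit chatbot defined in `app.py` above: note that `streamlit` is imported by `app.py` but is not listed in `requirements.txt`, so it has to be installed separately, and Qdrant plus the MQTT broker are assumed to be reachable at the addresses configured in `bayesrag.config`:

```sh
pip install streamlit
streamlit run app.py
```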
-------------------------------------------------------------------------------- /contract/README.md: -------------------------------------------------------------------------------- 1 | ## Foundry 2 | 3 | **Foundry is a blazing fast, portable and modular toolkit for Ethereum application development written in Rust.** 4 | 5 | Foundry consists of: 6 | 7 | - **Forge**: Ethereum testing framework (like Truffle, Hardhat and DappTools). 8 | - **Cast**: Swiss army knife for interacting with EVM smart contracts, sending transactions and getting chain data. 9 | - **Anvil**: Local Ethereum node, akin to Ganache, Hardhat Network. 10 | - **Chisel**: Fast, utilitarian, and verbose solidity REPL. 11 | 12 | ## Documentation 13 | 14 | https://book.getfoundry.sh/ 15 | 16 | ## Usage 17 | 18 | ### Build 19 | 20 | ```shell 21 | $ forge build 22 | ``` 23 | 24 | ### Test 25 | 26 | ```shell 27 | $ forge test 28 | ``` 29 | 30 | ### Format 31 | 32 | ```shell 33 | $ forge fmt 34 | ``` 35 | 36 | ### Gas Snapshots 37 | 38 | ```shell 39 | $ forge snapshot 40 | ``` 41 | 42 | ### Anvil 43 | 44 | ```shell 45 | $ anvil 46 | ``` 47 | 48 | ### Deploy 49 | 50 | ```shell 51 | $ forge script script/Counter.s.sol:CounterScript --rpc-url --private-key 52 | ``` 53 | 54 | ### Cast 55 | 56 | ```shell 57 | $ cast 58 | ``` 59 | 60 | ### Help 61 | 62 | ```shell 63 | $ forge --help 64 | $ anvil --help 65 | $ cast --help 66 | ``` 67 | -------------------------------------------------------------------------------- /contract/foundry.toml: -------------------------------------------------------------------------------- 1 | [profile.default] 2 | src = "src" 3 | out = "out" 4 | libs = ["lib"] 5 | 6 | # See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options 7 | -------------------------------------------------------------------------------- /contract/script/deploy.s.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | import "forge-std/Script.sol"; 5 | import "../src/GlobalVectorManager.sol"; 6 | 7 | contract Deploy is Script { 8 | function run() external { 9 | vm.startBroadcast(); 10 | new GlobalVectorManager(); 11 | vm.stopBroadcast(); 12 | } 13 | } -------------------------------------------------------------------------------- /contract/src/GlobalVectorManager.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | contract GlobalVectorManager { 5 | struct VectorData { 6 | string ipfsHash; 7 | address uploader; 8 | uint256 timestamp; 9 | bool verified; 10 | } 11 | 12 | mapping(uint256 => VectorData) public vectors; 13 | uint256 public vectorCount; 14 | address public admin; 15 | mapping(address => bool) public isAdmin; 16 | mapping(address => bool) public isDataNode; 17 | address[] public dataNodes; 18 | address[] public admins; 19 | 20 | event VectorUploaded(uint256 indexed vectorId, string ipfsHash, address indexed uploader, uint256 timestamp); 21 | event VectorVerified(uint256 indexed vectorId, bool verified); 22 | event AdminJoined(address indexed admin); 23 | event DataNodeJoined(address indexed dataNode); 24 | event IncentivePaid(address indexed dataNode, uint256 amount); 25 | 26 | modifier onlyAdmin() { 27 | require(isAdmin[msg.sender], "Not an admin"); 28 | _; 29 | } 30 | 31 | constructor() { 32 | admin = msg.sender; 33 | isAdmin[admin] = true; 34 | admins.push(admin); 35 
| emit AdminJoined(admin); 36 | } 37 | 38 | function joinAsDataNode() public { 39 | require(!isDataNode[msg.sender], "Already a data node"); 40 | isDataNode[msg.sender] = true; 41 | dataNodes.push(msg.sender); 42 | emit DataNodeJoined(msg.sender); 43 | } 44 | 45 | function uploadVector(string memory ipfsHash) public { 46 | require(isDataNode[msg.sender], "Not a data node"); 47 | vectorCount++; 48 | vectors[vectorCount] = VectorData(ipfsHash, msg.sender, block.timestamp, false); 49 | emit VectorUploaded(vectorCount, ipfsHash, msg.sender, block.timestamp); 50 | } 51 | 52 | function joinAsAdmin() public { 53 | require(!isAdmin[msg.sender], "Already an admin"); 54 | isAdmin[msg.sender] = true; 55 | admins.push(msg.sender); 56 | emit AdminJoined(msg.sender); 57 | } 58 | 59 | function verifyVector(uint256 vectorId, bool isVerified) public onlyAdmin { 60 | VectorData storage vector = vectors[vectorId]; 61 | vector.verified = isVerified; 62 | emit VectorVerified(vectorId, isVerified); 63 | 64 | if (isVerified) { 65 | uint256 adminCount = 0; 66 | for (uint256 i = 0; i < admins.length; i++) { 67 | if (isAdmin[admins[i]]) { 68 | adminCount++; 69 | } 70 | } 71 | if (adminCount * 2 >= admins.length) { 72 | // Merge vector logic here 73 | // Incentive logic for data node 74 | emit IncentivePaid(vector.uploader, 1 ether); // Example incentive 75 | } 76 | } 77 | } 78 | 79 | function getVector(uint256 vectorId) public view returns (string memory, address, uint256, bool) { 80 | VectorData memory vector = vectors[vectorId]; 81 | return (vector.ipfsHash, vector.uploader, vector.timestamp, vector.verified); 82 | } 83 | 84 | function leaveContract() public { 85 | require(isDataNode[msg.sender] || isAdmin[msg.sender], "Not a member"); 86 | if (isDataNode[msg.sender]) { 87 | isDataNode[msg.sender] = false; 88 | // Remove from dataNodes array logic here 89 | } else { 90 | isAdmin[msg.sender] = false; 91 | // Remove from admins array logic here 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /contract/test/GlobalVectorManager.t.sol: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | pragma solidity ^0.8.0; 3 | 4 | import "forge-std/Test.sol"; 5 | import "../src/GlobalVectorManager.sol"; 6 | 7 | contract GlobalVectorManagerTest is Test { 8 | GlobalVectorManager public manager; 9 | address public admin; 10 | address public dataNode; 11 | 12 | function setUp() public { 13 | admin = address(this); 14 | dataNode = address(0x2); 15 | manager = new GlobalVectorManager(); 16 | 17 | // Admin joins the contract 18 | manager.joinAsAdmin(); 19 | 20 | // Data node joins the contract 21 | vm.prank(dataNode); 22 | manager.joinAsDataNode(); 23 | } 24 | 25 | function testUploadVectorByDataNode() public { 26 | // Data node uploads a vector 27 | vm.prank(dataNode); 28 | manager.uploadVector("QmHashDataNode"); 29 | 30 | (string memory ipfsHash, address uploader, uint256 timestamp, bool verified) = manager.getVector(1); 31 | assertEq(ipfsHash, "QmHashDataNode"); 32 | assertEq(uploader, dataNode); 33 | assertTrue(timestamp > 0); 34 | assertFalse(verified); 35 | } 36 | 37 | function testVerifyVectorByAdmin() public { 38 | // Data node uploads a vector 39 | vm.prank(dataNode); 40 | manager.uploadVector("QmHashDataNode"); 41 | 42 | // Admin verifies the vector 43 | manager.verifyVector(1, true); 44 | (, , , bool isVerified) = manager.getVector(1); 45 | assertTrue(isVerified); 46 | } 47 | 48 | function 
testIncentivePaidToDataNode() public { 49 | // Data node uploads a vector 50 | vm.prank(dataNode); 51 | manager.uploadVector("QmHashDataNode"); 52 | 53 | // Admin verifies the vector 54 | manager.verifyVector(1, true); 55 | 56 | // Check if incentive was paid (this would require additional logic to track incentives) 57 | // For example, you could add a mapping to track incentives in the contract and assert here. 58 | } 59 | }
-------------------------------------------------------------------------------- /dev.sh: --------------------------------------------------------------------------------
1 | 2 | docker run -d -p 6333:6333 -p 6334:6334 -v C:/Users/faiza/Music/llmResearch/rag/qdrant_data:/qdrant/storage qdrant/qdrant:latest 3 | 4 | ipfs daemon 5 |
-------------------------------------------------------------------------------- /doc/drags.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TraqueAi/TraqueAI/57c030675723c3f3bb5096dfd0480b53f5f76e8c/doc/drags.JPG
-------------------------------------------------------------------------------- /doc/system.drawio: -------------------------------------------------------------------------------- (draw.io diagram XML not preserved in this dump)
-------------------------------------------------------------------------------- /doc/systemDesign.drawio: -------------------------------------------------------------------------------- (draw.io diagram XML not preserved in this dump)
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | qdrant: 3 |
image: qdrant/qdrant:latest 4 | container_name: qdrantt 5 | ports: 6 | - "6333:6333" 7 | - "6334:6334" 8 | volumes: 9 | - ./qdrant_storage:/qdrant/storage 10 | networks: 11 | - app-network 12 | 13 | ollamaa: 14 | image: ollama/ollama:latest 15 | container_name: ollamaa 16 | volumes: 17 | - ollama:/root/.ollama 18 | ports: 19 | - "11434:11434" 20 | networks: 21 | - app-network 22 | entrypoint: ["bash","run.sh"] 23 | 24 | bayesrag: 25 | build: . 26 | container_name: bayesrag_cont 27 | stdin_open: true 28 | tty: true 29 | depends_on: 30 | - qdrant 31 | - ollamaa 32 | environment: 33 | - DATA_DIR=/data 34 | - NODETYPE=admin 35 | - QDRANT_HOST=http://qdrant:6333 36 | volumes: 37 | - ./data:/data 38 | networks: 39 | - app-network 40 | command: ["python", "main.py", "--data-dir", "/data", "--nodetype", "admin"] 41 | 42 | volumes: 43 | ollama: 44 | 45 | networks: 46 | app-network: 47 | driver: bridge 48 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from bayesrag.data_loader import load_directory_pdf 4 | from bayesrag.text_splitter import split_texts 5 | from bayesrag.vector_db import VectorDB 6 | from bayesrag.retriever import get_context 7 | from bayesrag.generator import generate_response,classify_query 8 | from bayesrag.utils import ClassificationResult 9 | from bayesrag.evaluator import deepEvalutor 10 | from bayesrag.config import QDRANT_HOST,QDRANT_COLLECTION,REPLAY_TOPIC 11 | 12 | from bayesrag.mq import Mqttclient 13 | from qdrant_client import QdrantClient 14 | from bayesrag.utils import wait_for_commands 15 | from loguru import logger 16 | 17 | 18 | 19 | 20 | 21 | logger.debug(f"QDRANT_HOST: {QDRANT_HOST}", ) 22 | 23 | def query(): 24 | user_query = input("Enter your query or type 'q' to quit: ") 25 | 26 | while user_query.lower() != "q": 27 | relevant_context = get_context(user_query) 28 | print("-" * 100) 29 | # TODO 30 | # Query classification 31 | # result = classify_query(user_query) 32 | 33 | # if result == ClassificationResult.YES: 34 | # relevant_context = get_context(user_query,collection_name=QDRANT_COLLECTION) 35 | 36 | # print("-" * 100) 37 | # else: 38 | # relevant_context = None 39 | response_text = "" 40 | for text in generate_response(user_query, relevant_context): 41 | print(text, end=" ") 42 | response_text += text 43 | # Prompt the user to decide if evaluation is needed 44 | is_evaluator = input("\nDo you need evaluation? (Y/N): ").strip().lower() 45 | 46 | if is_evaluator == 'y': 47 | evaluator: dict = deepEvalutor(user_query,response_text,[relevant_context]) # Assuming this function returns a dictionary with score and reason 48 | print(f"Score: {evaluator['score']}\nReason: {evaluator['reason']}") 49 | elif is_evaluator == 'n': 50 | print("Evaluation skipped.") 51 | else: 52 | print("Invalid input. 
Please enter 'Y' or 'N'.") 53 | user_query = input("\nEnter your query or type 'q' to quit: ") 54 | 55 | def insertData(DATA_DIR,QDRANT_COLLECTION): 56 | # Load and split documents 57 | documents = load_directory_pdf(DATA_DIR) 58 | text_chunks = split_texts(documents) 59 | 60 | # Create vector database and upsert embeddings 61 | vectorDb=VectorDB(QDRANT_COLLECTION) 62 | vectorDb.create_db() 63 | vectorDb.upsert_embeddings(text_chunks) 64 | 65 | 66 | def main(): 67 | parser = argparse.ArgumentParser(description="Run the BayesRAG query system.") 68 | parser.add_argument('--data-dir', type=str, help="Directory containing the PDF documents.") 69 | parser.add_argument("--nodetype",type=str,help="Node Type") 70 | args = parser.parse_args() 71 | 72 | 73 | if args.nodetype is not None and args.nodetype.lower() == "admin": 74 | client=Mqttclient(replyTopic=REPLAY_TOPIC,isAdmin=True) 75 | else: 76 | client=Mqttclient(replyTopic=REPLAY_TOPIC,isAdmin=False) 77 | 78 | qclient = QdrantClient(url=QDRANT_HOST) 79 | if args.data_dir: 80 | insertData(args.data_dir,QDRANT_COLLECTION) 81 | logger.info("PreProcess completed successfully.") 82 | 83 | while True: 84 | command = wait_for_commands() 85 | if command == 'quit': 86 | logger.warning('Quitting') 87 | break 88 | 89 | elif command =='query': 90 | query() 91 | 92 | elif command == 'send': 93 | scroll_result=qclient.scroll(collection_name=QDRANT_COLLECTION,with_vectors=True) 94 | client.send_vector(scroll_result) 95 | elif command.startswith('insert '): 96 | data_location = command.split(' ', 1)[1] 97 | insertData(data_location,QDRANT_COLLECTION) 98 | logger.info("insertData completed successfully.") 99 | 100 | 101 | client.stop() 102 | logger.info("MQTT client stopped.") 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.2.15 2 | paho-mqtt==2.1.0 3 | openai==1.42.0 4 | qdrant-client==1.11.1 5 | loguru==0.7.2 6 | python-dotenv==1.0.1 7 | ragas 8 | datasets==2.21.0 9 | ollama==0.3.2 10 | ipfshttpclient==0.7.0 11 | pypdf==4.3.1 12 | deepeval -------------------------------------------------------------------------------- /research/.deepeval-cache.json: -------------------------------------------------------------------------------- 1 | { 2 | "test_cases_lookup_map": { 3 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 4 | "cached_metrics_data": [ 5 | { 6 | "metric_metadata": { 7 | "metric": "Answer Relevancy", 8 | "threshold": 0.7, 9 | "success": true, 10 | "score": 1.0, 11 | "reason": "The score is 1.00 because the response perfectly addressed the query without any irrelevant information.", 12 | "strictMode": false, 13 | "evaluationModel": "gpt-4", 14 | "evaluationCost": 0, 15 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost.\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 16 | }, 17 | "metric_configuration": { 18 | "threshold": 0.7, 19 | "evaluation_model": "gpt-4", 20 | "strict_mode": false, 21 | "include_reason": true 22 | } 23 | } 24 | ] 25 | }, 26 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": \"We offer a 40-day full refund at no extra cost\", 
\"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 27 | "cached_metrics_data": [ 28 | { 29 | "metric_metadata": { 30 | "metric": "Answer Relevancy", 31 | "threshold": 0.7, 32 | "success": true, 33 | "score": 1.0, 34 | "reason": "The score is 1.00 because the response accurately addresses the concern about the fit of the shoes.", 35 | "strictMode": false, 36 | "evaluationModel": "gpt-4", 37 | "evaluationCost": 0, 38 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 39 | }, 40 | "metric_configuration": { 41 | "threshold": 0.7, 42 | "evaluation_model": "gpt-4", 43 | "strict_mode": false, 44 | "include_reason": true 45 | } 46 | } 47 | ] 48 | }, 49 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": [\"We offer a 40-day full refund at no extra cost\"], \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": null}": { 50 | "cached_metrics_data": [ 51 | { 52 | "metric_metadata": { 53 | "metric": "Answer Relevancy", 54 | "threshold": 0.7, 55 | "success": true, 56 | "score": 1.0, 57 | "reason": "The score is 1.00 because the response completely and accurately addressed the concern raised in the question.", 58 | "strictMode": false, 59 | "evaluationModel": "gpt-4", 60 | "evaluationCost": 0, 61 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund at no extra cost.\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": null\n }\n]" 62 | }, 63 | "metric_configuration": { 64 | "threshold": 0.7, 65 | "evaluation_model": "gpt-4", 66 | "strict_mode": false, 67 | "include_reason": true 68 | } 69 | } 70 | ] 71 | }, 72 | "{\"actual_output\": \"We offer a 30-day full refund at no extra cost.\", \"context\": null, \"expected_output\": null, \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": [\"We offer a 40-day full refund at no extra cost\"]}": { 73 | "cached_metrics_data": [ 74 | { 75 | "metric_metadata": { 76 | "metric": "Answer Relevancy", 77 | "threshold": 0.7, 78 | "success": false, 79 | "score": 0.0, 80 | "reason": "The score is 0.00 because the actual output provided statements that were completely unrelated to the input question, discussing shoe refunds instead of addressing the issue of poorly fitting shoes.", 81 | "strictMode": false, 82 | "evaluationModel": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", 83 | "evaluationCost": 0, 84 | "verboseLogs": "Statements:\n[\n \"We offer a 30-day full refund\",\n \"at no extra cost\"\n]\n\nVerdicts:\n[\n {\n \"verdict\": \"no\",\n \"reason\": \"The statements provided are about shoe refunds and are not related to the input question, which asks about what to do if shoes don't fit.\"\n }\n]" 85 | }, 86 | "metric_configuration": { 87 | "threshold": 0.7, 88 | "evaluation_model": "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF", 89 | "strict_mode": false, 90 | "include_reason": true 91 | } 92 | } 93 | ] 94 | }, 95 | "{\"actual_output\": \"We offer a 300-day full refund at no extra cost.\", \"context\": null, \"expected_output\": \"You are eligible for a 30 day full refund at no extra cost.\", \"hyperparameters\": null, \"input\": \"What if these shoes don't fit?\", \"retrieval_context\": [\"All customers are eligible for a 30 day full refund at no extra cost.\"]}": { 96 | "cached_metrics_data": [ 97 | { 98 | 
"metric_metadata": { 99 | "metric": "Contextual Precision", 100 | "threshold": 0.7, 101 | "success": true, 102 | "score": 1.0, 103 | "reason": "The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking.", 104 | "strictMode": false, 105 | "evaluationModel": "gpt-4", 106 | "evaluationCost": 0, 107 | "verboseLogs": "Verdicts:\n[\n {\n \"verdict\": \"yes\",\n \"reason\": \"The text 'All customers are eligible for a 30 day full refund at no extra cost.' directly addresses the concern about shoes not fitting.\"\n }\n]" 108 | }, 109 | "metric_configuration": { 110 | "threshold": 0.7, 111 | "evaluation_model": "gpt-4", 112 | "strict_mode": false, 113 | "include_reason": true 114 | } 115 | } 116 | ] 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /research/RAGAS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "os.environ[\"OPENAI_API_KEY\"]=\"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\"" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "application/vnd.jupyter.widget-view+json": { 29 | "model_id": "088582f0c86c42d28f180e08019fb5c6", 30 | "version_major": 2, 31 | "version_minor": 0 32 | }, 33 | "text/plain": [ 34 | "Evaluating: 0%| | 0/2 [00:00\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | "
 | question | answer | contexts | faithfulness
0 | When was the first super bowl? | The first superbowl was held on Jan 15, 1967 | [The First AFL–NFL World Championship Game was... | 0.0
1 | Who won the most super bowls? | The most super bowls have been won by The New ... | [The Green Bay Packers...Green Bay, Wisconsin.... | 0.0
\n", 84 | "" 85 | ], 86 | "text/plain": [ 87 | " question \\\n", 88 | "0 When was the first super bowl? \n", 89 | "1 Who won the most super bowls? \n", 90 | "\n", 91 | " answer \\\n", 92 | "0 The first superbowl was held on Jan 15, 1967 \n", 93 | "1 The most super bowls have been won by The New ... \n", 94 | "\n", 95 | " contexts faithfulness \n", 96 | "0 [The First AFL–NFL World Championship Game was... 0.0 \n", 97 | "1 [The Green Bay Packers...Green Bay, Wisconsin.... 0.0 " 98 | ] 99 | }, 100 | "execution_count": 2, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "from datasets import Dataset \n", 107 | "from ragas.metrics import faithfulness\n", 108 | "from ragas import evaluate\n", 109 | "\n", 110 | "data_samples = {\n", 111 | " 'question': ['When was the first super bowl?', 'Who won the most super bowls?'],\n", 112 | " 'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],\n", 113 | " 'contexts' : [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], \n", 114 | " ['The Green Bay Packers...Green Bay, Wisconsin.','The Packers compete...Football Conference']],\n", 115 | "}\n", 116 | "dataset = Dataset.from_dict(data_samples)\n", 117 | "score = evaluate(dataset,metrics=[faithfulness])\n", 118 | "score.to_pandas()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "application/vnd.jupyter.widget-view+json": { 129 | "model_id": "aac45b3caa9f4031b7253e47a5eaba12", 130 | "version_major": 2, 131 | "version_minor": 0 132 | }, 133 | "text/plain": [ 134 | "Evaluating: 0%| | 0/2 [00:00\n", 144 | "\n", 157 | "\n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
 | question | answer | contexts | answer_relevancy
0 | When was the first super bowl? | The first superbowl was held on Jan 15, 1967 | [The First AFL–NFL World Championship Game was... | 0.975320
1 | Who won the most super bowls? | The most super bowls have been won by The New ... | [The Green Bay Packers...Green Bay, Wisconsin.... | 0.943043
\n", 184 | "" 185 | ], 186 | "text/plain": [ 187 | " question \\\n", 188 | "0 When was the first super bowl? \n", 189 | "1 Who won the most super bowls? \n", 190 | "\n", 191 | " answer \\\n", 192 | "0 The first superbowl was held on Jan 15, 1967 \n", 193 | "1 The most super bowls have been won by The New ... \n", 194 | "\n", 195 | " contexts answer_relevancy \n", 196 | "0 [The First AFL–NFL World Championship Game was... 0.975320 \n", 197 | "1 [The Green Bay Packers...Green Bay, Wisconsin.... 0.943043 " 198 | ] 199 | }, 200 | "execution_count": 3, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "from datasets import Dataset \n", 207 | "from ragas.metrics import answer_relevancy\n", 208 | "from ragas import evaluate\n", 209 | "\n", 210 | "data_samples = {\n", 211 | " 'question': ['When was the first super bowl?', 'Who won the most super bowls?'],\n", 212 | " 'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],\n", 213 | " 'contexts' : [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], \n", 214 | " ['The Green Bay Packers...Green Bay, Wisconsin.','The Packers compete...Football Conference']],\n", 215 | "}\n", 216 | "dataset = Dataset.from_dict(data_samples)\n", 217 | "score = evaluate(dataset,metrics=[answer_relevancy])\n", 218 | "score.to_pandas()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "env", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.12.2" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /research/communication/kafka/consumer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kafka import KafkaConsumer 3 | 4 | KAFKA_BOOTSTRAP_SERVERS = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:29092") 5 | KAFKA_TOPIC_TEST = os.environ.get("KAFKA_TOPIC_TEST", "test") 6 | KAFKA_API_VERSION = os.environ.get("KAFKA_API_VERSION", "7.3.1") 7 | 8 | consumer = KafkaConsumer( 9 | KAFKA_TOPIC_TEST, 10 | bootstrap_servers=[KAFKA_BOOTSTRAP_SERVERS], 11 | api_version=KAFKA_API_VERSION, 12 | auto_offset_reset="earliest", 13 | enable_auto_commit=True, 14 | ) 15 | 16 | for message in consumer: 17 | print(message.value.decode("utf-8")) 18 | -------------------------------------------------------------------------------- /research/communication/kafka/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:latest 5 | container_name: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | ZOOKEEPER_TICK_TIME: 2000 9 | ports: 10 | - 22181:2181 11 | restart: on-failure 12 | kafka: 13 | image: confluentinc/cp-kafka:latest 14 | container_name: kafka 15 | depends_on: 16 | - zookeeper 17 | ports: 18 | - 
29092:29092 19 | environment: 20 | KAFKA_BROKER_ID: 1 21 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 22 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092 23 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT 24 | KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT 25 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 26 | restart: on-failure 27 | -------------------------------------------------------------------------------- /research/communication/kafka/producer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import json 5 | from kafka import KafkaProducer 6 | 7 | KAFKA_BOOTSTRAP_SERVERS = os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:29092") 8 | KAFKA_TOPIC_TEST = os.environ.get("KAFKA_TOPIC_TEST", "test") 9 | KAFKA_API_VERSION = os.environ.get("KAFKA_API_VERSION", "7.3.1") 10 | 11 | producer = KafkaProducer( 12 | bootstrap_servers=[KAFKA_BOOTSTRAP_SERVERS], 13 | api_version=KAFKA_API_VERSION, 14 | ) 15 | i = 0 16 | while i <= 30: 17 | producer.send( 18 | KAFKA_TOPIC_TEST, 19 | json.dumps({"message": f"Hello, Kafka! - test {i}"}).encode("utf-8"), 20 | ) 21 | i += 1 22 | time.sleep(random.randint(1, 5)) 23 | producer.flush() 24 | -------------------------------------------------------------------------------- /research/communication/mqtt/receive.py: -------------------------------------------------------------------------------- 1 | import pika 2 | import threading 3 | import time 4 | 5 | # Function to send messages 6 | def send_message(): 7 | while True: 8 | time.sleep(10) # Wait for 10 seconds 9 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 10 | channel = connection.channel() 11 | channel.queue_declare(queue='hello') 12 | message = 'Hello World!' 13 | channel.basic_publish(exchange='', routing_key='hello', body=message) 14 | print(f" [x] Sent '{message}'") 15 | connection.close() 16 | 17 | # Function to receive messages 18 | def receive_message(): 19 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 20 | channel = connection.channel() 21 | channel.queue_declare(queue='hello') 22 | 23 | def callback(ch, method, properties, body): 24 | print(f" [x] Received {body}") 25 | 26 | channel.basic_consume(queue='hello', on_message_callback=callback, auto_ack=True) 27 | print(' [*] Waiting for messages. To exit press CTRL+C') 28 | channel.start_consuming() 29 | 30 | # Start threads for sending and receiving messages 31 | send_thread = threading.Thread(target=send_message) 32 | receive_thread = threading.Thread(target=receive_message) 33 | 34 | send_thread.start() 35 | receive_thread.start() 36 | 37 | # Keep the main thread running, otherwise signals are ignored. 38 | send_thread.join() 39 | receive_thread.join() 40 | -------------------------------------------------------------------------------- /research/communication/mqtt/send.py: -------------------------------------------------------------------------------- 1 | import pika 2 | import threading 3 | import time 4 | 5 | # Function to send messages 6 | def send_message(): 7 | while True: 8 | time.sleep(10) # Wait for 10 seconds 9 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 10 | channel = connection.channel() 11 | channel.queue_declare(queue='hello') 12 | message = 'Hello World!' 
13 | channel.basic_publish(exchange='', routing_key='hello', body=message) 14 | print(f" [x] Sent '{message}'") 15 | connection.close() 16 | 17 | # Function to receive messages 18 | def receive_message(): 19 | connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) 20 | channel = connection.channel() 21 | channel.queue_declare(queue='hello') 22 | 23 | def callback(ch, method, properties, body): 24 | print(f" [x] Received {body}") 25 | 26 | channel.basic_consume(queue='hello', on_message_callback=callback, auto_ack=True) 27 | print(' [*] Waiting for messages. To exit press CTRL+C') 28 | channel.start_consuming() 29 | 30 | # Start threads for sending and receiving messages 31 | send_thread = threading.Thread(target=send_message) 32 | receive_thread = threading.Thread(target=receive_message) 33 | 34 | send_thread.start() 35 | receive_thread.start() 36 | 37 | # Keep the main thread running, otherwise signals are ignored. 38 | send_thread.join() 39 | receive_thread.join() 40 | -------------------------------------------------------------------------------- /research/groq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Fast language models, also known as fasttext or fasttext-based language models, have gained significant attention in recent years due to their ability to process and analyze large amounts of text data quickly and efficiently. Here are some reasons why fast language models are important:\n", 13 | "\n", 14 | "1. **Speed and Scalability**: Fast language models can process text data at incredible speeds, often orders of magnitude faster than traditional language models. This is particularly important for large-scale NLP applications that require real-time processing, such as chatbots, virtual assistants, and social media analytics.\n", 15 | "2. **Large-scale Text Analysis**: Fast language models can analyze massive amounts of text data quickly, making them ideal for applications that involve large-scale text processing, such as:\n", 16 | "\t* Sentiment analysis on social media platforms\n", 17 | "\t* Topic modeling on vast datasets\n", 18 | "\t* Named entity recognition on large documents\n", 19 | "3. **Real-time Processing**: Fast language models can process text data in real-time, enabling applications that require immediate results, such as:\n", 20 | "\t* Sentiment analysis for customer service chatbots\n", 21 | "\t* Automatic language translation for real-time conversations\n", 22 | "\t* Text summarization for news articles\n", 23 | "4. **Cloud and Edge Computing**: Fast language models are well-suited for cloud and edge computing applications, as they can be deployed on distributed architectures and process data in a decentralized manner. This enables secure, efficient, and scalable NLP processing.\n", 24 | "5. **Improved Accuracy**: Fast language models can achieve similar or even better accuracy compared to traditional language models, despite their speed advantages. This is due to their ability to leverage advanced techniques, such as:\n", 25 | "\t* Subword modeling\n", 26 | "\t* Character-level modeling\n", 27 | "\t* Attention mechanisms\n", 28 | "6. 
**Low Resource Requirements**: Fast language models often require less computational resources and memory compared to traditional language models, making them suitable for resource-constrained devices, such as:\n", 29 | "\t* Mobile devices\n", 30 | "\t* IoT devices\n", 31 | "\t* Edge devices\n", 32 | "7. **Domain Adaptation**: Fast language models can adapt to new domains and datasets quickly, enabling applications that require context-specific language understanding, such as:\n", 33 | "\t* Product recommendation systems\n", 34 | "\t* Medical diagnosis assistance\n", 35 | "\t* Financial report analysis\n", 36 | "8. **Explainability and Interpretability**: Fast language models often provide better explainability and interpretability due to their ability to generate feature importance scores, attention weights, and other transparency-enhancing techniques.\n", 37 | "\n", 38 | "In summary, fast language models offer a unique combination of speed, scalability, accuracy, and resource efficiency, making them an indispensable tool for various NLP applications. As the demand for fast and efficient language processing continues to grow, fast language models will play a crucial role in driving innovation and improving the performance of various NLP-based applications.\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "import os\n", 44 | "\n", 45 | "from groq import Groq\n", 46 | "\n", 47 | "client = Groq(\n", 48 | " # This is the default and can be omitted\n", 49 | " api_key=os.environ.get(\"GROQ_API_KEY\",\"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\"),\n", 50 | ")\n", 51 | "\n", 52 | "chat_completion = client.chat.completions.create(\n", 53 | " messages=[\n", 54 | " {\n", 55 | " \"role\": \"system\",\n", 56 | " \"content\": \"you are a helpful assistant.\"\n", 57 | " },\n", 58 | " {\n", 59 | " \"role\": \"user\",\n", 60 | " \"content\": \"Explain the importance of fast language models\",\n", 61 | " }\n", 62 | " ],\n", 63 | " model=\"llama3-8b-8192\",\n", 64 | ")\n", 65 | "\n", 66 | "print(chat_completion.choices[0].message.content)" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "env", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.12.2" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /research/ollama.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# from litellm import completion\n", 10 | "\n", 11 | "# response = completion(\n", 12 | "# model='llama3:8b', \n", 13 | "# messages=[{ \"content\": \"respond in 20 words. 
who are you?\",\"role\": \"user\"}], \n", 14 | "# api_base=\"http://localhost:11434\"\n", 15 | "# )\n", 16 | "# print(response)\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 29, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "{\"results\": \"no\"}\n" 29 | ] 30 | }, 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "{'results': 'no'}" 35 | ] 36 | }, 37 | "execution_count": 29, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "import ollama\n", 44 | "\n", 45 | "user_query = \"hey\"\n", 46 | "system_prompt = \"\"\"\n", 47 | "You are a Lawyer. Classify the following question related to Law and always give a response in JSON format as {\"results\": \"yes/no\"} without giving any reason in the response.\n", 48 | "\"\"\"\n", 49 | "\n", 50 | "response = ollama.chat(\n", 51 | " model='llama3:8b', \n", 52 | " messages=[\n", 53 | " {\"role\": \"system\", \"content\": system_prompt},\n", 54 | " {\"role\": \"user\", \"content\": user_query}\n", 55 | " ],\n", 56 | " format=\"json\",\n", 57 | ")\n", 58 | "response_json=response['message']['content']\n", 59 | "\n", 60 | "print(response_json)\n", 61 | "import json\n", 62 | "json.loads(response_json)\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "The sky appears blue because of a phenomenon called Rayleigh scattering, named after the British physicist Lord Rayleigh. Here's what happens:\n", 75 | "\n", 76 | "1. **Sunlight enters Earth's atmosphere**: When sunlight enters our atmosphere, it contains all the colors of the visible spectrum, including red, orange, yellow, green, blue, indigo, and violet.\n", 77 | "2. **Light interacts with tiny molecules**: The light then encounters tiny molecules of gases like nitrogen (N2) and oxygen (O2). These molecules are much smaller than the wavelength of light.\n", 78 | "3. **Rayleigh scattering occurs**: When the light interacts with these small molecules, it scatters in all directions. This scattering is more pronounced for shorter wavelengths (like blue and violet) than longer wavelengths (like red and orange).\n", 79 | "4. **Blue light is scattered more**: The shorter wavelengths of blue light are scattered more than the longer wavelengths of red light. This is because the smaller molecules can scatter the shorter wavelengths more efficiently.\n", 80 | "5. **Our eyes perceive the scattered light**: As we look at the sky, our eyes see the scattered blue light as a blue color. This is why the sky typically appears blue during the daytime when the sun is overhead.\n", 81 | "\n", 82 | "Here are some interesting facts to note:\n", 83 | "\n", 84 | "* The exact shade of blue can vary depending on atmospheric conditions, like pollution levels or dust particles in the air.\n", 85 | "* During sunrise and sunset, the sky can take on hues of red and orange because the light has to travel longer distances through the atmosphere, scattering more blue light along the way.\n", 86 | "* The same Rayleigh scattering effect is responsible for the blue color of many natural bodies of water, like oceans and lakes.\n", 87 | "\n", 88 | "Now, go ahead and gaze up at that beautiful blue sky – it's all about the science!" 
89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "import ollama\n", 94 | "response = ollama.chat(model='llama3:8b', messages=[\n", 95 | " {\n", 96 | " 'role': 'user',\n", 97 | " 'content': 'Why is the sky blue?',\n", 98 | " },\n", 99 | " \n", 100 | "],stream=True)\n", 101 | "\n", 102 | "for chunk in response:\n", 103 | " print(chunk['message']['content'], end='', flush=True)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "The sky appears blue because of a phenomenon called Rayleigh scattering, named after the British physicist Lord Rayleigh, who first described it in the late 19th century.\n", 116 | "\n", 117 | "Here's what happens:\n", 118 | "\n", 119 | "1. **Sunlight enters Earth's atmosphere**: When sunlight enters our atmosphere, it contains all the colors of the visible spectrum (red, orange, yellow, green, blue, indigo, and violet).\n", 120 | "2. **Tiny molecules scatter light**: The tiny molecules of gases like nitrogen (N2) and oxygen (O2), as well as aerosols like water vapor, carbon dioxide, and pollutants, are present in the atmosphere. These molecules scatter the shorter, blue wavelengths of light more than the longer, red wavelengths.\n", 121 | "3. **Blue light is scattered in all directions**: As a result of this scattering, the blue light is dispersed throughout the atmosphere, reaching our eyes from all directions.\n", 122 | "4. **Our eyes perceive the blue color**: Since we see the blue light from all angles, it appears to us as a uniform blue color, which is why the sky looks blue.\n", 123 | "\n", 124 | "It's important to note that:\n", 125 | "\n", 126 | "* The exact shade of blue can vary depending on atmospheric conditions, such as pollution levels, dust, and water vapor.\n", 127 | "* During sunrise and sunset, when the sun's rays travel longer distances through the atmosphere, they scatter more red light, giving the sky its characteristic orange or reddish hue.\n", 128 | "* At higher altitudes, where there is less atmospheric scattering, the sky can appear darker blue or even black.\n", 129 | "\n", 130 | "Now, go outside and appreciate that beautiful blue sky!" 
131 | ]
132 | }
133 | ],
134 | "source": [
135 | "import ollama\n",
136 | "\n",
137 | "# NOTE: keep the system and user messages as separate dicts;\n",
138 | "# a single dict with a duplicated 'role' key would silently\n",
139 | "# drop the system prompt before the request is sent.\n",
140 | "response = ollama.chat(model='llama3:8b', messages=[\n",
141 | "    {'role': 'system', 'content': 'you are bot'},\n",
142 | "    {'role': 'user', 'content': 'Why is the sky blue?'},\n",
143 | "], stream=True)\n",
144 | "\n",
145 | "for chunk in response:\n",
146 | "    print(chunk['message']['content'], end='', flush=True)"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 6,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "data": {
156 | "text/plain": [
157 | "{'embedding': [0.5969330072402954,\n",
158 | "  0.40240398049354553,\n",
[... additional embedding values omitted ...]
897 | " -1.3058867454528809,\n", 898 | " 0.24302329123020172,\n", 899 | " 0.2049877941608429,\n", 900 | " 0.32993578910827637,\n", 901 | " 0.014989953488111496,\n", 902 | " -0.45766833424568176,\n", 903 | " 0.29031941294670105,\n", 904 | " 0.5337725877761841,\n", 905 | " 1.4808156490325928,\n", 906 | " -0.36382490396499634,\n", 907 | " 0.44398123025894165,\n", 908 | " 0.22713124752044678,\n", 909 | " -0.1587594598531723,\n", 910 | " 0.3459387421607971,\n", 911 | " 0.41898661851882935,\n", 912 | " 0.6941905617713928,\n", 913 | " 0.43946877121925354,\n", 914 | " -0.022126808762550354,\n", 915 | " 2.0339138507843018,\n", 916 | " -0.050843410193920135,\n", 917 | " 0.45677250623703003,\n", 918 | " -0.21609695255756378,\n", 919 | " 0.34072253108024597,\n", 920 | " 0.7042638659477234,\n", 921 | " -1.1494847536087036,\n", 922 | " -0.14919373393058777,\n", 923 | " -0.5456319451332092,\n", 924 | " 0.4699326455593109]}" 925 | ] 926 | }, 927 | "execution_count": 6, 928 | "metadata": {}, 929 | "output_type": "execute_result" 930 | } 931 | ], 932 | "source": [ 933 | "import ollama\n", 934 | "\n", 935 | "ollama.embeddings(model='nomic-embed-text', prompt='The sky is blue because of rayleigh scattering')" 936 | ] 937 | } 938 | ], 939 | "metadata": { 940 | "kernelspec": { 941 | "display_name": "env", 942 | "language": "python", 943 | "name": "python3" 944 | }, 945 | "language_info": { 946 | "codemirror_mode": { 947 | "name": "ipython", 948 | "version": 3 949 | }, 950 | "file_extension": ".py", 951 | "mimetype": "text/x-python", 952 | "name": "python", 953 | "nbconvert_exporter": "python", 954 | "pygments_lexer": "ipython3", 955 | "version": "3.12.2" 956 | } 957 | }, 958 | "nbformat": 4, 959 | "nbformat_minor": 2 960 | } 961 | -------------------------------------------------------------------------------- /research/retriver.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "\n", 11 | "# OpenAI\n", 12 | "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", 13 | "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 20, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "from langchain_community.document_loaders import PyPDFLoader\n", 23 | "from langchain_community.vectorstores import FAISS\n", 24 | "from langchain_openai import OpenAIEmbeddings\n", 25 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 26 | "\n", 27 | "loader = PyPDFLoader('./data/H-1B Filing Instructions.pdf')\n", 28 | "documents = loader.load()\n", 29 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)\n", 30 | "docs = text_splitter.split_documents(documents)\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 21, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "61" 42 | ] 43 | }, 44 | "execution_count": 21, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "len(docs)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 22, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "[Document(page_content='1 \\n H-1B FILING INSTRUCTIONS AND GENERAL INFORMATION \\nWhat is an H -1B Petition? \\nH-1B is a nonimmigrant status that permits professional employment. 
The employer must file an H -1B \\npetition with the U .S. Citizenship and Immigration Services (US CIS) and obtain a pproval for it; the “alien \\nbeneficiary” must obtain H -1B status either by applying for an H -1B visa abroad and entering the U.S. in H -1B \\nstatus, or by changing to H -1B status from another nonimmigrant status in the U.S. H-1B employment may not \\nbegin until the office of International Student and Scholar Services ( ISSS ) informs the hiring department \\nthat all necessary approvals have been received. \\n \\nRutgers policy restricts H -1B sponsorship to full time faculty appointments only. For teaching faculty, \\n“Assistant Professor” is the lowest range title for which Rutgers will sponsor an H -1B. For research faculty, the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 62 | " Document(page_content='lowest range H -1B title is generally “Research Associate” unless very specific exceptional criteria are met; Details \\nof Rutgers International hir ing policy is available at: \\nhttps://policies.rutgers.edu/view -policies/human -resources -hr-%E2%80%93 -section -60 \\nSummary of Requirements and Forms \\nWhen filing an H -1B petition, there are three USCIS forms to complete for all applicants : I-129 petition, the \\nH Classification Supplement to For m I-129, and the H1B Data Collection and Filing Fee Exemption Supplement . \\nThere is also an optional form (I -907) if the petition will be filed with a request for “Premium Processing.” \\n(“Premium Processing” is expedited pr ocessing by USCIS and requires an extra filing fee of $1, 410.) All of t he \\nactual USCIS H1B forms will be completed by ISSS and submitted to USCIS, along with all the required \\nsupporting documentation , in a packet . ISSS obtains all the required information needed for completing the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 63 | " Document(page_content='USCIS forms from the RUTGERS INTERNAL H -1B REQUEST Packet, which has already been completed and \\nsubmitted to ISSS by the hiring unit. \\n \\nThere is a required USCIS form (I-539) if the alien is already in the U.S. and has visa depend ents who are \\nalso already physically in the U.S. This form needs to be completed and signed by the dependent. ISSS is not \\nresponsible for reviewing the form I-539, although we will enclose the completed form in the H1B petitio n packet \\nand submit it to USCI S. The I -539 form to use should always be downloaded from the USCIS website at: \\nwww.uscis.gov . Detailed information on filing this form and its fees is also available on this website. \\n \\nIn addition, there are supporting documents that must be attached to the application in duplicate. Please \\nrefer to Page 14 for a detailed list of required documentation. \\n \\nThere is also a regulatory requirement that a \"Public Access File\" (PAF) be set up and maintained by the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 64 | " Document(page_content='employer. ISSS creates , update s, and maintain s a PAF for every H1B worker from the day an LCA is submitted \\nto Department of Labor (DOL) to the day 12 months after the approved H1B end date, while making it available \\nfor public inspection upon request and for potential audits by various government agencies. \\n \\nThere are two sets of filing fees for each petition . Each USCIS fee requires a separate check made \\npayable to “ U.S. Dept. 
of Homeland Security.” The Address in Cornerstone is USCIS , California Service \\nCenter , Laguna Niguel, CA 926 77. USCIS Filing fees vary depending on the specific types of petitions . \\n \\nUSCIS Fee Schedule \\n$460 Fee for Form I -129 ( must be paid by hiring unit under all circumstances ) \\n$500 “Anti-Fraud” Fee ( must be paid by hiring unit for all new pet itions , i.e. all new H1B cases with Rutgers ) \\n$370 Required ONLY IF I-539 is being submitted for dependents ( This fee may be paid by hiring unit or alien)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 65 | " Document(page_content='$85 Biometrics fee required for each dependent included on the Form I -539. \\n$1440 Optional Premiu m Processing (PP) fee (must be paid by dept. unless needed strictly for alien’s personal \\nreasons . If the H1B beneficiary pays this fee, a written statement must be submitted to ISSS confirming that the \\nPP fee is paid by the H1B beneficiary because the requ est is for his/her personal benefits. ) \\n \\nRutgers visa fee schedule Please click on the hyperlink for e ach specific fee requirement. \\nRutgers fees are paid via a journal entry. Please complete the Transmittal Form for Visa Processing Fees .', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 0}),\n", 66 | " Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 67 | " Document(page_content='____currently overseas and expected to apply for H -1B visa at a U.S. consulate abroad \\n____currently in the U.S. with visa dependents who have or will require H -4 status \\n\\uf0b7 a list of every period the foreign national (FN) has ever held J -1 or J -2 status in any J category (Note: the \\n1-129 form asks for documentation of any period(s) of time the FN spent in J-1 or J -2 status, so please \\nask the FN for such document s in the form of copies of DS -2019s, IAP -66s, or J -1/J-2 visa in passport.) \\nThe Next Step – Receive an H -1B Packet via Email \\nAssuming we have no questions about the information you pr ovide in your request for an H -1B packet, \\nwe will e -mail you a complete set of instructions and forms for you to read or complete. 
\\n \\nTime Frames to Bear in Mind \\nAfter receiving a fully completed request packet from the hiring department and p rior to f iling the H -1 petition with', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 68 | " Document(page_content='USCIS, our office’ s pre -processing will take about 4 weeks (to submit the LCA to the DOL, obtain additional \\nuniversity clearance, completing actual USCIS petition forms, and thoroughly review, organize and prepare the \\npacket for submission). ISSS will then proceed with filing the H -1B petition with the USCIS. Normally, it take s \\nabout 8+ months for USCIS to adjudicate regular processing cases but it guarantees a 15 -day adjudication time \\nfor “Premium Processing “petitions . The tur n-around times at USCIS will be listed at \\nhttps://egov.uscis.gov/cris/processTimesDisplayInit.do (scroll down to Service Center; select CSC -California \\nService Center, then click on proc essing dates). While Premium Processing will decrease the turn -around time \\nat USCIS to about 2-3 weeks, the turn -around time for pre -processing at our office is determined only by the order \\nin which the paperwork is received from the requesting department s. We therefore advise departments to submit', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 69 | " Document(page_content=\"complete H -1B packets to ISSS as early as 6 months prior to the intended starting date of the H -1B petition, in \\norder to avoid both the USCIS $1,410 Premium Processing fee and the ISSS late fee of $200 . There is also an \\noption for expedited processing service at ISSS , which requires a fee of $300. ISSS tries to process the request \\nwithin 2 -3 weeks if everything is in order . Please see our Late Fee Schedule . \\n \\nH-1B P ETITION FOR FACULTY APPOINTMENT \\n(INSTRUCTION PACKET FOR EMPLOYING DEPARTMENTS ) \\nTable of Contents \\nDepartments' Legal Responsibilities……………………………………………………………………… ..…3 \\nIntroduction to Each of the Required Forms in this Packet ………………………. ……………….... .........4 \\n Form #1: Worksheet for Labor Condition Application ( LCA) …………………………………….……. 5-6 \\n Form #2: “Deemed Export Certification for H -1B Petitions” ………………………………………….... 7-8 \\n Form #3: H-1B Scholar Information Shee t (Required)………………. …………………………… ….... 9-10\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 70 | " Document(page_content='Form #4: H-1B Department Certification Form ……………………………………………………… ..….11 \\n Form #5: Memorandum Explaining the Actual wage…………… ……………………………………… .12 \\n Form #6: Form I -539 ( Required for Visa Dependents only )…………………………… …….………....13 \\n \\nList of Required Supporting Documents ………………………………………….. …………… …………… 14 \\nSample Letter in Support of H -1B Petition…………………………………………………… ……………… 15 \\nChart of H1B Visa Process ……………………………………………………………… …………………… .16', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 71 | " Document(page_content='3 \\n \\n \\nDepa rtments\\' Legal Responsibilities in Filing H -1B Petitions \\nDepartments filing H -1B petitions assume significant liability on behalf of Rutgers , and are thus responsible for \\nunderstanding and strictly adhering to certain requirements. Please read the list of \"Departments\\' Legal \\nResponsibilities\" thoroughly before proceeding with the application materials. 
For most H -1B matters, the actual \\nresponsibility for complying with these federal regulations rests with hiring units , since it is there that H -1B \\nemployees\\' files are initiated and maintained. Because H -1B regulations are complex, and because an \\nemployer\\'s failure to comply with these regulations can result in significant penalties, we advise you to read the \\nresponsibilities listed below with utmost attentio n, to fill out forms exactly as specified on the attached \\ninstructions, and to follow the guidelines noted throughout this packet with care.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 72 | " Document(page_content=\"1. Departments must submit in a timely fashion the complete H -1B request packet (approved LCA \\nworksheet, fi ling fees, and the required number of photocopies of all accompanying supporting \\ndocuments) to ISSS —even if the department is requesting and paying for PP service, The USCIS PP Fee \\nexpedites processing at the USCIS but not within the University itself. \\n Departments must allow an absolute minimum of four weeks for ISSS to process an H -1B petition packet \\nbefore it is ready to submit to USCIS. Once the petition is submitted, the amount of time it will take USCIS to \\nadjudicate it depends upon the type of ap plication being filed. For P P service petitions, an approval comes in \\nabout 3 -4 weeks. Departments NOT filing via P P service should check with ISSS for appropriate timeframes, as \\nthey will vary based on a number of different factors . \\n \\n2. The terms o f an H -1B worker's employment may not be changed in ANY way other than routine salary\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 73 | " Document(page_content='increases during the period of the H -1B approval . Any changes in the terms of employment automatically \\ncancel the validity of the H -1B approval, and the employee thereby becomes ineligible to continue working legally \\nat Rutgers. Examples of prohibited changes are: job title, work -site, or job responsibilities. If a department \\ncontemplates any of these changes for a current H -1B employee, please contact ISSS to discuss. \\n \\n3. It is of utmost importance that departments inform ISSS if H-1B employment is terminated for any \\nreason before the end -date requested on form I -129. If an H -1B employee is terminated by Rutgers prior to \\nthe ending date requested on the I -129 petition , the employer (via ISSS ) is required by federal regulation to notify \\nUSCIS. In addition, the employing unit is liable for transportation costs to the foreign country in which the \\nemployee most recently resided (unless the employee leaves by choice).', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 74 | " Document(page_content='4. ISSS will provide the hiring unit and the FN with a copy of the completed I-129 petition form , \\nDOL -certified LCA as soon as it is available and also notify the AAUP of the LCA filing. \\n \\n5. Departments are responsible for ensuring that H -1B employees going on Rutgers payroll for the first \\ntime attend a \"check -in/orientation session \" at ISSS as soon as possible after the employee has arrived \\nat Rutgers . The workshop is offered weekly on Thursday at 2:45 p.m. and requires adv ance sign -up by emailing \\nISSS at gan tonatos@global.rutgers.edu. For employees whose H -1B status is being extended, the workshop is \\nnot necessary. \\n \\n6. 
Federal regulations change regularly and ISSS routinely updates the H -1B instruction packet. Please \\nread all of ISSS ’ H-1B instructi ons each time you file an H -1B petition. Procedures and forms may be', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 75 | " Document(page_content='different from those you used the last time you filed an H -1B petition. H-1B employment may not begin until ISSS informs the department that all necessary approvals have \\nbeen received. “Employment” refers not only to being on Payroll, but also to perfo rming services for \\nwhich an individual would normally be compensated.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 76 | " Document(page_content='4 \\n \\n Introduction to Each of the Required Form s in the H1B Packet \\n \\nForm #1 : Worksheet for La bor Condition Application (LCA ) \\n The LCA is the form used to obtain clearance from the U.S. Department of Labor (DOL) to proceed with filing the \\nH-1B petition. An LCA approval from DOL gives us the \"go ahead\" to file the actual H -1B petition with the USCIS . \\n Extreme care must be taken in completing the LCA Worksheet accurately per the attached instructions; incorrect \\ninformation could result in serious liability for Rutgers. \\nProcedures for the form : \\n\\uf0a8 Dept. completes “Worksheet for Labor Condition Application ”; \\n\\uf0a8 Dept .sends the Worksheet to the Dean\\'s office for Dean\\'s review and si gnature ; \\n\\uf0a8 Department retrieves those documents from the Dean’s office, then email the Worksheet to Academic Labor \\nRelations (ALR) for review and approval : OALR@oq.rutgers.edu .', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 77 | " Document(page_content='\\uf0a8 ALR reviews materials, signs Worksheet for approval and faxes it back to hiring department; \\n\\uf0a8 Department attaches approved worksheet to the rest of the H -1B request packet and submits it to ISSS ; \\n\\uf0a8 ISSS files the LCA application with the DOL and obtains certificatio n in about 7 -10 days . \\n\\uf0a8 \\nForm #2: Deemed Export Certification for H -1B Petitions \\nThis is a formal certification of employer stating that the FN ’s work is /is not “deemed” to be an “export” by \\nCommerce Dept. This form must be first signed by fac ulty sponsor and chair or director; original plus \\ndocumentation must be sent to the Export Compliance Office , who will then send the certifi cation to ISSS . \\n \\nForm #3 : H-1B Scholar Information Sheet \\nThis form elicits information from the FN needed by the de partment in order to complete the H1B Request Form. \\nDept . asks the H -1B worker to complete the form & then uses the information to complete the Internal H1B \\nRequest Form.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 78 | " Document(page_content='Form #4: H -1B Department Certification \\nThis form is related to the DO L’s LCA regulat ions. Before ISSS can submit the LCA for certification from the DOL \\nand USCIS, the hiring department must certify the 9 statements specified on the Certification form. Department \\nhas this form signed by the direct supervisor and by Chair or Director of the hiring unit and then submit it to ISSS \\nwith the rest of the require d documents. \\n \\nForm # 5: Memorandum Explaining the Actual Wage \\nThis is a document required by the DOL H1B regulations. 
It must be completed by the hiring unit and submitted \\nto ISSS with the other required documents. It will be kept in the Public Access File (PAF) at ISSS . \\n \\nForm # 6: I-539 (Required ONLY for visa dependents of the H -1B who are already physically in the U.S .) \\nI-539 is a generic form used for change of nonimmigrant status or extension of status. In this context, the I -539', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 79 | " Document(page_content='is used ONLY for the visa dependents of H1B employees already in the U.S. who need to change or extend \\ndependent H4 status along with the H1B employee. (The change or extension of status for the employee \\nhim/herself is included in the I -129 petition itself.) Please refer to the I -539 filing and fees instructions that can be \\nfound at www.uscis.gov . No I-539 is needed for the H -1B employee, nor is it needed if the employee \\'s visa \\ndependents are not in the U.S. at the time of filing the I -129 Petition, nor if the employee’s dependent(s) have their \\nown independent non -immigrant status. \\n \\nProcedures for the form : \\n\\uf0a8 H-1B employee has his or her visa dependent(s) complete and sign the form; \\n\\uf0a8 H1B employee returns completed I -539 to the department with filing fees and supporting document s (original \\nplus one copy), as described on the attached \"Instructions for Completing Form I -539\"', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 80 | " Document(page_content='\\uf0a8 Dept. submits I -539 and supporting documents to ISSS along with the rest of the H -1B petition packet', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 3}),\n", 81 | " Document(page_content='5 \\n \\nWORKSHEET FOR LABOR CONDITION APPLICATION (LCA) \\n (Please follow instructions provided on the next page ) \\n1. Name of Employee (LAS T, First, Middle): …………………………………………………………………….. \\n 2. Rate of Pay per Year: $ …………………………… (This is the person’s actual salary; must be equal to or higher than minimum annual sa lary in AAUP \\nContract for this job title.) \\n \\n 3. Check Here to Confirm this is a Full-Time Position _____ (Note: Rutgers does not support the filing of H -1B petitions for part -time positions. \\nPlease refer to the University Policy on Hiring Foreign Nationals) \\n \\n 4. Period of the Proposed H -1B Employment: From* ………./………./……… To ….../……… …/……….. (MM/DD/YYYY) \\n (*“From” date is the date on which H -1B status should become effective, which may or may not be the same as the appointment start date.)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 82 | " Document(page_content='5. The Standard Occupational Classification (SOC) Code: See instructions for completing the L CA Worksheet and provide the SOC code selected and \\nits description) \\n \\n SOC code: …………………………………… SOC code description: ……………………………………………………………. \\n6. Job Title / Department: ………………………………………………. /................................................. ..................... \\n 7. Job Address: List ALL locations where the foreign national will work (actual address and county) --but do NOT list multiple NB/Piscataway campus \\nlocations (Specify only one primary campus location): \\n \\n……………………………………………………………………………… …………………………….…. \\n \\n \\n 8. Prevailing Wage per Year: $............................ (Minimum annual salary for this title as it appears in AAUP Contra ct) \\n 9. 
Prevailing Wage Source: \\uf0a0 Collective Bargaining Unit (AAUP) …………………. \\uf0a0 Other \\n 10. Prepared by: Name (print): …………………………………………………… Title: ……………………………………………………………………….', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 83 | " Document(page_content='Phone: ……………………………………………. Email: …………………………………………………………………… \\n Signature: ……………………………………. Date: ………………………………..…………………. \\n 11. Approval by Dean/Director: \\n Name printed: …………………………………………….. Signature: …………………………………. Date: ………………………………….. \\n \\nAfter completing ITEM 11 above, email tis form to ACADEMIC LABOR RELATIONS (ALR) for review and approval at : \\noalr@oq.rutgers.edu \\n (If appointment has a 1 -year term but department is requesting the H -1B approval for more than one year, also email to ALR a copy of the official \\n1-year appointment letter or form.) \\nTHIS FORM WILL BE emailed BACK TO ORIGINATING DEPARTMENT AFTER APPROVED BY ALR (SEE APPROVAL BELOW), and should then be \\nsubmitted to the Rutgers Global --ISSS with the rest of the H -1B completed petition packet.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 84 | " Document(page_content='ALR approva l: …………………………… ……………………………….. Date : …………………………………………….', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 4}),\n", 85 | " Document(page_content='6 \\n WORKSHEET FOR LABOR CONDITION APPLICATION (LCA) \\n(This is instruction for completing the LCA Worksheet on previous page.) \\n \\nPlease follow these line -by-line instructions carefully. The department assumes a serious lia bility on behalf of \\nRutgers if the LCA Worksheet is not completed with accurate information as outlined below. \\n1. Enter the employee’s (or prospective employee’s) name; include full middle name (if known). \\n2. “Rate of Pay per Year :” enter the actual annual salary to be paid to the employee \\n3. “Full-Time Position? ” Verify that this is a full -time position by checking “yes.” \\n4. “Period of Employment :” For the begin date, enter the date you want this H -1B authorization to start (which \\nmight be different from the actual employment starting date if the individual began working in this position in \\na different status or is now extending H -1B status). If the person is co ntinuing an appointment already begun', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 86 | " Document(page_content='under a different visa status (such as F -1 on OPT or J -1), show the date immediately following the date on \\nwhich that current visa status will expire. Remember that we cannot file LCA’s or H -1B petitions more than \\nsix months before the requested begin date. \\n5. “The Standard Occupational Classification (SOC) . Please refer to the SOC code manual at : \\nhttps://www.bls.gov/soc/2018/soc_2 018_manual.pdf :” for teaching positions , use SOC codes within \\n“25-0000 Education, Training, and Library Occupations. ” For research positions , look up and select the \\ncode most appropriate to the academic discipline. \\n6. “Job Title :” enter the official Rutgers payroll title followed by the name of the department, e.g. Assistant \\nProfessor/ Chemistry or Research Associate/Physics. Note: any change in title, job responsibilities or \\ndepartment may require filing a new LCA and a new H -1B petition. 
Please check with ISSS before', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 87 | " Document(page_content=\"initiating any changes to determine if new forms will be required. \\n7. “Job Address(es) :” provide a complete address ( including the lab or office number and County ) of the \\nlocation(s) where th e actual work will be performed. Note: any change in the location of the work -site \\nafter filing this LCA -- even a short -term, temporary change of more than five days -- may require the \\nfiling of a completely new LCA. \\n8. “Prevailing Wage per Year :” enter the dollar figure on the AAUP contract (applicable for the “begin date” \\nshown in #4 above) which represents the minimum annual salary for this job title. (If you have ANY questions \\nabout this figure, please call your dean's office or ISSS for assistance. \\n9. “Prevailing Wage Source :” for all AAUP positions, check “Collective Bargaining Unit (AAUP).” \\n10-11. Complete as appropriate, and have Dean or Director Sign.\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 88 | " Document(page_content='\\uf0a8 Follow instructions on bottom of the Worksheet itself in order to ob tain approval from Academic Labor \\n Relations (ALR) prior to submitting it to ISSS with the rest of the H -1B packet. \\n\\uf0a8 ISSS then generates (online) an actual LCA and submits it to Department of Labor ( DOL ). Upon DOL’s \\ncertification, ISSS will be able to fi le the H1B petition packet to USCIS for adjudication.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5}),\n", 89 | " Document(page_content='7 \\n \\n \\n \\nForm # 2: DEEMED EXPORT CERTIFICATION FOR H -1B PETITIONS \\n(This page is for information and processing instructions . The f ollowing page is the form itself) \\n \\nDeemed Export Certification for H -1B Petitions \\n \\nThe Form I -129 issued by USCIS is a petition for a non -immigrant alien to come to work in the United States \\ntemporarily in a specialty occupation. This fo rm requires a “deemed export certification.” This means that \\nRutgers must certify whet her or not the beneficiary of the visa petition will have access to export controlled \\ninformation or technology through his or her work at Rutgers. The certification is based on knowledge as of the \\ntime of the application \\n \\nThe certification reads as follo ws: \\n \\nWith respect to technology or technical data the petitioner will release or otherwise provide access to the \\nbeneficiary, the petitioner certifies that it has reviewed the Export Administration Regulations (EAR) and the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 90 | " Document(page_content='International Traffic in Arms Re gulations and has determined that: \\n \\n1. A license is not required from either US Department of Commerce or the US Department of State to \\nrelease such technology or technical data to the foreign person; or \\n \\n2. A license is required from the US Department of Commer ce or the US Department of State to release \\nsuch technology or technical data to the beneficiary and the petitioner will prevent access to the controlled \\ntechnology or technical data by the beneficiary until and unless the petitioner has received the requi red \\nlicense or other authorization to release to the beneficiary. 
\\nThis formal certification is made as part of the visa pe tition process conducted by the office of International \\nStudent and Scholar Services upon advice from the Office of General Counsel. W hile the department does not \\nmake the certification, the certification cannot be completed without information from the department. The visa', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 91 | " Document(page_content='petition cannot be processed without the certification so it is very important that department provide the \\nnecessa ry information as promptly as possible. \\n \\nPlease note that the certification is made under penalty of perjury. Therefore the information provided \\nmust be as complete and accurate as possible. Both the information we require and the certification \\nstatement appear on page 2 of this document. \\n \\n \\nPROCESSING INSTRUCTIONS : \\n \\n1. The fully executed copy of the form on the next page and all necessary documentation related to this form \\nshould be emailed to: \\nRobert Phillips , Export Compliance Officer ; export -support@rutgers.edu ; \\nAdministrative Services Building III, Cook Campus . \\n2. A single photocopy of the fully executed form (form ONLY —no documentation ) must be included \\nalong with the rest of the H -1B packet submit ted to : ISSS , 30 College Ave., New Brunswick', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 92 | " Document(page_content='Please note: All questions about this form & relevant documentation should be directed to Robert Phillips . \\n His phone : 848-932-4522. His office location : Knightsbridge Rd, 2nd Floor East, Piscataway, NJ 08854)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 6}),\n", 93 | " Document(page_content='8 \\n DEEMED EXPORT CERTIFICATION FOR H -1B PETITIONS \\n \\nThe following needed information can be provided by dep artment /center administrators: \\n \\n1. A general description of the beneficiary’s duties \\n2. Name and contact information for faculty sponsor and Chair of Department \\n3. Name and contact information for beneficiary \\n4. A copy of any & all grants, contracts & awards to which FN is assigned as of date of visa petition if any \\n5. A copy of the beneficiary’s C.V. \\nThe following information (as well as any unanswered question s above) should be provided by the \\nbeneficiary’s faculty sponsor and/or department chair or center director: \\n \\n To the extent known at time of visa petitions, will the beneficiary be: \\n \\n\\uf0b7 Yes No Working on any grant, co ntract or award containing publication restrictions \\n \\n\\uf0b7 Yes No Working on any grant, contract or award restricting participation of foreign nationals', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 94 | " Document(page_content='\\uf0b7 Yes No Working on corporate sponsore d contracts with access to company proprietary information \\n \\n\\uf0b7 Yes No Having access to technology or equipment designed or developed with military or space \\napplications \\n \\n\\uf0b7 Yes No Working on high-tech or experimental equipment (e.g. high speed computers, lasers, satellites) \\n \\nIf the answer to any of the above is yes, please explain. \\n \\nThe following certification must be signed by the beneficiary’s faculty sponsor and the chair of the \\nrelevant department or director of the relevant center. 
\\nI certify under penalty of perjury that, to the best of my knowledge, the information herein provided is true and \\naccurate as it pertains to the H -1B petition for Name of beneficiary :_______________________ _ \\nAnd that, with respect to the beneficiary’s expected duties at Rutgers : \\n\\uf0a0 A license is n ot required from either the US Department of Commerce or the US Department of State to allow', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 95 | " Document(page_content='beneficiary access to the technology or technical data he/she will use or be exposed to while working for Rutgers. \\n\\uf0a0 A license is required from the US Department of Commerce and/or the US Department of State in order for \\nbeneficiary to access certain export controlled technology or technical data beneficiary will use or be expo sed to while \\nworking for Rutgers. Please note, if a license is required, beneficiary may have no access to said export controlled \\nmaterials until and unless a proper license is in place. \\n \\n \\nFaculty Sponsor Name _______________ Center Direct or or Dept. Chair Name _____ __________ \\n \\nSignature ________ Date: ______ Signature ____________________ Date: __ ______', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 7}),\n", 96 | " Document(page_content='9 \\n Form # 3: H-1B Scholar Information Sheet (page 1 of 2) \\n(Provides hiring unit information it needs from the employee in order to complete forms in the packet) \\nTO BE COMPLETED BY THE EMPLOYEE NAMED IN THE H -1B PETITION \\n \\nCURRENT EMAIL OF THE SCHOLAR: _______________________________________________________ \\n \\n1. Family name (exactly as it appears i n passport) _______________________________________________ \\n \\n2. Given name (first name exactly as it appears in passport)________________________________________ \\n \\n3. Middle name (if applicable and only if listed in passport)________________________________ _________ \\n \\n4. Gender : Male ______ Female ______ \\n \\n5. All other names used_____________________________________________________________ \\n \\n6. Date of Birth (mm/dd/yyyy) ______________ \\n \\n7. USCIS A# (if any) _____________________________________________________ \\n \\n8. Country of Birth _______________________________________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 97 | " Document(page_content='9. Province or State or City of Birth ____________________________________________________ \\n \\n10. Country of Citizenship __________________________________________________________ _ \\n \\n11. Do you and any dependents filing with you have a valid passport? Yes____ No____ \\n If you answer “no” please provide on a separate sheet of paper an explanation and/or proof that an \\n extension has been applied for. \\n \\n12. Are appli cations for dependents being filed with this petition? Yes____ No____ \\n If you answer “yes” please indicate how many dependents are included___________ \\n \\n13. Are you or your dependents currently in U.S. immigration removal proceedings? Yes_ ___ No____ \\n If you answer “yes” please provide an explanation on a separate sheet of paper. \\n \\n14. Has a U.S. 
immigrant petition ever been filed for any person in this petition, including dependents?', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 98 | " Document(page_content='Yes____ No____ If you answe r “yes” please provide an explanation on a separate sheet of paper. \\n \\n15. Have you ever been given any H status of any kind (including H -4) before? Yes____ No____ \\n If you answer “yes” please indicate all the dates on a separate sheet of paper. \\n \\n16. Have you ever been denied H status? Yes____ No____ \\n If you answer “yes” please explain on a separate sheet of paper. \\n \\n17. Please provide a list of every period during which you have ever held J -1 or J -2 status in any J category. \\n(Note: because the H -1B petition form asks for documentation of all J status periods, we will need you to \\nprovide documentation in the form of copies of DS -2019s, IAP -66s, or J -1/J-2 visa in passport . \\n \\n(Continued on page 1 3)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 8}),\n", 99 | " Document(page_content='10 \\n Form # 3: H-1B Scholar Information Sheet (page 2 of 2) \\n(Complete ONLY ONE Section Below : Section A or Section B, but not both). \\n \\nSECTION A H-1B STATUS: Complete #19 - #27 ONLY IF you are currently in the U.S. and do NOT \\nintend to leave the U.S. before beginning employment at Rutge rs. \\n \\n18. To help us file your H -1B petition in the most appropriate way for your situation, please list approximate dates \\n and destinations of all your planned travel outside the U.S. in the next 12 months \\n \\n _________________________________________ ____________________________________ \\n \\n19. Date of Most Recent Arrival in the U.S., if applicable. __________________________________________ \\n \\n20. I-94# (from the most recent arrival/departure document)_________________________________________ \\n \\n21. Current Nonimmigrant Status in the U.S. :________ (Note: if currently in H -1B status, you are eligible for', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 100 | " Document(page_content='Rutgers H -1B sponsorship only if you can document with copies of your 3 most recent pay stubs that you are still \\nemployed in your current H -1B job at the time Rutgers files its H -1B petition for you. ) \\n \\n22. Date Status Expires, if applicable (F -1 and J -1 visa holders: put “D/S”) __________________________ \\n \\n23. Passport Number________________________________ \\n \\n24. Date passport issued (mm/dd/yyyy)________ _________ 25. Date passport expires: _________________ \\n \\n26. Current U.S. address ____________________________________________________________________ \\n \\nSECTION B H-1B VISA: Complete #2 7 and #2 8 ONLY IF you will be visiting a U.S. consulate abroad \\nand appl ying for an H -1B visa prior to beginning employment at Rutgers. (Note for Canadian Citizens only: \\nunless you are changing to H -1B status within the U.S. , please complete #2 7 and # 89 even though you will NOT \\nneed to visit a U.S. embassy )', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 101 | " Document(page_content='27. The U.S. cons ulate or U.S. immigration inspection facility you will visit to obtain your U.S. H -1B visa. \\n \\n Office Address (City):_________________ Country of Citizenship or Nationality : ___________________ \\n \\n28. Your foreign address (your permanent addres s outside the U.S.) Please provide complete address. 
\\n Street # & Name : ________________ Apt. \\uf0a0 Ste.\\uf0a0 Flr.\\uf0a0 City or Town: ______________ \\n \\n State or Province : __________________ Postal Code : _________ Country: __________ ________ \\n \\nBy signing below, I certify that all of the above information is correct to the best of my knowledge AND that neither \\nI nor any dependents are currently subject to regulation 212(e) which subjects certain J visa holders to a 2 -year \\nhome residence requirement. (NOTE: If you ARE currently subject to this requirement, do not sign this \\nform, but contact your department at Rutgers immediately.)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 102 | " Document(page_content='Signature Date \\n \\nPLEASE RETURN THE COMPLETED FORM TO YOUR DEPARTMENT ADMINISTRATOR AT RUTGERS \\n(If you have questions concerning this form please email Ruimin Zhang at rzhang@global.rutgers.edu . Questions \\nregarding your appointment or visa eligibility should be directed to the hiring unit, however. )', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 9}),\n", 103 | " Document(page_content='11 \\n \\n \\nForm # 4: H-1B Department Certification From \\n \\nBefore ISSS can submit the LCA for certification from the U.S. D OL and USCIS adjudication, \\nThe hiring unit must certify the following statements and complete the required information. \\n \\nI certify that: \\n\\uf0b7 The salary being paid to the above named employee is at least the actual wage being paid to all other \\nindividuals with similar experience and qualifications for the specific employment in question or the \\nprevailing wage level for the occupation in the area of employment (regional average), whichever is \\nhigher. \\n\\uf0b7 Fringe benefits offered to this employee are equivalent to that offered to other U.S. workers in the same \\nclassification. \\n\\uf0b7 Employing this person will not adversely affect the working conditions of U.S. workers similarly employed. \\n\\uf0b7 There is no strike, lockout, or work stopp age due to labor dispute in this occupation.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 104 | " Document(page_content=\"\\uf0b7 We agree to comply fully with the terms of the Labor Condition Application stated above for the duration \\nof the alien's employment in H -1B status at Rutgers \\n\\uf0b7 We fully understand that any willful violation conne cted with providing inaccurate information in the LCA \\nmay incur severe penalties that have a long -range impact at Rutgers to include fines and legal \\nprosecution. \\n\\uf0b7 As required by the US Citizenship and Immigration Services, we agree to pay the reasonable co st of \\nreturn transportation to the alien's home country if s/he is dismissed before the end of the authorized \\nperiod of H -1B employment. \\n\\uf0b7 We have contacted the Rutgers Export Compliance Manager and have the Deemed Export Certification \\nForm signed and we will comply with all Licensure Requirements for research activities. 
\\n \\n \\n Certified by : \\n \\n\\uf0b7 Direct Supervisor of Hiring Unit: \\n \\n _____________________________________________________________ ___________________\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 105 | " Document(page_content='(Name printed ) (Signature) (Date) \\n \\n\\uf0b7 Chair or Director of Hiring Unit : \\n \\n ________________________________________________________________________________ \\n (Name printed) (Signature) (Date) \\n \\n \\n\\uf0b7 Contact Person of Hiring Unit : \\n \\n \\n Name: ______________ _________________ Phone #: _________________ \\n \\n \\n Email: _______________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 10}),\n", 106 | " Document(page_content='12 \\n Form # 5: Memorandum Explaining the Actual Wage \\n \\n(Required For THE PUBLIC ACCESS FILE to be set up and maintained at ISSS ) \\n \\nDepartment of Labor (D OL) regulations are design ed to protect U.S. workers. As such, the DOL wants to ensure \\nthat U.S. workers are not being displaced by H -1B employees. The requirements to provide an “explanation of the \\nactual wage” are to document that employers are not using the H -1B program to hire foreign workers at salaries \\nlower than those a U.S. worker would expect for a similar position with similar requirements and responsibilities. \\n Please note that the DOL reserves the right to conduct employer audits of Public Access Files. \\n \\n \\nName of the H -1B employee ______________________________________ \\n \\nName of Hiring Department or Center_______________________________ \\n \\nActual wage being paid to the H -1B employee ___________________________', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 107 | " Document(page_content='(Exactly as actual wage is noted on the LCA Worksheet after sig ned by the VPAA’s office) \\n \\nThe following explanations must be included below or on additional pages: \\n \\n_____An explanation of how the “prevailing wage” was determined. At Rutgers, “prevailing wage” is the minimum \\nannual salary for the job title as it appea rs in AAUP contract, so this documentation should consist of a copy of the \\napplicable page of the AAUP contract with the “prevailing wage” (minimum salary) highlighted for this specific job \\ntitle and term of appointment (CY or AY) \\n \\n_____An explanation of h ow the “actual wage” (actual salary) for the H -1B employee was determined. \\nRegulations provide guidance on this requirement as follows: ) A full, clear explanation of the system that the \\nemployer used to set the \"actual wage\" the employer has paid or will pay workers in the occupation [job title] for', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 108 | " Document(page_content=\"which the H -1B nonimmigrant is sought, including any periodic increases which the system may provide -- e.g., \\nmemorandum summarizing the system or a copy of the employer's pay system or scale (payroll records are not \\nrequired, although they shall be made available to the Department in an enforcement action). \\nPlease note : \\n \\n\\uf0b7 If U.S. employees are paid more than the H -1B employee is , you must provide a detailed explanation and \\njustification as to why this is the case, bearing in mind the DOL’s reason for seeking this information (see top \\nof page). \\n\\uf0b7 Any records documenting wages/salary should not violate the privacy of any employees. 
As such, if you use \\nactual copies of other employees’ pay records, you should blac k out the employees’ names and SSN.\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 11}),\n", 109 | " Document(page_content='13 \\n \\n \\n \\nForm #6: I -539, Application to Extend/Change Nonimmigrant Status \\n \\n(This form is required ONLY if visa dependent (s) is (are) physically in the U.S. at the time of filing the H 1B \\npetition. If the alien has a spouse and/or children already in the U.S. in dependent nonimmigrant status, the \\nfollowing should be submitted to ISSS by the hiring unit along with the rest of the H -1B petition packet. (Please \\nsubmit one original and one photocopy of each item only.) \\n \\nIMPORTANT NOTE : our office is not responsible for reviewing this form for the dependent(s), although we will \\nenclose the form in the H1B petition packet to be submitted to USCIS, so the H1B beneficiary’s dependent(s) \\nmust rea d and follow the filing instructions closely and make sure the form is fully completed and signed by the \\ndependent in blue ink. The dependent(s) must complete the I -539 in his/her/their name(s). The H -1B principal', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 110 | " Document(page_content='alien is not the applicant on form I -539; the applicant(s) is (are) the dependent(s). \\n \\nFORM I -539 and Instructions CAN BE OBTAINED ON THE USCIS WEB SITE AT : www.uscis.gov \\n \\n• Click on “Forms” tab at the top of the page \\n• Scroll down to Form I -539 \\n \\nThe following ite ms constitute a completed I -539 packet for dependents: \\n \\n• Form I -539, filled out by and in the name of the spouse, or, if there is no spouse , in the name of the first \\n Visa dependent (original plus one copy); \\n \\n• A completed \"I -539A if there is more than one visa dependent (original plus one copy); \\n \\n• Two copies of all Forms I -94 of all visa dependents (the most recent I-94 admission number/record, which \\nis proof of legal visitor status, can be downloaded from https://i94.cbp.dhs.gov/I94/#/home. \\n \\n• Two cop ies* of marriage license (for spouse only) and birth certificates (for children only); and', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 111 | " Document(page_content='• A check to \"Dept . of Homeland Security\" for $ 370. 00 and a check of $85 for each dependent include on \\n the I-539. \\n \\nProof of relationships: \\nIf the marria ge license and/or birth certificate(s) are in a foreign language, a certified translation must be \\nattached. \\n \\nA certified translation is one on which the translator has written, \"I certify that I am competent in both the English \\nand _____ languages and th at this is a true and accurate translation of the attached document.\" The translator \\nthen signs and dates this statement in the presence of a notary public, who then notarizes the signature.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 12}),\n", 112 | " Document(page_content='14 \\n \\nREQUIRED SUPPORTING DOCUMENTS FOR H -1B PETITIONS \\n(The l isted documents must be submitted by the department to ISSS with the completed Internal H1B Request \\nForm as a package.) \\n1. Strong letter of support from department chair (See sample letter on next Page) . Address letter to: \\n U.S. 
Citizenship and Immigra tion Services, California Service Center, Laguna Niguel, CA 92607 \\n Letter should include at least the following : \\n(a) Title of position, exact salary as specified on AAUP contract, and inclusive appointment dates for the current \\nH-1B petition (letter must state that the appointment is temporary); \\n(b) Position description in general terms, e.g., responsibilities include biomedical research and writing papers , the \\nspecific academic credentials required for it, and an explanation as to why these specific cred entials are required; \\n(c) Explanation of how the individual is considered to have a \"specialty occupation.\" (This is the language of the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 113 | " Document(page_content=\"regulations.) The explanation should be more than a perfunctory statement, and can address the way in which the \\nunique talents of the individual will help the department to meet its specific needs; \\n(d) Original signature of department chair (on at least one copy of the letter). \\n2. Individual's C.V./Resume. \\n3. Photocopies of the highest degree diploma: \\nIf the di ploma is from a foreign university, you must obtain a “credential evaluation” which certifies it is \\nequivalent to a U.S. Ph.D. (USCIS frequently returns H -1B petitions submitted with a foreign degree but without \\na credential evaluation, and this can delay the petition approval process by several weeks or more. Following are \\nlinks for 3 Credit Evaluation Services : \\n http://www.evaluationservice.net/ http://naces.org/ https://www.wes.org/ \\nNote: if the Ph.D. diploma has not yet been awarded , you may submit a certified copy of the Master's diploma\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 114 | " Document(page_content=\"PLUS a letter with original signature from the registrar at the Ph.D. -granting instit ution stating that all degree \\nrequirements have been completed and indicating the expected date of conferral of the Ph.D. degree. \\n4. Photocopy of Dean’s letter offering the position to the H1B worker . \\n5. Official job description from the Dean’s office , Director, or HR \\n6. If the individual is already in the U.S ., attach 2 photocopies of the forms in the individual's possession --this \\nshould be at least a little white card in the passport or a computer -generated I -94 printout which can be downloaded \\nat www.cbp.gov/I94 ); (b) the individual’s passport picture and expiration page (these are usually on the same page \\nbut may be separate) plus 2 copies of items listed under the one applicable bullet below : \\n\\uf0a8 If individual is currently in F -l student status , attach photocopy of front and back of his/her Form I -20 and, if on\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 115 | " Document(page_content='authorized Practical Training, a photocopy of the Employment Authorization Document (EAD, small ID -sized card) \\n\\uf0a8 If FN is currently in J -1 status , attach photocopies of all DS -2019 forms in his/her possession and a copy of the \\nJ-1 visa stamp from the passport. Also, if the J -1 is subject to the 2 -year home country residence, provide a copy \\nof the waiver of this requirement from the USCIS. (If wai ver is required but not yet obtained, contact our office \\nimmediately, as obtaining a waiver of the 2 -year requirement can be a lengthy process.) 
\\n\\uf0a8 If the individual is currently in H -1B status with another employer , attach a copy of his/her current & all pre vious \\nH1B Approval s (Form I -797) AND copies of his/her most recent 3 pay stubs (as proof that s/he is still employed.) \\n\\uf0a8 If the individual is currently in H -4 status, attach a copy of the H -1B principal’s documents as listed immediately', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 116 | " Document(page_content='above (under “If the individual is currently in H -1B status with another employer”) plus a copy of the marriage \\nlicense documenting the alien and his/her H -1B spouse are legal spouses, copies of all H -4 Notices of Approval \\nissued by USCIS to the H -4 individual and dependent(s) passport bio data page showing the expiration date \\n6. If H1B status is being requested for a period beyond the employment end date stated in the appointment letter , \\na statement from the Chair confirming that funding is guaranteed to be available for thi s position for the entire \\nperiod requested, and confirming that the department is aware it must notify ISSS if the H1B employee leaves the \\nposition before the end date requested on the petition.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 13}),\n", 117 | " Document(page_content='15 \\n SAMPLE LETTER IN SUPPORT OF H1B PETITION \\n (Please Print out on Department letterhead and Sign in Blue Ink) \\nUSCIS, California Service Center \\nAttn: Cap Exempt H -1B Processing Unit \\n24000 Avila Road, Room 2312 \\nLaguna Niguel, CA 92677 \\n \\nRe: Dr. Doe’s H -1B Petition \\n \\nTo Whom It May Concern: \\n \\nThis letter is submitted in support of the H1B petition of Rutgers University for Dr. Doe, who has \\nbeen hired as a (official RU job title) in (name of hiring dept.) on a temporary basis. The intended \\nperiod of H1B employment is from (intended H1B start date ) to (intended H1B end date) , with an annual \\nsalary of $ annual salary . \\n \\nRutgers, an innovative and rapidly growing public university, is the largest institution in New Jersey \\nand one of the oldest schools in the nation. Rutgers’ hundreds of undergraduate and graduate programs \\nof study provide something for everyone, including the natural, physical and social sciences, the', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 118 | " Document(page_content='liberal, fine and performing arts, business, engineering and everything in between. Rutgers is also \\nknown for its highly varied graduat e and undergraduate research activities, which encompass everything \\nfrom cancer prevention to evolutionary studies to studies in preschool education. \\n \\nDr. Doe’s responsibilities will include teaching of both undergraduate and graduate courses , as well \\nas conducting research in xx. Because of Dr. Doe’s broad background in research and teaching in the \\narea of xx, we believe he will make outstanding contributions to the Department of Y, enhancing Rutgers \\nreputation as a leader in this area . \\n \\nDr. Doe is most highly qualified for a xx position at Rutgers. He /She received his /her Ph.D. from \\nxx Institute in year. He/She was employed as a xx at ___XX___University for the past three years , \\nwhere he /she worked closely with leading scientists in the field of XX and YY . Dr. 
Doe also xx students', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 119 | " Document(page_content='YY courses and received outstanding evaluations. He/She has written numerous articles in professional \\njournals and has been cited by other scholars in the field. Dr. Doe has presented his /he research \\nwork at two major conferences in the United Sates and has been invited to speak at xxx \\n \\n In conclusion, Dr. Joe will be an asset to the Department of Y . Rutgers will benefit significantly \\nby having Dr. Joe continue h er/his research at Rutgers on an H -1B1 visa. Please contact me if you \\nrequire further information. \\n \\nWe intend to employ Dr. Joe for an initial period of number of years in the position offered. Our \\ndepartment will bear responsibilities for reasonable costs of return transportation abroad of Dr. \\nJoe should he /she be dismiss ed from employment before the expiration of the H1B petition. Thank you \\nfor your assistance and cooperation in processing this request. \\n \\nSincerely, \\nSignature of Hiring official (Department Chair or Dean)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 14}),\n", 120 | " Document(page_content='16 \\n \\n \\n \\n \\n \\n \\n \\n \\n. \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n o Request H -1B \\nPacket from \\nISSS \\no Gather \\nsupporting \\ndocuments \\no Submit \\nsupporting \\ndocuments & \\n1-pg \\nworksheet \\nfor LCA and \\nfiling fees to \\nISSS Hiring \\nDepartment Int’l Student & Scholar \\nServices \\no Review dept request for eligibility, create \\nH1B file and enter data in ISSS data base \\no File LCA with Dept. of Labor (needs 7-10 \\ndays processing time) \\no Review supporting documents submitted \\nby hiring unit \\no Review and sign DOL certified LCA & send \\nnotification with copy of LCA to Union \\no Complete all USCIS forms \\no Assemble and submit petition packe t to \\nUSCIS on behalf of dept. \\no Send copies of processed USCIS forms and \\nLCA to dept. /FN with handling \\ninstructions \\no Set up Public Access File (PAF) for each \\nH1B FN \\no Follow up/monitor case status with USCIS \\nand respond to Request for More \\nEvidence when require d \\no Notify dept. and/or H1B FN upon', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 15}),\n", 121 | " Document(page_content=\"receiving Receipt Notice or Approval \\nNotice \\n o Maintain forms and instructions on \\nISSS website \\no Revise process and forms periodically \\nand as required by USCIS and other \\ngovernment agencies \\no Schedule \\nH1B FN \\ncheck -in \\nand \\norientation \\nsession with \\nISSS H1B Visa Process \\no Conduct weekly new H1B check -in & \\norientation \\no Maintain and update PAF files and stay \\nprepared for Public Inspection \\nand/or DOL/USCIS audits \\no Report any changes to H -1B \\nFN's terms of employment \\nto USCIS if applicable. May \\nrequire filing an amende d \\npetition \\no As per the DOL \\nrequirement, all H -1B FNs \\nPAFs must be maintained \\nfor 1 year beyond the \\ntermination of their \\nemployment. ISSS will take \\nover this responsibility. \\n o Dept. 
to \\nnotify ISSS of \\nany changes \\nto H -1B FN's \\nterms of \\nemployment \\nincluding \\nextensions, \\npromotions , \\nand early \\nterminations\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 15})]" 122 | ] 123 | }, 124 | "execution_count": 22, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "docs" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 23, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "61\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "from langchain_community.vectorstores import FAISS\n", 148 | "\n", 149 | "embeddings = OpenAIEmbeddings()\n", 150 | "db = FAISS.from_documents(docs, embeddings)\n", 151 | "print(db.index.ntotal)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 24, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "db.save_local(\"db\")" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 27, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "query = \"give me Email ISSS\"\n", 170 | "docs = db.similarity_search(query)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 30, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "[Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 182 | " Document(page_content='4. ISSS will provide the hiring unit and the FN with a copy of the completed I-129 petition form , \\nDOL -certified LCA as soon as it is available and also notify the AAUP of the LCA filing. \\n \\n5. Departments are responsible for ensuring that H -1B employees going on Rutgers payroll for the first \\ntime attend a \"check -in/orientation session \" at ISSS as soon as possible after the employee has arrived \\nat Rutgers . The workshop is offered weekly on Thursday at 2:45 p.m. and requires adv ance sign -up by emailing \\nISSS at gan tonatos@global.rutgers.edu. For employees whose H -1B status is being extended, the workshop is \\nnot necessary. \\n \\n6. Federal regulations change regularly and ISSS routinely updates the H -1B instruction packet. Please \\nread all of ISSS ’ H-1B instructi ons each time you file an H -1B petition. 
Procedures and forms may be', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 2}),\n", 183 | " Document(page_content=\"complete H -1B packets to ISSS as early as 6 months prior to the intended starting date of the H -1B petition, in \\norder to avoid both the USCIS $1,410 Premium Processing fee and the ISSS late fee of $200 . There is also an \\noption for expedited processing service at ISSS , which requires a fee of $300. ISSS tries to process the request \\nwithin 2 -3 weeks if everything is in order . Please see our Late Fee Schedule . \\n \\nH-1B P ETITION FOR FACULTY APPOINTMENT \\n(INSTRUCTION PACKET FOR EMPLOYING DEPARTMENTS ) \\nTable of Contents \\nDepartments' Legal Responsibilities……………………………………………………………………… ..…3 \\nIntroduction to Each of the Required Forms in this Packet ………………………. ……………….... .........4 \\n Form #1: Worksheet for Labor Condition Application ( LCA) …………………………………….……. 5-6 \\n Form #2: “Deemed Export Certification for H -1B Petitions” ………………………………………….... 7-8 \\n Form #3: H-1B Scholar Information Shee t (Required)………………. …………………………… ….... 9-10\", metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1}),\n", 184 | " Document(page_content='\\uf0a8 Follow instructions on bottom of the Worksheet itself in order to ob tain approval from Academic Labor \\n Relations (ALR) prior to submitting it to ISSS with the rest of the H -1B packet. \\n\\uf0a8 ISSS then generates (online) an actual LCA and submits it to Department of Labor ( DOL ). Upon DOL’s \\ncertification, ISSS will be able to fi le the H1B petition packet to USCIS for adjudication.', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 5})]" 185 | ] 186 | }, 187 | "execution_count": 30, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "docs" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 29, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "Document(page_content='2 \\n The First Step: E -mail ISSS for an H -1B Request Packet \\n \\nE-mail ISSS at gantonatos@global.rutgers.edu for a request packet. In the subject line , write, “H -1B Packet \\nneeded .” In the body of the message, provide all of the following information : \\n\\uf0b7 contact in sponsoring unit: name, phone number & e-mail address to which H1B packet should be sent \\n\\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed \\n\\uf0b7 type of appointme nt (position title) you are offering the alien \\n\\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state) \\n\\uf0b7 name of sponsoring unit \\n\\uf0b7 please check all that apply to the alien beneficiary: \\n____current Rutgers emplo yee \\n____not yet a Rutgers employee \\n____currently in the U.S. in H -1B status \\n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? 
____________)', metadata={'source': './data/H-1B Filing Instructions.pdf', 'page': 1})" 205 | ] 206 | }, 207 | "execution_count": 29, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "docs[0]" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "env", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.12.2" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 2 245 | } 246 | -------------------------------------------------------------------------------- /research/utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "\u001b[32m2024-07-08 20:00:02.456\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[34m\u001b[1mThat's it, beautiful and simple logging!\u001b[0m\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from loguru import logger\n", 18 | "\n", 19 | "logger.debug(\"That's it, beautiful and simple logging!\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 5, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "\u001b[32m2024-07-08 20:00:27.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mFaijan\u001b[0m\n", 32 | "\u001b[32m2024-07-08 20:00:27.033\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[33m\u001b[1mThis is a warning message\u001b[0m\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "logger.info(\"Faijan\")\n", 38 | "\n", 39 | "logger.warning(\"This is a warning message\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Collecting loguru\n", 52 | " Downloading loguru-0.7.2-py3-none-any.whl.metadata (23 kB)\n", 53 | "Requirement already satisfied: colorama>=0.3.4 in c:\\users\\faiza\\music\\llmresearch\\rag\\env\\lib\\site-packages (from loguru) (0.4.6)\n", 54 | "Collecting win32-setctime>=1.0.0 (from loguru)\n", 55 | " Downloading win32_setctime-1.1.0-py3-none-any.whl.metadata (2.3 kB)\n", 56 | "Downloading loguru-0.7.2-py3-none-any.whl (62 kB)\n", 57 | " ---------------------------------------- 0.0/62.5 kB ? 
eta -:--:--\n", 58 | " ---------------------------------------- 62.5/62.5 kB 1.7 MB/s eta 0:00:00\n", 59 | "Downloading win32_setctime-1.1.0-py3-none-any.whl (3.6 kB)\n", 60 | "Installing collected packages: win32-setctime, loguru\n", 61 | "Successfully installed loguru-0.7.2 win32-setctime-1.1.0\n" 62 | ] 63 | }, 64 | { 65 | "name": "stderr", 66 | "output_type": "stream", 67 | "text": [ 68 | "\n", 69 | "[notice] A new release of pip is available: 24.0 -> 24.1.2\n", 70 | "[notice] To update, run: python.exe -m pip install --upgrade pip\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "! pip install loguru" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import queue\n", 85 | "\n", 86 | "# Create a queue\n", 87 | "\n", 88 | "q = queue.Queue()\n", 89 | "\n", 90 | "# Adding elements to the queue\n", 91 | "q.put(1)\n", 92 | "\n", 93 | "q.put(2)\n", 94 | "\n", 95 | "q.put(1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 9, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "2" 114 | ] 115 | }, 116 | "execution_count": 9, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "q.qsize()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 11, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "deque([1, 2])" 134 | ] 135 | }, 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "q.queue" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "2" 154 | ] 155 | }, 156 | "execution_count": 6, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "q.get_nowait()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "True" 174 | ] 175 | }, 176 | "execution_count": 7, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "q.empty()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "env", 196 | "language": "python", 197 | "name": "python3" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 3 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython3", 209 | "version": "3.12.2" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Pull the model (only needs to be done once) 4 | docker exec -it ollamaa ollama pull llama3:8b 5 | 6 | # Run the command or script you need 7 | docker exec -it ollamaa ollama run nomic-embed-text 8 | 9 | # Attach to the bayesrag 
container 10 | docker attach bayesrag_cont 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | __version__ = "0.0.0" 7 | 8 | REPO_NAME = "bayesrag" 9 | AUTHOR_USER_NAME = "faizack" 10 | SRC_REPO = "bayesrag" 11 | AUTHOR_EMAIL = "faizack619@gmail.com" 12 | 13 | setuptools.setup( 14 | name=SRC_REPO, 15 | version=__version__, 16 | author=AUTHOR_USER_NAME, 17 | author_email=AUTHOR_EMAIL, 18 | description="A python package for Local RAG app using Local LLM and local vector db", 19 | long_description=long_description, 20 | long_description_content_type="text/markdown", 21 | url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}", 22 | project_urls={ 23 | "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues", 24 | }, 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src") 27 | ) 28 | -------------------------------------------------------------------------------- /src/bayesrag/__init__.py: -------------------------------------------------------------------------------- 1 | # import os 2 | # import sys 3 | # import logging 4 | 5 | # logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]" 6 | 7 | # log_dir = "logs" 8 | # log_filepath = os.path.join(log_dir,"running_logs.log") 9 | # os.makedirs(log_dir, exist_ok=True) 10 | 11 | 12 | # logging.basicConfig( 13 | # level= logging.INFO, 14 | # format= logging_str, 15 | 16 | # handlers=[ 17 | # logging.FileHandler(log_filepath), 18 | # logging.StreamHandler(sys.stdout) 19 | # ] 20 | # ) 21 | 22 | # logger = logging.getLogger("RAGLogger") -------------------------------------------------------------------------------- /src/bayesrag/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | from dotenv import load_dotenv 4 | import uuid 5 | load_dotenv() 6 | 7 | DATA_DIR = Path(os.getenv("DATA_DIR", "./data")) 8 | OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "http://localhost:1234/v1") 9 | OPENAI_API_KEY = "lm-studio" 10 | QDRANT_HOST = os.getenv("QDRANT_HOST", "http://localhost:6333") 11 | ID=uuid.uuid4() 12 | # QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", f"law_doc-{ID}") 13 | 14 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 15 | QDRANT_COLLECTION=f"law_doc-{ID}" 16 | 17 | 18 | # IPFS Config 19 | 20 | IPFS_RETRY_LIMIT = 3 21 | IPFS_RETRY_DELAY = 5 22 | 23 | ## Directory for IPFS 24 | directory_path = f"./qdrant_data/collections/global_data" 25 | zip_file_path = "./qdrant_data_backup.zip" 26 | 27 | download_path = "./zipdownloaded_files/qdrant_data_backup.zip" 28 | extract_to_path = "./qdrant_data/collections/global_data" -------------------------------------------------------------------------------- /src/bayesrag/constant.py: -------------------------------------------------------------------------------- 1 | SEND_TOPIC="RAG/Query1" 2 | RECEVICE_TOPIC="RAG/#" 3 | 4 | # Aggregations Topic 5 | AGG_SEND_TOPIC="AGG/SEND" 6 | AGG_RECEIVE_TOPIC="AGG/#" -------------------------------------------------------------------------------- /src/bayesrag/data_loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from langchain_community.document_loaders import PyPDFDirectoryLoader,PyPDFLoader 3 | 4 | def 
load_directory_pdf(directory_path: Path):
5 |     loader = PyPDFDirectoryLoader(directory_path)
6 |     docs = loader.load()
7 |     return docs
8 | 
9 | 
--------------------------------------------------------------------------------
/src/bayesrag/embedder.py:
--------------------------------------------------------------------------------
1 | import ollama
2 | 
3 | def get_embedding(text, model='nomic-embed-text'):
4 |     return ollama.embeddings(model=model, prompt=text)['embedding']
5 | 
6 | if __name__ == '__main__':
7 |     text = "Hello, I am learning OpenAI's LLM"
8 |     embedding = get_embedding(text)
9 |     print(embedding)
--------------------------------------------------------------------------------
/src/bayesrag/evaluator.py:
--------------------------------------------------------------------------------
1 | # from datasets import Dataset
2 | # from ragas.metrics import context_precision, answer_relevancy
3 | # from ragas import evaluate
4 | 
5 | # def evaluate_response(user_query, llm_response, context):
6 | #     data_samples = {
7 | #         'question': [user_query],
8 | #         'answer': [llm_response],
9 | #         'contexts': [[context]],
10 | #     }
11 | #     dataset = Dataset.from_dict(data_samples)
12 | #     # if ground_truth:
13 | #     #     data_samples['ground_truth'] = [ground_truth]
14 | 
15 | #     # metrics = [context_precision] if ground_truth else [answer_relevancy]
16 | #     metrics = [answer_relevancy]
17 | #     score = evaluate(dataset, metrics=metrics)
18 | #     return score.to_pandas()
19 | 
20 | 
21 | from deepeval.metrics import ContextualRelevancyMetric
22 | from deepeval.test_case import LLMTestCase
23 | from bayesrag.llmEvaluator import customLM
24 | 
25 | from deepeval import evaluate
26 | 
27 | def deepEvalutor(user_query: str, generated_response: str, context: list[str]):
28 |     # Evaluate the output using the Contextual Relevancy metric
29 |     metric = ContextualRelevancyMetric(
30 |         threshold=0.7,
31 |         model=customLM(),
32 |         include_reason=True
33 |     )
34 | 
35 |     test_case = LLMTestCase(
36 |         input=user_query,
37 |         actual_output=generated_response,
38 |         retrieval_context=context
39 |     )
40 |     evaluation = metric.measure(test_case)
41 | 
42 |     result = evaluate([test_case], [metric])
43 |     score, reason = result.test_results[0].metrics_data[0].score, result.test_results[0].metrics_data[0].reason
44 | 
45 |     evaluation_results = {
46 |         "score": score,
47 |         "reason": reason
48 |     }
49 | 
50 |     return evaluation_results
--------------------------------------------------------------------------------
/src/bayesrag/generator.py:
--------------------------------------------------------------------------------
1 | from bayesrag.utils import ClassificationResult
2 | import ollama
3 | 
4 | import json
5 | 
6 | # from openai import OpenAI
7 | # from bayesrag.config import OPENAI_BASE_URL, OPENAI_API_KEY
8 | # client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
9 | 
10 | 
11 | def generate_response(user_query, context, model='llama3:8b'):
12 |     prompt_template = f"""You are a lawyer. Respond only to law-related questions. Do not rely on your own knowledge; use only the context below to answer.
13 | 
14 |     Here is the question: {user_query}
15 | 
16 |     Additional context to support the answer: {context}
17 |     """
18 | 
19 |     system_prompt = "You are a helpful assistant that handles user queries and provides answers using the given context without external information."
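    # Note (a sketch of the intent here, assuming the standard `ollama` Python
    # client): `ollama.chat` takes `messages` as a list of {"role", "content"}
    # dicts, one dict per role, and `stream=True` makes it return an iterator
    # of partial chunks rather than a single completed response.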
20 | 
21 |     messages = [
22 |         # One message per role: a single dict with duplicate 'role'/'content'
23 |         # keys would keep only the last pair and silently drop the system prompt.
24 |         {"role": "system", "content": system_prompt},
25 |         {"role": "user", "content": prompt_template},
26 |     ]
27 |     response = ollama.chat(model=model, messages=messages, stream=True)
28 | 
29 | 
30 |     for chunk in response:
31 |         if chunk['message']['content'] is not None:
32 |             yield chunk['message']['content']
33 | 
34 | 
35 | def classify_query(user_query) -> ClassificationResult:
36 |     """
37 |     Classifies a user query related to law and returns a ClassificationResult enum.
38 | 
39 |     Args:
40 |         user_query: The user's question.
41 | 
42 |     Returns:
43 |         ClassificationResult.YES if the query is classified as a law-related question,
44 |         ClassificationResult.NO if it's not a law-related question,
45 |         ClassificationResult.ERROR if there's an error parsing the response.
46 |     """
47 | 
48 |     system_prompt = """
49 |     You are a Lawyer. Classify whether the following question is related to Law, and always give a response in JSON format as {"results": "yes/no"} without giving any reason in the response.
50 |     """
51 | 
52 |     response = ollama.chat(
53 |         model='llama3:8b',
54 |         messages=[
55 |             {"role": "system", "content": system_prompt},
56 |             {"role": "user", "content": user_query}
57 |         ],
58 |         format="json",
59 |     )
60 | 
61 |     # Parse the response into JSON
62 |     response_json = response['message']['content']
63 |     print(response_json)
64 |     try:
65 |         response_dict = json.loads(response_json)
66 |         result = response_dict["results"].lower()
67 |         if result == "yes":
68 |             print("Yes, the query is related to law")
69 |             return ClassificationResult.YES
70 |         elif result == "no":
71 |             print("No, the query is not related to law")
72 |             return ClassificationResult.NO
73 |         else:
74 |             print("Unexpected result in classification")
75 |             return ClassificationResult.ERROR  # Handle unexpected results
76 |     except json.JSONDecodeError:
77 |         print(f"Error parsing JSON response: {response_json}")
78 |         return ClassificationResult.ERROR
79 | 
80 | 
81 | 
82 | if __name__ == "__main__":
83 | 
84 |     user_query = "how to get h1b visa"
85 | 
86 | 
87 |     result = classify_query(user_query)
88 | 
89 |     if result == ClassificationResult.NO:
90 |         for text in generate_response(user_query, None):
91 |             print(text, end="")
92 |     else:
93 |         print("Query is law-related: find the answer from the vector database")
--------------------------------------------------------------------------------
/src/bayesrag/ipfs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | import ipfshttpclient
4 | import time
5 | from loguru import logger
6 | from bayesrag.config import download_path, extract_to_path, directory_path, zip_file_path, IPFS_RETRY_LIMIT, IPFS_RETRY_DELAY
7 | 
8 | 
9 | class IPFSManager:
10 |     def __init__(self, ipfs_address=None, retry_limit=IPFS_RETRY_LIMIT, retry_delay=IPFS_RETRY_DELAY):
11 | 
12 |         self.ipfs_address = ipfs_address or os.getenv('IPFS_ADDRESS', '/ip4/127.0.0.1/tcp/5001/http')
13 |         self.retry_limit = retry_limit
14 |         self.retry_delay = retry_delay
15 |         self.client = self._connect_to_ipfs(self.ipfs_address)
16 | 
17 |     def _connect_to_ipfs(self, ipfs_address):
18 |         """Connect to the IPFS client with retry logic."""
19 |         for attempt in range(self.retry_limit):
20 |             try:
21 |                 ipfs_conn_obj = ipfshttpclient.connect(ipfs_address)
22 |                 logger.info("Connected to IPFS")
23 |                 return ipfs_conn_obj
24 |             except Exception as e:
25 |                 logger.error(f"Error during IPFS connection attempt {attempt + 1}/{self.retry_limit}: {e}")
26 |                 if attempt < self.retry_limit - 1:
27 |                     time.sleep(self.retry_delay)
28 |                 else:
29 |                     logger.critical("Unable to connect to IPFS after
multiple attempts.") 30 | raise 31 | 32 | def _validate_path(self, path, path_type="directory"): 33 | """Validate the existence of a file or directory.""" 34 | if path_type == "directory" and not os.path.isdir(path): 35 | logger.error(f"Directory {path} does not exist or is not accessible.") 36 | raise FileNotFoundError(f"Directory {path} not found.") 37 | elif path_type == "file" and not os.path.isfile(path): 38 | logger.error(f"File {path} does not exist or is not accessible.") 39 | raise FileNotFoundError(f"File {path} not found.") 40 | 41 | def _zip_directory(self, directory_path, zip_file_path): 42 | """Compress a directory into a zip file.""" 43 | self._validate_path(directory_path, "directory") 44 | 45 | logger.info(f"Creating zip file at: {zip_file_path}") 46 | try: 47 | with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf: 48 | for root, _, files in os.walk(directory_path): 49 | for file in files: 50 | file_path = os.path.join(root, file) 51 | zipf.write(file_path, os.path.relpath(file_path, directory_path)) 52 | logger.info(f"Zip file created successfully: {zip_file_path}") 53 | return zip_file_path 54 | except Exception as e: 55 | logger.error(f"Error while zipping directory: {e}") 56 | raise 57 | 58 | def _unzip_file(self, zip_file_path, extract_to_path): 59 | """Extract a zip file to a specific directory.""" 60 | self._validate_path(zip_file_path, "file") 61 | 62 | logger.info(f"Unzipping file: {zip_file_path} to {extract_to_path}") 63 | try: 64 | with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: 65 | zip_ref.extractall(extract_to_path) 66 | logger.info(f"Unzipped file successfully to: {extract_to_path}") 67 | except Exception as e: 68 | logger.error(f"Error while unzipping file: {e}") 69 | raise 70 | 71 | def upload_directory(self, directory_path=directory_path): 72 | """Automatically zip and upload a directory to IPFS.""" 73 | logger.info(f"Zipping and uploading directory: {directory_path}") 74 | try: 75 | zip_file = self._zip_directory(directory_path, zip_file_path) 76 | return self.upload_file(zip_file) 77 | except Exception as e: 78 | logger.error(f"Error during upload: {e}") 79 | return None 80 | 81 | def upload_file(self, file_path): 82 | """Upload a file to IPFS.""" 83 | self._validate_path(file_path, "file") 84 | 85 | for attempt in range(self.retry_limit): 86 | try: 87 | logger.info(f"Uploading file: {file_path}") 88 | result = self.client.add(file_path) 89 | logger.info(f"Upload successful. 
IPFS result: {result}") 90 | return result 91 | except Exception as e: 92 | logger.error(f"Error during file upload attempt {attempt + 1}/{self.retry_limit}: {e}") 93 | if attempt < self.retry_limit - 1: 94 | time.sleep(self.retry_delay) 95 | else: 96 | logger.critical("Failed to upload file after multiple attempts.") 97 | return None 98 | 99 | def download_and_extract(self, file_hash, download_path, extract_to_path): 100 | """Download a zip file from IPFS and extract its contents.""" 101 | logger.info(f"Starting download of IPFS hash: {file_hash}") 102 | 103 | os.makedirs(os.path.dirname(download_path), exist_ok=True) 104 | 105 | for attempt in range(self.retry_limit): 106 | try: 107 | file_content = self.client.cat(file_hash) 108 | with open(download_path, 'wb') as file: 109 | file.write(file_content) 110 | logger.info(f"Downloaded and saved file: {download_path}") 111 | 112 | # Unzip the downloaded file 113 | self._unzip_file(download_path, extract_to_path) 114 | return 115 | except Exception as e: 116 | logger.error(f"Error during download attempt {attempt + 1}/{self.retry_limit}: {e}") 117 | if attempt < self.retry_limit - 1: 118 | time.sleep(self.retry_delay) 119 | else: 120 | logger.critical("Failed to download and extract file after multiple attempts.") 121 | return None 122 | 123 | 124 | # Usage example (to be run only in a non-production environment): 125 | if __name__ == "__main__": 126 | # Initialize IPFSManager with a retry limit and delay 127 | ipfs_manager = IPFSManager() 128 | 129 | # Zip and upload directory 130 | upload_result = ipfs_manager.upload_directory(directory_path) 131 | 132 | # If upload was successful, download and extract the file 133 | if upload_result: 134 | file_hash = upload_result['Hash'] 135 | ipfs_manager.download_and_extract(file_hash, download_path, extract_to_path) 136 | else: 137 | logger.critical("Upload failed, no file to download.") 138 | -------------------------------------------------------------------------------- /src/bayesrag/llmEvaluator.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from deepeval.models import DeepEvalBaseLLM 4 | from ollama import Client 5 | from ollama import ChatResponse 6 | from deepeval import evaluate 7 | 8 | 9 | 10 | class customLM(DeepEvalBaseLLM): 11 | def __init__(self, url="http://localhost:11434",model="llama3:8b"): 12 | self.model = Client(host=url) 13 | self.model_name = model 14 | 15 | def load_model(self, *args, **kwargs) -> Client: 16 | return self.model 17 | 18 | def generate(self, prompt: str) -> str: 19 | client = self.load_model() 20 | completion:ChatResponse = client.chat( 21 | model=self.model_name, 22 | messages=[ 23 | {"role": "system", "content": "Your helpful AI for Evaluation"}, 24 | {"role": "user", "content": prompt} 25 | ], 26 | # format="json" 27 | ) 28 | return completion['message']["content"] 29 | 30 | async def a_generate(self, prompt: str) -> str: 31 | # Use asyncio.to_thread to run the blocking generate method in a separate thread 32 | return self.generate(prompt=prompt) 33 | 34 | def get_model_name(self): 35 | return self.model_name 36 | 37 | from deepeval.metrics import ContextualRelevancyMetric 38 | from deepeval.test_case import LLMTestCase 39 | # from bayesrag.llmEvaluator import customLM 40 | 41 | def deepEvalutor(user_query: str,generated_response:str,context: list[str]): 42 | # Evaluate the output using Contextual Relevancy Metric 43 | metric = ContextualRelevancyMetric( 44 | threshold=0.7, 45 | model=customLM(), 46 | 
include_reason=True 47 | ) 48 | 49 | test_case = LLMTestCase( 50 | input=user_query, 51 | actual_output=generated_response, 52 | retrieval_context=context 53 | ) 54 | # metric.measure(test_case) is not needed here; evaluate() below runs the metric once 55 | 56 | result = evaluate([test_case], [metric]) 57 | score, reason = result.test_results[0].metrics_data[0].score, result.test_results[0].metrics_data[0].reason 58 | 59 | evaluation_results = { 60 | "score": score, 61 | "reason": reason 62 | } 63 | 64 | return evaluation_results 65 | 66 | 67 | if __name__ == "__main__": 68 | 69 | c = customLM() 70 | print(c.generate("Hey")) 71 | 72 | -------------------------------------------------------------------------------- /src/bayesrag/mq.py: -------------------------------------------------------------------------------- 1 | import paho.mqtt.client as mqtt 2 | import time 3 | import json 4 | 5 | from bayesrag.retriever import get_context 6 | from bayesrag.constant import RECEVICE_TOPIC, AGG_RECEIVE_TOPIC, AGG_SEND_TOPIC 7 | 8 | from bayesrag.ipfs import IPFSManager 9 | import queue 10 | from loguru import logger 11 | 12 | 13 | class Mqttclient: 14 | def __init__(self, broker_address="mqtt.eclipseprojects.io", broker_port=1883, replyTopic="USER_TOPIC-", isAdmin=False, collection_name=None): 15 | self.broker_address = broker_address 16 | self.broker_port = broker_port 17 | self.replyTopic = replyTopic # Topic to which the response will be sent. 18 | self.ADMIN_NODE = isAdmin 19 | self.collection_name = collection_name # Qdrant collection this node works with (passed by callers) 20 | self.ipfs = IPFSManager() 21 | self.reply_queue = queue.Queue() 22 | self.client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2) 23 | self.client.on_connect = self.on_connect 24 | self.client.on_message = self.on_message 25 | self.client.connect(self.broker_address, self.broker_port) 26 | self.client.loop_start() 27 | 28 | def on_connect(self, client, userdata, flags, reason_code, properties): 29 | print(f"Connected with result code {reason_code}") 30 | client.subscribe(RECEVICE_TOPIC) 31 | client.subscribe(self.replyTopic) 32 | if self.ADMIN_NODE: 33 | client.subscribe(AGG_RECEIVE_TOPIC) 34 | 35 | 36 | def subscribe(self, topic): 37 | self.client.subscribe(topic) 38 | 39 | # The callback for when a PUBLISH message is received from the server. 40 | def on_message(self, client, userdata, msg): 41 | print(msg.topic + " " + str(msg.payload)) 42 | if msg.topic == self.replyTopic: 43 | self.handle_reply(msg.payload) 44 | elif msg.topic == AGG_SEND_TOPIC: 45 | self.handle_vector_Message(msg.payload) 46 | else: 47 | self.handle_message(msg.payload) # Call the function to handle the received message. 48 | 49 | def handle_reply(self, data): 50 | # Compare each node's response and keep the one with the highest accuracy 51 | data = json.loads(data) 52 | logger.warning(f"Queue size {self.reply_queue.qsize()}") 53 | # Process the reply here. 
54 | # TODO: needs a triggered method so the reply can be forwarded to the requester with the response information 55 | self.reply_queue.put(data) 56 | 57 | def handle_vector_Message(self, data): 58 | data = json.loads(data) 59 | print("Handle vector message") 60 | print("-" * 100) 61 | source_embedding = [self.deserialize_record(record) for record in data.get("data")] 62 | logger.info(f"Received vector message: {source_embedding}") 63 | # Process the vector message 64 | from bayesrag.vector_db import VectorDB 65 | vectorDb = VectorDB() 66 | vectorDb.merge_embeddings(source_embedding) 67 | 68 | directory_path = f"./qdrant_data/collections/{vectorDb.collection_name}" 69 | 70 | # Upload to IPFS 71 | self.ipfs.upload_directory(directory_path) 72 | 73 | 74 | def deserialize_record(self, record): 75 | # Convert each record back to the original format 76 | from qdrant_client.models import Record 77 | return Record(id=record['id'], payload=record['payload'], vector=record['vector']) 78 | 79 | 80 | def serialize_record(self, record): 81 | # Convert each record to a serializable format (dict) 82 | return { 83 | 'id': record.id, 84 | 'payload': record.payload, 85 | 'vector': record.vector 86 | } 87 | 88 | 89 | def handle_message(self, data): 90 | data = json.loads(data) 91 | replayTopic = data.get('replay_topic') 92 | query = data.get('query') 93 | context, score = get_context(query) 94 | 95 | if context: 96 | data = {"context": context, "score": score} 97 | self.send_message(replayTopic, data) 98 | 99 | def send_message(self, send_topic, payload: dict): 100 | payload = json.dumps(payload) # Convert the payload to a JSON string before sending it. 101 | self.client.publish(send_topic, payload, qos=2) 102 | logger.info(f"Sent message: {payload}") 103 | 104 | def send_vector(self, scroll_result): 105 | Vect_Data = [self.serialize_record(record) for record in scroll_result[0]] 106 | data = {"data": Vect_Data} 107 | payload = json.dumps(data) 108 | self.client.publish(AGG_SEND_TOPIC, payload) 109 | logger.info("Vector sent to admin") 110 | 111 | 112 | def stop(self): 113 | self.client.loop_stop() 114 | self.client.disconnect() 115 | 116 | 117 | 118 | 119 | if __name__ == "__main__": 120 | import uuid 121 | import argparse 122 | 123 | parser = argparse.ArgumentParser(description="Get node type information to send the vector to the admin node") 124 | parser.add_argument("--collectionName", type=str, required=True, help="Name of the Vector DB collection") 125 | parser.add_argument("--nodetype", type=str, help="Node type") 126 | args = parser.parse_args() 127 | 128 | 129 | ID = uuid.uuid4() 130 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 131 | collections = args.collectionName 132 | logger.info(f"Collection Name: {collections}") 133 | 134 | if args.nodetype: 135 | client = Mqttclient(collection_name=collections, replyTopic=REPLAY_TOPIC, isAdmin=True) 136 | else: 137 | client = Mqttclient(collection_name=collections, replyTopic=REPLAY_TOPIC, isAdmin=False) 138 | QDRANT_HOST = "http://localhost:6333" # Local Qdrant 139 | from qdrant_client import QdrantClient 140 | 141 | qclient = QdrantClient(url=QDRANT_HOST) 142 | 143 | 144 | ## TODO: 145 | # Needs functions to quit, send the vector, and insert new data, keyed on commands like quit, send, and insert (with a data location) 146 | from bayesrag.utils import wait_for_commands 147 | while True: 148 | command = wait_for_commands() 149 | if command == 'quit': 150 | break 151 | elif command == 'send': 152 | scroll_result = qclient.scroll(collection_name=collections, with_vectors=True) 153 | client.send_vector(scroll_result) 154 | elif command.startswith('insert '): 155 | data_location = 
command.split(' ', 1)[1] 156 | logger.warning(f"'insert' is not implemented yet; requested data location: {data_location}") # TODO: implement Mqttclient.insert_new_data 157 | 158 | client.stop() 159 | logger.info("MQTT client stopped.") 160 | 161 | -------------------------------------------------------------------------------- /src/bayesrag/retriever.py: -------------------------------------------------------------------------------- 1 | from bayesrag.embedder import get_embedding 2 | from bayesrag.vector_db import VectorDB 3 | # from bayesrag.config import QDRANT_COLLECTION 4 | # from bayesrag.constant import SEND_TOPIC 5 | import time 6 | import queue 7 | import uuid 8 | from loguru import logger 9 | 10 | 11 | def get_context(query): 12 | qclient = VectorDB() 13 | 14 | query_embedding = get_embedding(query) 15 | results, score = qclient.search_vector(query_embedding) 16 | 17 | logger.debug(f"Score: {score}") 18 | if score is None: 19 | logger.info("No results found") 20 | return None, None 21 | if score > 0.60: 22 | logger.info(f"Found in local VectorDB as the score is higher than 60%. Score: {score}") 23 | return results, score 24 | 25 | else: 26 | logger.debug("No relevant context found in local VectorDB; ask the LLM instead.") 27 | return None, None # No relevant context found in the local VectorDB 28 | 29 | 30 | # return qclient.search_vector(query_embedding) 31 | 32 | 33 | # def get_Relavant_Context_from_network(query,client,REPLAY_TOPIC,collection_name): 34 | 35 | # query_embedding = get_embedding(query) 36 | # qclient=VectorDB(collection_name) 37 | 38 | # results = qclient.search( 39 | # collection_name=collection_name, 40 | # query_vector=query_embedding, 41 | # limit=1, 42 | # ) 43 | 44 | # logger.debug("Score: ", results[0].score) 45 | # if results[0].score > 0.001: 46 | # logger.debug("No relevant context found in local VectorDB, sending query to network") 47 | # data = { 48 | # "replay_topic": REPLAY_TOPIC, 49 | # "query": query, 50 | # } 51 | # # TODO: send request to other node to get relevant information 52 | # client.send_message(SEND_TOPIC, data) 53 | # count=0 54 | # reply=None 55 | # # Check the reply queue for a response 56 | # while count<3: 57 | # try: 58 | # reply = client.reply_queue.get_nowait()# Wait for a reply for 10 seconds 59 | # logger.critical(f"Received reply from another node: {reply}") 60 | # if reply!=None: 61 | # return reply 62 | # # Process the reply as needed 63 | # except queue.Empty: 64 | # logger.warning("No reply received in the last 10 seconds") 65 | # count+=1 66 | # time.sleep(10) 67 | 68 | 69 | # return reply # TODO: return appropriate message or None for no relevant context found in local VectorDB 70 | 71 | # else: 72 | # print("Found in local VectorDb") 73 | # return results[0].payload["data"], results[0].score 74 | 75 | 76 | if __name__ == '__main__': 77 | user_query = input("Enter your query or type 'q' to quit: ") 78 | from bayesrag.mq import Mqttclient 79 | ID = uuid.uuid4() 80 | REPLAY_TOPIC = f"USER_TOPIC-{ID}" 81 | collection_name = "law_docs_global" 82 | client = Mqttclient(collection_name=collection_name, replyTopic=REPLAY_TOPIC) 83 | while user_query.lower() != "q": 84 | relevant_context, score = get_context(user_query) 85 | user_query = input("\nEnter your query or type 'q' to quit: ") 86 | 87 | -------------------------------------------------------------------------------- /src/bayesrag/text_splitter.py: -------------------------------------------------------------------------------- 1 | from langchain.text_splitter import RecursiveCharacterTextSplitter 2 | 3 | def split_texts(documents): 4 | text_splitter = 
RecursiveCharacterTextSplitter( 5 | chunk_size=2000, 6 | chunk_overlap=100, 7 | length_function=len, 8 | is_separator_regex=False, 9 | ) 10 | texts = text_splitter.split_documents(documents) 11 | return texts 12 | -------------------------------------------------------------------------------- /src/bayesrag/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from loguru import logger 3 | class ClassificationResult(Enum): 4 | """ 5 | Possible outcomes of classifying a user query. 6 | 7 | Members: 8 | YES - the query is classified as a law-related question, 9 | NO - the query is not a law-related question, 10 | ERROR - the classification response could not be parsed. 11 | """ 12 | 13 | YES = "yes" 14 | NO = "no" 15 | ERROR = "error" 16 | 17 | def display_commands(): 18 | logger.info("Available commands:") 19 | print("1. 'quit' - Exit the application") 20 | print("2. 'query' - Query the vector DB with the LLM") 21 | print("3. 'send' - Send vector data") 22 | print("4. 'insert <data_location>' - Insert new data from the specified location") 23 | 24 | def wait_for_commands(): 25 | 26 | 27 | while True: 28 | display_commands() 29 | command = input("\nEnter your command: ").strip().lower() 30 | 31 | if command == 'quit': 32 | return 'quit' 33 | elif command == 'query': 34 | return 'query' 35 | elif command == 'send': 36 | return 'send' 37 | elif command.startswith('insert '): 38 | return command 39 | else: 40 | logger.warning("Invalid command. Please enter a valid command.") -------------------------------------------------------------------------------- /src/bayesrag/vector_db.py: -------------------------------------------------------------------------------- 1 | from qdrant_client import QdrantClient, models 2 | from bayesrag.embedder import get_embedding 3 | from loguru import logger 4 | from bayesrag.config import QDRANT_HOST, QDRANT_COLLECTION 5 | class VectorDB: 6 | def __init__(self, collection_name=QDRANT_COLLECTION, qdrant_host=QDRANT_HOST): 7 | self.collection_name = collection_name 8 | self.qclient = QdrantClient(url=qdrant_host) 9 | 10 | def create_db(self): 11 | if self.qclient.collection_exists(collection_name=self.collection_name): 12 | logger.debug(f"Vector DB already exists: {self.collection_name}") 13 | else: 14 | self.qclient.create_collection( 15 | collection_name=self.collection_name, 16 | vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE), 17 | ) 18 | logger.info(f"Vector DB successfully created: {self.collection_name}") 19 | 20 | def search_vector(self, query_embedding, limit=1): 21 | results = self.qclient.search( 22 | collection_name=self.collection_name, 23 | query_vector=query_embedding, 24 | limit=limit, 25 | ) 26 | if results: 27 | top_result = results[0] 28 | return top_result.payload["data"], top_result.score 29 | else: 30 | logger.warning("No results found") 31 | return None, None 32 | 33 | def upsert_embeddings(self, chunks): 34 | logger.info("Upserting embeddings into Vector DB...") 35 | for i, chunk in enumerate(chunks): 36 | embedding = get_embedding(chunk.page_content) 37 | self.qclient.upsert( 38 | collection_name=self.collection_name, 39 | points=[ 40 | models.PointStruct( 41 | id=i, vector=embedding, payload={"data": chunk.page_content, "metadata": chunk.metadata} 42 | ), 43 | ], 44 | ) 45 | logger.info("Embeddings created successfully") 46 | 47 | def merge_embeddings(self, source_points): 48 | for point in source_points: 49 | self.qclient.upsert( 50 | 
collection_name=self.collection_name, 51 | points=[ 52 | models.PointStruct( 53 | id=point.id, 54 | vector=point.vector, 55 | payload=point.payload 56 | ), 57 | ], 58 | ) 59 | logger.info("Merged embeddings successfully") 60 | --------------------------------------------------------------------------------
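A minimal usage sketch (not one of the repository files above) of how the pieces fit together: build the Qdrant collection with VectorDB, index chunks produced by split_texts, and answer queries through get_context. It assumes a Qdrant instance is reachable at the configured QDRANT_HOST and that document chunks already exist; the query string and score formatting are illustrative only.

from bayesrag.vector_db import VectorDB
from bayesrag.retriever import get_context

# Create the collection (no-op if it already exists) and index pre-split chunks.
db = VectorDB()  # defaults to QDRANT_COLLECTION / QDRANT_HOST from bayesrag.config
db.create_db()
# db.upsert_embeddings(chunks)  # `chunks` are the LangChain Documents returned by split_texts()

# get_context returns (payload, score) when the top hit scores above 0.60, otherwise (None, None).
context, score = get_context("What notice period does the lease require?")  # illustrative query
if context:
    print(f"score={score:.2f}: {context[:200]}")
else:
    print("No local context found; fall back to the LLM or query the MQTT network.")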