├── .gitignore
├── imgs
│   ├── qdurllm.png
│   ├── tutorial1.png
│   ├── tutorial2.png
│   └── tutorial3.png
├── app
│   ├── __pycache__
│   │   ├── rag.cpython-312.pyc
│   │   ├── loadUrls.cpython-312.pyc
│   │   └── texInference.cpython-312.pyc
│   ├── loadUrls.py
│   ├── texInference.py
│   ├── app.py
│   └── rag.py
├── compose.yaml
├── docs
│   ├── about.md
│   ├── contributing.md
│   └── index.md
├── mkdocs.yaml
├── LICENSE
├── CONTRIBUTING.md
├── README.md
└── environment.yml
/.gitignore:
--------------------------------------------------------------------------------
1 | */.cache
2 | application/flagged
3 | qdrant_storage/
--------------------------------------------------------------------------------
/imgs/qdurllm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/imgs/qdurllm.png
--------------------------------------------------------------------------------
/imgs/tutorial1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/imgs/tutorial1.png
--------------------------------------------------------------------------------
/imgs/tutorial2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/imgs/tutorial2.png
--------------------------------------------------------------------------------
/imgs/tutorial3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/imgs/tutorial3.png
--------------------------------------------------------------------------------
/app/__pycache__/rag.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/app/__pycache__/rag.cpython-312.pyc
--------------------------------------------------------------------------------
/app/__pycache__/loadUrls.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/app/__pycache__/loadUrls.cpython-312.pyc
--------------------------------------------------------------------------------
/app/__pycache__/texInference.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AstraBert/qdurllm/HEAD/app/__pycache__/texInference.cpython-312.pyc
--------------------------------------------------------------------------------
/compose.yaml:
--------------------------------------------------------------------------------
1 | networks:
2 | mynet:
3 | driver: bridge
4 |
5 | services:
6 | qdrant:
7 | image: qdrant/qdrant
8 | ports:
9 | - "6333:6333"
10 | volumes:
11 | - "./qdrant_storage:/qdrant/storage"
12 | networks:
13 | - mynet
14 |
15 |
--------------------------------------------------------------------------------
/docs/about.md:
--------------------------------------------------------------------------------
1 | # About `qdurllm`
2 |
3 | **`qdurllm`** is an open source local Gradio application that leverages:
4 |
5 | - semantic caching
6 | - sparse retrieval + reranking
7 | - LLM inference
8 |
9 | to let you upload website pages to a Qdrant database, search them, and chat with them (a minimal sketch of the flow is shown at the end of this page).
10 | 
11 | > _Powered by [Qdrant](https://qdrant.tech), [FastEmbed](https://pypi.org/project/fastembed/), [SentenceTransformers](https://sbert.net/), [Nomic AI](https://www.nomic.ai/), [HuggingFace](https://huggingface.co), [Gradio](https://gradio.app), [Unstructured.io](https://unstructured.io/) and [LangChain](https://www.langchain.com)_
12 |
13 |
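14 | The sketch below shows how these pieces fit together at query time. It assumes the `SemanticCache`/`NeuralSearcher` instances and the `pipe` helper set up in `app/app.py` and `app/texInference.py`; the parameter values are illustrative, not prescriptive:
15 | 
16 | ```python
17 | # 1. Check the semantic cache for an already-answered, similar question
18 | cached_answer = semantic_cache.search_cache(question)
19 | if cached_answer:
20 |     answer = cached_answer
21 | else:
22 |     # 2. Sparse (SPLADE) retrieval from Qdrant, followed by dense reranking
23 |     context, url = searcher.search_text(question)
24 |     # 3. LLM inference over the reranked context
25 |     prompt = [
26 |         {"role": "system", "content": "You are a helpful web-searching assistant."},
27 |         {"role": "user", "content": f"This is the context information to reply to my prompt:\n\n{context}"},
28 |         {"role": "user", "content": question},
29 |     ]
30 |     answer = pipe(prompt, temperature=0.1, top_p=1.0, max_new_tokens=512, repetition_penalty=1.2)
31 |     # 4. Cache the new question/answer pair for future queries
32 |     semantic_cache.upload_to_cache(question, answer)
33 | ```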
--------------------------------------------------------------------------------
/mkdocs.yaml:
--------------------------------------------------------------------------------
1 | site_name: qdurllm
2 | theme:
3 | name: material
4 | features:
5 | - navigation.instant
6 | - navigation.tracking
7 | - navigation.tabs
8 | - navigation.sections
9 | - navigation.expand
10 | - search.highlight
11 | - search.share
12 | palette:
13 | scheme: default
14 | primary: indigo
15 | accent: indigo
16 | plugins:
17 | - search
18 | - awesome-pages
19 | markdown_extensions:
20 | - pymdownx.highlight
21 | - pymdownx.superfences
22 | - pymdownx.inlinehilite
23 | - pymdownx.snippets
24 | - pymdownx.tasklist
25 | - admonition
26 | - toc
27 | - attr_list
28 | nav:
29 | - Home: index.md
30 | - About: about.md
31 | - Contributing: contributing.md
--------------------------------------------------------------------------------
/app/loadUrls.py:
--------------------------------------------------------------------------------
1 | from langchain_community.document_loaders.url import UnstructuredURLLoader
2 | from langchain.text_splitter import CharacterTextSplitter
3 | from rag import upload_text_to_qdrant, client
4 | from typing import List, Dict, Union
5 |
6 | def urlload(urls: str) -> Union[List[Dict[str, str]], str]:
7 |     links = [u.strip() for u in urls.split(",")]  # accept one URL or several comma-separated URLs
8 |     try:
9 |         loader = UnstructuredURLLoader(
10 |             urls=links, mode="elements",
11 | strategy="fast"
12 | )
13 | docs = loader.load()
14 | text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
15 | pages = text_splitter.split_documents(docs)
16 |         contents = [{"text": page.page_content, "url": page.metadata["source"]} for page in pages]
17 | return contents
18 | except Exception as e:
19 | return f"An error occurred while parsing the URLs: {e}"
20 |
21 |
22 | def to_db(contents: List[Dict[str, str]]) -> None:
23 | c = 0
24 | for content in contents:
25 | upload_text_to_qdrant(client, "memory", content, c)
26 | c+=1
27 | return
28 |
29 |
30 |
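31 | # Example (hypothetical) usage, assuming a Qdrant instance is running on localhost:6333:
32 | #
33 | #   contents = urlload("https://example.com,https://example.org")
34 | #   if isinstance(contents, list):
35 | #       to_db(contents)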
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Clelia (Astra) Bertelli
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/app/texInference.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')
3 | import torch
4 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5 | from trl import setup_chat_format
6 | from typing import Dict, List
7 |
8 | model_name = 'Qwen/Qwen2.5-1.5B-Instruct'
9 | quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type='nf4')
10 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
11 | quantized_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, quantization_config=quantization_config, device_map=device)  # bitsandbytes-quantized models cannot be moved with .to()
12 | tokenizer = AutoTokenizer.from_pretrained(model_name)
13 | tokenizer.chat_template = None
14 | (quantized_model, tokenizer) = setup_chat_format(model = quantized_model, tokenizer = tokenizer)
15 |
16 | def pipe(prompt: List[Dict[str, str]], temperature: float, top_p: float, max_new_tokens: int, repetition_penalty: float) -> str:
17 |     tokenized_chat = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors='pt').to(device)
18 |     outputs = quantized_model.generate(tokenized_chat, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty)
19 | results = tokenizer.decode(outputs[0])
20 | return results
21 |
22 |
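23 | # Example (hypothetical) usage with chat-formatted messages:
24 | #
25 | #   messages = [{"role": "user", "content": "What is Qdrant?"}]
26 | #   raw = pipe(messages, temperature=0.1, top_p=1.0, max_new_tokens=64, repetition_penalty=1.2)
27 | #   # `raw` is the full ChatML transcript; app.py extracts the assistant turn from it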
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to `qdurllm`
2 | 
3 | Do you want to contribute to this project? Make sure to read these guidelines first :)
4 |
5 | ## Issue
6 |
7 | **When to do it**:
8 |
9 | - You found bugs but you don't know how to solve them, or don't have the time/will to fix them
10 | - You want new features but you don't know how to implement them, or don't have the time/will to do so
11 |
12 | > ⚠️ _Always check open and closed issues before you submit yours to avoid duplicates_
13 |
14 | **How to do it**:
15 |
16 | - Open an issue
17 | - Give the issue a meaningful title (short but effective problem description)
18 | - Describe the problem following the issue template
19 |
20 | ## Traditional contribution
21 |
22 | **When to do it**:
23 |
24 | - You found bugs and corrected them
25 | - You optimized/improved the code
26 | - You added new features that you think could be useful to others
27 |
28 | **How to do it**:
29 |
30 | 1. Fork this repository
31 | 2. Commit your changes
32 | 3. Submit a pull request (make sure to provide a thorough description of the changes)
33 |
34 |
35 | ## Showcase your qdurllm
36 |
37 | **When to do it**:
38 |
39 | - You modified the base application with new features, but you can't or don't want to merge them into the original qdurllm
40 |
41 | **How to do it**:
42 |
43 | - Go to the [_GitHub Discussions > Show and tell_](https://github.com/AstraBert/qdurllm/discussions/categories/show-and-tell) page
44 | - Open a new discussion there, describing your qdurllm application
45 |
46 | ### Thanks for contributing!
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing to `qdurllm`
2 |
3 | Do you want to contribute to this project? Make sure to read these guidelines first :)
4 |
5 | ## Issue
6 |
7 | **When to do it**:
8 |
9 | - You found bugs but you don't know how to solve them, or don't have the time/will to fix them
10 | - You want new features but you don't know how to implement them, or don't have the time/will to do so
11 |
12 | > ⚠️ _Always check open and closed issues before you submit yours to avoid duplicates_
13 |
14 | **How to do it**:
15 |
16 | - Open an issue
17 | - Give the issue a meaningful title (short but effective problem description)
18 | - Describe the problem following the issue template
19 |
20 | ## Traditional contribution
21 |
22 | **When to do it**:
23 |
24 | - You found bugs and corrected them
25 | - You optimized/improved the code
26 | - You added new features that you think could be useful to others
27 |
28 | **How to do it**:
29 |
30 | 1. Fork this repository
31 | 2. Commit your changes
32 | 3. Submit a pull request (make sure to provide a thorough description of the changes)
33 |
34 |
35 | ## Showcase your qdurllm
36 |
37 | **When to do it**:
38 |
39 | - You modified the base application with new features, but you can't or don't want to merge them into the original qdurllm
40 |
41 | **How to do it**:
42 |
43 | - Go to the [_GitHub Discussions > Show and tell_](https://github.com/AstraBert/qdurllm/discussions/categories/show-and-tell) page
44 | - Open a new discussion there, describing your qdurllm application
45 |
46 | ### Thanks for contributing!
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # qdurllm
2 | *Search your favorite websites and chat with them, on your desktop🌐*
3 |
4 | # Docs in active development!👷‍♀️
5 |
6 | They will soon be available at https://astrabert.github.io/qdurllm/
7 |
8 | In the meantime, refer to the **Quickstart guide** in this README!
9 |
10 | ## Quickstart
11 |
12 | ### 1. Prerequisites
13 |
14 | - [`conda`](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html) package manager
15 | - [`docker`](https://www.docker.com/) and [`docker compose`](https://docs.docker.com/compose/).
16 |
17 | ### 2. Installation
18 |
19 | > [!IMPORTANT]
20 | > _This is only for the pre-release of `v1.0.0`, i.e. `v1.0.0-rc.0`_
21 |
22 | 1. Clone the `january-2025` branch of this GitHub repo:
23 |
24 | ```bash
25 | git clone -b january-2025 --single-branch https://github.com/AstraBert/qdurllm.git
26 | cd qdurllm/
27 | ```
28 |
29 | 2. Create the `conda` environment:
30 |
31 | ```bash
32 | conda env create -f environment.yml
33 | ```
34 |
35 | 3. Pull `qdrant` from Docker Hub:
36 |
37 | ```bash
38 | docker pull qdrant/qdrant
39 | ```
40 |
41 | ### 3. Launching
42 |
43 | 1. Launch `qdrant` vector database services with `docker compose` (from within the `qdurllm` folder):
44 |
45 | ```bash
46 | docker compose up
47 | ```
48 |
49 | 2. Activate the `qdurllm` conda environment you just created:
50 |
51 | ```bash
52 | conda activate qdurllm
53 | ```
54 |
55 | 3. Go inside the `app` directory and launch the Gradio application:
56 |
57 | ```bash
58 | cd app/
59 | python3 app.py
60 | ```
61 |
62 | You should see the app running on `http://localhost:7860` once all the models are downloaded from HuggingFace Hub.
63 |
64 | ## Relies on
65 |
66 | - [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct), with Apache 2.0 license
67 | - [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base), with Apache 2.0 license
68 | - [prithivida/Splade_PP_en_v1](https://huggingface.co/prithivida/Splade_PP_en_v1), with Apache 2.0 license
69 |
70 |
71 | ## Give feedback!
72 |
73 | Comment on the [**discussion thread created for this release**](https://github.com/AstraBert/qdurllm/discussions) with your feedback or create [**issues**](https://github.com/AstraBert/qdurllm/issues) :)
74 |
75 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # qdurllm
2 | *Search your favorite websites and chat with them, on your desktop🌐*
3 |
4 | # Docs in active development!👷‍♀️
5 |
6 | They will soon be available at https://astrabert.github.io/qdurllm/
7 | 
8 | In the meantime, refer to the **Quickstart guide** below!
9 |
10 | ## Quickstart
11 |
12 | ### 1. Prerequisites
13 |
14 | - [`conda`](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html) package manager
15 | - [`docker`](https://www.docker.com/) and [`docker compose`](https://docs.docker.com/compose/).
16 |
17 | ### 2. Installation
18 |
19 | > [!IMPORTANT]
20 | > _This is only for the pre-release of `v1.0.0`, i.e. `v1.0.0-rc.0`_
21 |
22 | 1. Clone the `january-2025` branch of this GitHub repo:
23 |
24 | ```bash
25 | git clone -b january-2025 --single-branch https://github.com/AstraBert/qdurllm.git
26 | cd qdurllm/
27 | ```
28 |
29 | 2. Create the `conda` environment:
30 |
31 | ```bash
32 | conda env create -f environment.yml
33 | ```
34 |
35 | 3. Pull `qdrant` from Docker Hub:
36 |
37 | ```bash
38 | docker pull qdrant/qdrant
39 | ```
40 |
41 | ### 3. Launching
42 |
43 | 1. Launch `qdrant` vector database services with `docker compose` (from within the `qdurllm` folder):
44 |
45 | ```bash
46 | docker compose up
47 | ```
48 |
49 | 2. Activate the `qdurllm` conda environment you just created:
50 |
51 | ```bash
52 | conda activate qdurllm
53 | ```
54 |
55 | 3. Go inside the `app` directory and launch the Gradio application:
56 |
57 | ```bash
58 | cd app/
59 | python3 app.py
60 | ```
61 |
62 | You should see the app running on `http://localhost:7860` once all the models are downloaded from HuggingFace Hub.
63 |
64 | ## Relies on
65 |
66 | - [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct), with Apache 2.0 license
67 | - [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base), with Apache 2.0 license
68 | - [prithivida/Splade_PP_en_v1](https://huggingface.co/prithivida/Splade_PP_en_v1), with Apache 2.0 license
69 |
70 |
71 | ## Give feedback!
72 |
73 | Comment on the [**discussion thread created for this release**](https://github.com/AstraBert/qdurllm/discussions) with your feedback or create [**issues**](https://github.com/AstraBert/qdurllm/issues) :)
74 |
75 |
--------------------------------------------------------------------------------
/app/app.py:
--------------------------------------------------------------------------------
1 | from rag import client, SemanticCache, NeuralSearcher, dense_encoder, sparse_encoder
2 | from texInference import pipe
3 | from loadUrls import urlload, to_db
4 | import gradio as gr
5 | import time
6 |
7 |
8 | searcher = NeuralSearcher("memory", client, dense_encoder, sparse_encoder)
9 | semantic_cache = SemanticCache(client, dense_encoder, "semantic_cache")
10 |
11 |
12 | def upload2qdrant(url):
13 |     # urlload returns a list of chunks on success, or an error string on failure
14 |     documents = urlload(url)
15 |     if isinstance(documents, list):
16 |         try:
17 |             to_db(documents)
18 |             return "URLs successfully uploaded to Qdrant collection!"
19 |         except Exception as e:
20 |             return f"An error occurred: {e}"
21 | else:
22 | return documents
23 |
24 | demo0 = gr.Interface(fn=upload2qdrant, title="Upload URL content to Qdrant", inputs=gr.Textbox(label="URL(s)", info="Add one URL or more (if more, you should provide them comma-separated, like this: URL1,URL2,...,URLn)"), outputs=gr.Textbox(label="Logs"))
25 |
26 |
27 | def reply(message, history, ntokens, rep_pen, temp, topp, systemins):
28 | sr = semantic_cache.search_cache(message)
29 | if sr:
30 | response = sr
31 | this_hist = ''
32 | for c in response:
33 | this_hist+=c
34 | time.sleep(0.001)
35 | yield this_hist
36 | else:
37 | context, url = searcher.search_text(message)
38 | prompt = [{"role": "system", "content": systemins}, {"role": "user", "content": f"This is the context information to reply to my prompt:\n\n{context}"}, {"role": "user", "content": message}]
39 | results = pipe(prompt, temp, topp, ntokens, rep_pen)
40 | results = results.split("<|im_start|>assistant\n")[1]
41 | response = results.replace("<|im_end|>", "")
42 | semantic_cache.upload_to_cache(message, response)
43 | this_hist = ''
44 | for c in response:
45 | this_hist+=c
46 | time.sleep(0.001)
47 | yield this_hist
48 |
49 | def direct_search(input_text):
50 | context, url = searcher.search_text(input_text)
51 | return context, f"Reference website [here]({url})"
52 |
53 | demo2 = gr.Interface(fn=direct_search, inputs=gr.Textbox(label="Search Query", placeholder="Input your search query here..."), outputs=[gr.Textbox(label="Retrieved Content"), gr.Markdown(label="URL")], title="Search your URLs")
54 |
55 | user_max_new_tokens = gr.Slider(0, 4096, value=512, label="Max new tokens", info="Select max output tokens (higher number of tokens will result in a longer latency)")
56 | user_max_temperature = gr.Slider(0, 1, value=0.1, step=0.1, label="Temperature", info="Select generation temperature")
57 | user_max_rep_pen = gr.Slider(0, 10, value=1.2, step=0.1, label="Repetition penalty", info="Select repetition penalty")
58 | user_top_p = gr.Slider(0.1, 1, value=1, step=0.1, label="top_p", info="Select top_p for the generation")
59 | system_ins = gr.Textbox(label="System Prompt", info="Insert your system prompt here", value="You are a helpful web-searching assistant. You reply based on the contextual information you are provided with and on your knowledge.")
60 | additional_accordion = gr.Accordion(label="Parameters to be set before you start chatting", open=True)
61 | demo1 = gr.ChatInterface(fn=reply, title="Chat with your URLs", additional_inputs=[user_max_new_tokens, user_max_temperature, user_max_rep_pen, user_top_p, system_ins], additional_inputs_accordion=additional_accordion)
62 |
63 | my_theme = gr.themes.Soft(primary_hue=gr.themes.colors.rose, secondary_hue=gr.themes.colors.pink)
64 |
65 | demo = gr.TabbedInterface([demo0, demo1, demo2], ["Upload URLs", "Chat with URLs", "Direct Search"], theme=my_theme)
66 |
67 | if __name__ == "__main__":
68 | demo.launch(server_name="0.0.0.0", server_port=7860)
--------------------------------------------------------------------------------
/app/rag.py:
--------------------------------------------------------------------------------
1 | from qdrant_client import QdrantClient, models
2 | from fastembed import SparseTextEmbedding
3 | from sentence_transformers import SentenceTransformer
4 | import torch
5 | import uuid
6 | from typing import List, Dict
7 |
8 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
9 |
10 | dense_encoder = SentenceTransformer('nomic-ai/modernbert-embed-base').to(device)
11 | sparse_encoder = SparseTextEmbedding(model_name = 'prithivida/Splade_PP_en_v1')
12 |
13 | client = QdrantClient('http://localhost:6333')
14 | client.recreate_collection(
15 | collection_name = 'memory',
16 | vectors_config = {},
17 | sparse_vectors_config = {
18 | 'sparse-text': models.SparseVectorParams(index = models.SparseIndexParams(on_disk = False))
19 | })
20 | client.recreate_collection(
21 | collection_name= 'semantic_cache',
22 | vectors_config = models.VectorParams(size = 768, distance = models.Distance.COSINE)
23 | )
24 |
25 | def reranking(docs: List[str], query: str, dense_encoder: SentenceTransformer):
26 | query = "search_query: " + query
27 | docs = ["search_document: " + doc for doc in docs]
28 | query_vector = dense_encoder.encode(query)
29 | docs_vector = dense_encoder.encode(docs)
30 | similarities = dense_encoder.similarity(docs_vector, query_vector)
31 | sims = [float(sim[0]) for sim in similarities]
32 | text2sims = {docs[i]: sims[i] for i in range(len(sims))}
33 | sorted_items = sorted(text2sims.items(), key=lambda x: x[1], reverse=True)
34 | return sorted_items[0][0]
35 |
36 |
37 | def get_sparse_embedding(text: str, model: SparseTextEmbedding):
38 | embeddings = list(model.embed(text))
39 |     vector = {"sparse-text": models.SparseVector(indices=embeddings[0].indices, values=embeddings[0].values)}
40 | return vector
41 |
42 |
43 | def get_query_sparse_embedding(text: str, model: SparseTextEmbedding):
44 | embeddings = list(model.embed(text))
45 | query_vector = models.NamedSparseVector(
46 | name="sparse-text",
47 | vector=models.SparseVector(
48 | indices=embeddings[0].indices,
49 | values=embeddings[0].values,
50 | ),
51 | )
52 | return query_vector
53 |
54 |
55 | def upload_text_to_qdrant(client: QdrantClient, collection_name: str, docs: dict, point_id_sparse: int):
56 | try:
57 | client.upsert(
58 | collection_name=collection_name,
59 | points=[
60 | models.PointStruct(
61 | id=point_id_sparse,
62 | vector=get_sparse_embedding(docs["text"], sparse_encoder),
63 | payload=docs,
64 | )
65 | ],
66 | )
67 | return True
68 |     except Exception:
69 | return False
70 |
71 |
72 | class SemanticCache:
73 | def __init__(self, client: QdrantClient, text_encoder: SentenceTransformer, collection_name: str, threshold: float = 0.75):
74 | self.client = client
75 | self.text_encoder = text_encoder
76 | self.collection_name = collection_name
77 | self.threshold = threshold
78 |     def upload_to_cache(self, question: str, answer: str):
79 | docs = {"question": question, "answer": answer}
80 | tct = 'search_document: ' + question
81 | point_id = str(uuid.uuid4())
82 | self.client.upsert(
83 | collection_name=self.collection_name,
84 | points=[
85 | models.PointStruct(
86 | id=point_id,
87 | vector=self.text_encoder.encode(tct).tolist(),
88 | payload=docs,
89 | )
90 | ],
91 | )
92 | def search_cache(self, question: str, limit: int = 5):
93 | question = 'search_query: ' + question
94 | vector = self.text_encoder.encode(question).tolist()
95 | search_result = self.client.search(
96 | collection_name=self.collection_name,
97 | query_vector=vector,
98 | query_filter=None,
99 | limit=limit,
100 | )
101 | payloads = [hit.payload["answer"] for hit in search_result if hit.score > self.threshold]
102 | if len(payloads) > 0:
103 | return payloads[0]
104 | else:
105 | return ""
106 |
107 |
108 |
109 | class NeuralSearcher:
110 | def __init__(self, text_collection_name: str, client: QdrantClient, dense_encoder: SentenceTransformer , sparse_encoder: SparseTextEmbedding):
111 | self.text_collection_name = text_collection_name
112 | self.dense_encoder = dense_encoder
113 | self.qdrant_client = client
114 | self.sparse_encoder = sparse_encoder
115 | def search_text(self, text: str, limit: int = 5):
116 | search_result_sparse = self.qdrant_client.search(
117 | collection_name=self.text_collection_name,
118 | query_vector=get_query_sparse_embedding(text, self.sparse_encoder),
119 | query_filter=None,
120 | limit=limit,
121 | )
122 | payloads = [hit.payload["text"] for hit in search_result_sparse]
123 | urls = [hit.payload["url"] for hit in search_result_sparse]
124 | txt2url = {payloads[i]: urls[i] for i in range(len(urls))}
125 | context = reranking(payloads, text, self.dense_encoder)
126 | context = context.replace("search_document: ", "")
127 | return context, txt2url[context]
128 |
129 |
130 |
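131 | # Example (hypothetical) direct usage, assuming the collections above have been populated:
132 | #
133 | #   searcher = NeuralSearcher("memory", client, dense_encoder, sparse_encoder)
134 | #   context, url = searcher.search_text("What is Qdrant?")
135 | #
136 | #   cache = SemanticCache(client, dense_encoder, "semantic_cache")
137 | #   cache.upload_to_cache("What is Qdrant?", context)
138 | #   print(cache.search_cache("what is qdrant"))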
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: qdurllm
2 | channels:
3 | - pytorch
4 | - nvidia
5 | - conda-forge
6 | dependencies:
7 | - _libgcc_mutex=0.1=conda_forge
8 | - _openmp_mutex=4.5=2_kmp_llvm
9 | - aiohappyeyeballs=2.4.4=pyhd8ed1ab_1
10 | - aiohttp=3.11.11=py312h178313f_0
11 | - aiosignal=1.3.2=pyhd8ed1ab_0
12 | - annotated-types=0.7.0=pyhd8ed1ab_1
13 | - anyio=4.8.0=pyhd8ed1ab_0
14 | - async-timeout=4.0.3=pyhd8ed1ab_0
15 | - attrs=24.3.0=pyh71513ae_0
16 | - aws-c-auth=0.7.22=h96bc93b_2
17 | - aws-c-cal=0.6.14=h88a6e22_1
18 | - aws-c-common=0.9.19=h4ab18f5_0
19 | - aws-c-compression=0.2.18=h83b837d_6
20 | - aws-c-event-stream=0.4.2=ha47c788_12
21 | - aws-c-http=0.8.1=h29d6fba_17
22 | - aws-c-io=0.14.8=h21d4f22_5
23 | - aws-c-mqtt=0.10.4=h759edc4_4
24 | - aws-c-s3=0.5.9=h594631b_3
25 | - aws-c-sdkutils=0.1.16=h83b837d_2
26 | - aws-checksums=0.1.18=h83b837d_6
27 | - aws-crt-cpp=0.26.9=he3a8b3b_0
28 | - aws-sdk-cpp=1.11.329=hba8bd5f_3
29 | - backoff=2.2.1=pyhd8ed1ab_1
30 | - beautifulsoup4=4.12.3=pyha770c72_1
31 | - blas=2.116=mkl
32 | - blas-devel=3.9.0=16_linux64_mkl
33 | - brotli-python=1.1.0=py312h2ec8cdc_2
34 | - bzip2=1.0.8=h4bc722e_7
35 | - c-ares=1.34.4=hb9d3cd8_0
36 | - ca-certificates=2024.12.14=hbcca054_0
37 | - certifi=2024.2.2=pyhd8ed1ab_0
38 | - cffi=1.17.1=py312h06ac9bb_0
39 | - chardet=5.2.0=py312h7900ff3_2
40 | - charset-normalizer=3.3.2=pyhd8ed1ab_0
41 | - click=8.1.7=unix_pyh707e725_1
42 | - colorama=0.4.6=pyhd8ed1ab_1
43 | - cryptography=44.0.0=py312hda17c39_0
44 | - cuda-cudart=12.4.127=0
45 | - cuda-cupti=12.4.127=0
46 | - cuda-libraries=12.4.1=0
47 | - cuda-nvrtc=12.4.127=0
48 | - cuda-nvtx=12.4.127=0
49 | - cuda-opencl=12.6.77=0
50 | - cuda-runtime=12.4.1=0
51 | - cuda-version=12.6=3
52 | - dataclasses-json=0.6.7=pyhd8ed1ab_1
53 | - datasets=3.2.0=pyhd8ed1ab_0
54 | - deepdiff=8.1.1=pyhd8ed1ab_0
55 | - dill=0.3.8=pyhd8ed1ab_0
56 | - emoji=2.14.0=pyhd8ed1ab_1
57 | - eval-type-backport=0.2.2=pyhd8ed1ab_0
58 | - eval_type_backport=0.2.2=pyha770c72_0
59 | - exceptiongroup=1.2.2=pyhd8ed1ab_1
60 | - ffmpeg=4.3=hf484d3e_0
61 | - filelock=3.16.1=pyhd8ed1ab_1
62 | - filetype=1.2.0=pyhd8ed1ab_0
63 | - freetype=2.12.1=h267a509_2
64 | - frozenlist=1.5.0=py312h66e93f0_0
65 | - fsspec=2024.9.0=pyhff2d567_0
66 | - gflags=2.2.2=h5888daf_1005
67 | - giflib=5.2.2=hd590300_0
68 | - glog=0.7.1=hbabe93e_0
69 | - gmp=6.3.0=hac33072_2
70 | - gmpy2=2.1.5=py312h7201bc8_3
71 | - gnutls=3.6.13=h85f3911_1
72 | - greenlet=3.1.1=py312h2ec8cdc_1
73 | - h11=0.14.0=pyhd8ed1ab_1
74 | - h2=4.1.0=pyhd8ed1ab_1
75 | - hpack=4.0.0=pyhd8ed1ab_1
76 | - html5lib=1.1=pyhd8ed1ab_2
77 | - httpcore=1.0.7=pyh29332c3_1
78 | - httpx=0.28.1=pyhd8ed1ab_0
79 | - huggingface_hub=0.27.1=pyhd8ed1ab_0
80 | - hyperframe=6.0.1=pyhd8ed1ab_1
81 | - icu=73.2=h59595ed_0
82 | - idna=3.6=pyhd8ed1ab_0
83 | - jinja2=3.1.5=pyhd8ed1ab_0
84 | - joblib=1.3.2=pyhd8ed1ab_0
85 | - jsonpatch=1.33=pyhd8ed1ab_1
86 | - jsonpath-python=1.0.6=pyhff2d567_2
87 | - jsonpointer=3.0.0=py312h7900ff3_1
88 | - keyutils=1.6.1=h166bdaf_0
89 | - krb5=1.21.3=h659f571_0
90 | - lame=3.100=h166bdaf_1003
91 | - langchain=0.3.14=pyhd8ed1ab_0
92 | - langchain-core=0.3.29=pyhd8ed1ab_0
93 | - langchain-text-splitters=0.3.5=pyhd8ed1ab_0
94 | - langdetect=1.0.9=pyhd8ed1ab_1
95 | - langsmith=0.2.10=pyhd8ed1ab_0
96 | - lcms2=2.16=hb7c19ff_0
97 | - ld_impl_linux-64=2.43=h712a8e2_2
98 | - lerc=4.0.0=h27087fc_0
99 | - libabseil=20240116.2=cxx17_he02047a_1
100 | - libarrow=16.1.0=hcb6531f_6_cpu
101 | - libarrow-acero=16.1.0=hac33072_6_cpu
102 | - libarrow-dataset=16.1.0=hac33072_6_cpu
103 | - libarrow-substrait=16.1.0=h7e0c224_6_cpu
104 | - libblas=3.9.0=16_linux64_mkl
105 | - libbrotlicommon=1.1.0=hb9d3cd8_2
106 | - libbrotlidec=1.1.0=hb9d3cd8_2
107 | - libbrotlienc=1.1.0=hb9d3cd8_2
108 | - libcblas=3.9.0=16_linux64_mkl
109 | - libcrc32c=1.1.2=h9c3ff4c_0
110 | - libcublas=12.4.5.8=0
111 | - libcufft=11.2.1.3=0
112 | - libcufile=1.11.1.6=0
113 | - libcurand=10.3.7.77=0
114 | - libcurl=8.8.0=hca28451_1
115 | - libcusolver=11.6.1.9=0
116 | - libcusparse=12.3.1.170=0
117 | - libdeflate=1.20=hd590300_0
118 | - libedit=3.1.20240808=pl5321h7949ede_0
119 | - libev=4.33=hd590300_2
120 | - libevent=2.1.12=hf998b51_1
121 | - libexpat=2.6.4=h5888daf_0
122 | - libffi=3.4.2=h7f98852_5
123 | - libgcc=14.2.0=h77fa898_1
124 | - libgcc-ng=14.2.0=h69a702a_1
125 | - libgfortran=14.2.0=h69a702a_1
126 | - libgfortran-ng=14.2.0=h69a702a_1
127 | - libgfortran5=14.2.0=hd5240d6_1
128 | - libgoogle-cloud=2.24.0=h2736e30_0
129 | - libgoogle-cloud-storage=2.24.0=h3d9a0c8_0
130 | - libgrpc=1.62.2=h15f2491_0
131 | - libhwloc=2.11.2=default_he43201b_1000
132 | - libiconv=1.17=hd590300_2
133 | - libjpeg-turbo=3.0.0=hd590300_1
134 | - liblapack=3.9.0=16_linux64_mkl
135 | - liblapacke=3.9.0=16_linux64_mkl
136 | - liblzma=5.6.3=hb9d3cd8_1
137 | - liblzma-devel=5.6.3=hb9d3cd8_1
138 | - libmagic=5.39=h753d276_1
139 | - libnghttp2=1.58.0=h47da74e_1
140 | - libnpp=12.2.5.30=0
141 | - libnsl=2.0.1=hd590300_0
142 | - libnvfatbin=12.6.77=0
143 | - libnvjitlink=12.4.127=0
144 | - libnvjpeg=12.3.1.117=0
145 | - libparquet=16.1.0=h6a7eafb_6_cpu
146 | - libpng=1.6.43=h2797004_0
147 | - libprotobuf=4.25.3=h08a7969_0
148 | - libre2-11=2023.09.01=h5a48ba9_2
149 | - libsqlite=3.46.0=hde9e2c9_0
150 | - libssh2=1.11.0=h0841786_0
151 | - libstdcxx=14.2.0=hc0a3c3a_1
152 | - libstdcxx-ng=14.2.0=h4852527_1
153 | - libthrift=0.19.0=hb90f79a_1
154 | - libtiff=4.6.0=h1dd3fc0_3
155 | - libutf8proc=2.8.0=hf23e847_1
156 | - libuuid=2.38.1=h0b41bf4_0
157 | - libwebp=1.4.0=h2c329e2_0
158 | - libwebp-base=1.4.0=hd590300_0
159 | - libxcb=1.15=h0b41bf4_0
160 | - libxcrypt=4.4.36=hd590300_1
161 | - libxml2=2.12.7=hc051c1a_1
162 | - libxslt=1.1.39=h76b75d6_0
163 | - libzlib=1.2.13=h4ab18f5_6
164 | - llvm-openmp=15.0.7=h0cdce71_0
165 | - lxml=5.2.2=py312hb90d8a5_0
166 | - lz4-c=1.9.4=hcb278e6_0
167 | - marshmallow=3.20.2=pyhd8ed1ab_0
168 | - mkl=2022.1.0=h84fe81f_915
169 | - mkl-devel=2022.1.0=ha770c72_916
170 | - mkl-include=2022.1.0=h84fe81f_915
171 | - mpc=1.3.1=h24ddda3_1
172 | - mpfr=4.2.1=h90cbb55_3
173 | - mpmath=1.3.0=pyhd8ed1ab_1
174 | - multidict=6.1.0=py312h178313f_2
175 | - multiprocess=0.70.16=py312h66e93f0_1
176 | - mypy_extensions=1.0.0=pyha770c72_1
177 | - ncurses=6.5=h2d0b736_2
178 | - ndjson=0.3.1=pyhd8ed1ab_0
179 | - nest-asyncio=1.6.0=pyhd8ed1ab_1
180 | - nettle=3.6=he412f7d_0
181 | - networkx=3.4.2=pyh267e887_2
182 | - nltk=3.9.1=pyhd8ed1ab_1
183 | - numpy=1.26.4=py312heda63a1_0
184 | - olefile=0.47=pyhd8ed1ab_1
185 | - openh264=2.1.1=h780b84a_0
186 | - openjpeg=2.5.2=h488ebb8_0
187 | - openssl=3.4.0=h7b32b05_1
188 | - orc=2.0.1=h17fec99_1
189 | - orderly-set=5.2.3=pyh29332c3_1
190 | - orjson=3.10.14=py312h12e396e_0
191 | - packaging=23.2=pyhd8ed1ab_0
192 | - pandas=2.2.3=py312hf9745cd_1
193 | - pillow=10.3.0=py312hdcec9eb_0
194 | - pip=24.3.1=pyh8b19718_2
195 | - propcache=0.2.1=py312h66e93f0_0
196 | - psutil=6.1.1=py312h66e93f0_0
197 | - pthread-stubs=0.4=hb9d3cd8_1002
198 | - pyarrow=16.1.0=py312h8da182e_1
199 | - pyarrow-core=16.1.0=py312h5429d62_1_cpu
200 | - pycparser=2.22=pyh29332c3_1
201 | - pydantic=2.9.2=pyhd8ed1ab_0
202 | - pydantic-core=2.23.4=py312h12e396e_0
203 | - pypdf=5.1.0=pyha770c72_1
204 | - pysocks=1.7.1=pyha55dd90_7
205 | - python=3.12.3=hab00c5b_0_cpython
206 | - python-dateutil=2.8.2=pyhd8ed1ab_0
207 | - python-iso639=2024.10.22=pyhff2d567_1
208 | - python-magic=0.4.27=pyh9ac5cc3_5
209 | - python-oxmsg=0.0.1=pyhff2d567_1
210 | - python-tzdata=2024.2=pyhd8ed1ab_1
211 | - python-xxhash=3.5.0=py312h66e93f0_1
212 | - python_abi=3.12=5_cp312
213 | - pytorch=2.5.1=py3.12_cuda12.4_cudnn9.1.0_0
214 | - pytorch-cuda=12.4=hc786d27_7
215 | - pytorch-mutex=1.0=cuda
216 | - pytz=2024.1=pyhd8ed1ab_0
217 | - pyyaml=6.0.2=py312h66e93f0_1
218 | - rapidfuzz=3.11.0=py312h2ec8cdc_0
219 | - re2=2023.09.01=h7f4b329_2
220 | - readline=8.2=h8228510_1
221 | - regex=2023.12.25=py312h98912ed_0
222 | - requests=2.32.3=pyhd8ed1ab_1
223 | - requests-toolbelt=1.0.0=pyhd8ed1ab_1
224 | - s2n=1.4.15=he19d79f_0
225 | - safetensors=0.5.2=py312h12e396e_0
226 | - scikit-learn=1.6.1=py312h7a48858_0
227 | - scipy=1.15.1=py312h180e4f1_0
228 | - sentence-transformers=3.3.1=pyhd8ed1ab_1
229 | - setuptools=75.8.0=pyhff2d567_0
230 | - six=1.16.0=pyhd8ed1ab_1
231 | - snappy=1.2.1=h8bd8927_1
232 | - sniffio=1.3.1=pyhd8ed1ab_1
233 | - soupsieve=2.5=pyhd8ed1ab_1
234 | - sqlalchemy=2.0.37=py312h66e93f0_0
235 | - tabulate=0.9.0=pyhd8ed1ab_2
236 | - tbb=2021.13.0=hceb3a55_1
237 | - tenacity=9.0.0=pyhd8ed1ab_1
238 | - threadpoolctl=3.5.0=pyhc1e730c_0
239 | - tk=8.6.13=noxft_h4845f30_101
240 | - tokenizers=0.21.0=py312h8360d73_0
241 | - torchaudio=2.5.1=py312_cu124
242 | - torchtriton=3.1.0=py312
243 | - torchvision=0.20.1=py312_cu124
244 | - tqdm=4.67.1=pyhd8ed1ab_1
245 | - transformers=4.48.0=pyhd8ed1ab_0
246 | - typing-extensions=4.12.2=hd8ed1ab_1
247 | - typing_extensions=4.12.2=pyha770c72_1
248 | - typing_inspect=0.9.0=pyhd8ed1ab_1
249 | - tzdata=2024b=hc8b5060_0
250 | - unstructured=0.16.12=pyhd8ed1ab_0
251 | - unstructured-client=0.28.1=pyhd8ed1ab_1
252 | - urllib3=1.26.18=pyhd8ed1ab_0
253 | - webencodings=0.5.1=pyhd8ed1ab_3
254 | - wheel=0.45.1=pyhd8ed1ab_1
255 | - wrapt=1.17.1=py312h66e93f0_0
256 | - xorg-libxau=1.0.12=hb9d3cd8_0
257 | - xorg-libxdmcp=1.1.5=hb9d3cd8_0
258 | - xxhash=0.8.2=hd590300_0
259 | - xz=5.6.3=hbcc6ac9_1
260 | - xz-gpl-tools=5.6.3=hbcc6ac9_1
261 | - xz-tools=5.6.3=hb9d3cd8_1
262 | - yaml=0.2.5=h7f98852_2
263 | - yarl=1.18.3=py312h66e93f0_0
264 | - zlib=1.2.13=h4ab18f5_6
265 | - zstandard=0.23.0=py312hef9b889_1
266 | - zstd=1.5.6=ha6fb4c9_0
267 | - pip:
268 | - accelerate==1.2.1
269 | - aiofiles==23.2.1
270 | - bitsandbytes==0.45.0
271 | - coloredlogs==15.0.1
272 | - fastapi==0.115.6
273 | - fastembed==0.5.0
274 | - ffmpy==0.5.0
275 | - flatbuffers==24.12.23
276 | - gradio==5.12.0
277 | - gradio-client==1.5.4
278 | - grpcio==1.69.0
279 | - grpcio-tools==1.69.0
280 | - httpx-sse==0.4.0
281 | - humanfriendly==10.0
282 | - langchain-community==0.3.14
283 | - loguru==0.7.3
284 | - markdown-it-py==3.0.0
285 | - markupsafe==2.1.5
286 | - mdurl==0.1.2
287 | - mmh3==4.1.0
288 | - onnx==1.17.0
289 | - onnxruntime==1.20.1
290 | - portalocker==2.10.1
291 | - protobuf==5.29.3
292 | - py-rust-stemmers==0.1.3
293 | - pydantic-settings==2.7.1
294 | - pydub==0.25.1
295 | - pygments==2.19.1
296 | - python-dotenv==1.0.1
297 | - python-multipart==0.0.20
298 | - qdrant-client==1.12.2
299 | - rich==13.9.4
300 | - ruff==0.9.1
301 | - safehttpx==0.1.6
302 | - semantic-version==2.10.0
303 | - shellingham==1.5.4
304 | - starlette==0.41.3
305 | - sympy==1.13.1
306 | - tomlkit==0.13.2
307 | - trl==0.13.0
308 | - typer==0.15.1
309 | - uvicorn==0.34.0
310 | - websockets==14.1
311 |
--------------------------------------------------------------------------------