├── .gitignore ├── ChipNeMo.pdf ├── Dockerfile ├── README.md ├── docker-compose.yml ├── indexing.py ├── rag.py ├── rag.yaml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .env -------------------------------------------------------------------------------- /ChipNeMo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/rag-with-nvidia-nims/04c871c14173f9b5d496bf576b01a869f71c53f8/ChipNeMo.pdf -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM deepset/hayhooks:v0.0.15 2 | 3 | EXPOSE 1416 4 | 5 | RUN pip install pypdf qdrant-haystack nvidia-haystack 6 | 7 | CMD ["hayhooks", "run", "--host", "0.0.0.0"] 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build Air-Gapped RAG with Nvidia NIMs and Haystack 2 | 3 | 📚 This repository is accompanied by our article ["Building RAG Applications with NVIDIA NIM and Haystack on K8s"](https://haystack.deepset.ai/blog/haystack-nvidia-nim-rag-guide) 4 | 5 | > Info: This repo is set up to use models hosted and accessible via https://build.nvidia.com/ 6 | > 7 | > These models are already available, and you can use them by creating an API key on the platform. 8 | > The project is set up so that you can change these models to NIM deployments by setting the `model` name and `api_url` in the `NvidiaGenerator`, `NvidiaDocumentEmbedder`, and `NvidiaTextEmbedder` components. 9 | > 10 | > 👩🏻‍🍳 We also provide a notebook on Haystack Cookbooks that provides the same code and setup, but expects self-hosted NIMs 11 | > 12 | > Open In Colab 13 | 14 | ## Run with Docker 15 | 16 | 1. `pip install -r requirements.txt` 17 | 2. 
Create a `.env` file and add `NVIDIA_API_KEY` (if you're using hosted models via https://build.nvidia.com/) 18 | 3. `docker-compose up` 19 | 4. `hayhooks deploy rag.yaml` 20 | 5. Go to `localhost:1416/docs` to interact with your RAG pipeline 21 | 22 | ## File Structure 23 | 24 | - `indexing.py`: This script preprocesses, embeds, and writes ChipNeMo.pdf into a Qdrant database 25 | - `rag.py`: This script runs a RAG pipeline with a NIM LLM and retrieval model. 26 | - `Dockerfile`: This is used by the docker-compose file to install dependencies 27 | - `docker-compose.yml`: This is the docker compose file we use to spin up containers for hayhooks (Haystack pipeline deployment) and Qdrant 28 | - `rag.yaml`: This is the serialized RAG pipeline, the same as `rag.py` but in YAML. We use this to deploy our pipeline with hayhooks 29 | - Open In Colab: This notebook shows you how to set up your components to use self-hosted NIMs. -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | qdrant: 3 | image: qdrant/qdrant:latest 4 | restart: always 5 | container_name: qdrant 6 | ports: 7 | - 6333:6333 8 | - 6334:6334 9 | expose: 10 | - 6333 11 | - 6334 12 | - 6335 13 | volumes: 14 | - ./qdrant_data:/qdrant_data 15 | 16 | hayhooks: 17 | build: . 18 | container_name: hayhooks 19 | ports: 20 | - "1416:1416" 21 | env_file: 22 | - ".env" 23 | 24 | qdrant-setup: 25 | build: . 
26 | container_name: qdrant-setup 27 | depends_on: 28 | - qdrant 29 | env_file: 30 | - ".env" 31 | volumes: 32 | - .:/hayhooks/ 33 | entrypoint: ["python", "/hayhooks/indexing.py"] -------------------------------------------------------------------------------- /indexing.py: -------------------------------------------------------------------------------- 1 | from haystack import Pipeline 2 | from haystack.utils.auth import Secret 3 | from haystack.components.converters import PyPDFToDocument 4 | from haystack.components.writers import DocumentWriter 5 | from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter 6 | from haystack_integrations.document_stores.qdrant import QdrantDocumentStore 7 | from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder 8 | 9 | document_store = QdrantDocumentStore(embedding_dim=1024, host="qdrant") 10 | 11 | converter = PyPDFToDocument() 12 | 13 | cleaner = DocumentCleaner() 14 | 15 | splitter = DocumentSplitter(split_by='word', split_length=100) 16 | 17 | embedder = NvidiaDocumentEmbedder(model="snowflake/arctic-embed-l", 18 | api_url="https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l", 19 | batch_size=1) 20 | 21 | writer = DocumentWriter(document_store) 22 | 23 | indexing = Pipeline() 24 | indexing.add_component("converter", converter) 25 | indexing.add_component("cleaner", cleaner) 26 | indexing.add_component("splitter", splitter) 27 | indexing.add_component("embedder", embedder) 28 | indexing.add_component("writer", writer) 29 | 30 | indexing.connect("converter", "cleaner") 31 | indexing.connect("cleaner", "splitter") 32 | indexing.connect("splitter", "embedder") 33 | indexing.connect("embedder", "writer") 34 | 35 | if __name__=="__main__": 36 | indexing.run({"converter": {"sources": ["/hayhooks/ChipNeMo.pdf"]}}) -------------------------------------------------------------------------------- /rag.py: 
-------------------------------------------------------------------------------- 1 | from haystack import Pipeline 2 | from haystack.utils.auth import Secret 3 | from haystack.components.builders import PromptBuilder 4 | from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder 5 | from haystack_integrations.components.generators.nvidia import NvidiaGenerator 6 | from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever 7 | from haystack_integrations.document_stores.qdrant import QdrantDocumentStore 8 | 9 | document_store = QdrantDocumentStore(embedding_dim=1024) 10 | 11 | embedder = NvidiaTextEmbedder(model="snowflake/arctic-embed-l", 12 | api_url="https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l") 13 | 14 | retriever = QdrantEmbeddingRetriever(document_store=document_store) 15 | 16 | prompt = """Answer the question given the context. 17 | Question: {{ query }} 18 | Context: 19 | {% for document in documents %} 20 | {{ document.content }} 21 | {% endfor %} 22 | Answer:""" 23 | prompt_builder = PromptBuilder(template=prompt) 24 | 25 | generator = NvidiaGenerator( 26 | model="meta/llama-3.1-70b-instruct", 27 | api_url="https://integrate.api.nvidia.com/v1", 28 | model_arguments={ 29 | "max_tokens": 1024 30 | } 31 | ) 32 | 33 | rag = Pipeline() 34 | rag.add_component("embedder", embedder) 35 | rag.add_component("retriever", retriever) 36 | rag.add_component("prompt", prompt_builder) 37 | rag.add_component("generator", generator) 38 | 39 | rag.connect("embedder.embedding", "retriever.query_embedding") 40 | rag.connect("retriever.documents", "prompt.documents") 41 | rag.connect("prompt", "generator") 42 | 43 | with open("rag.yaml", "w") as f: 44 | rag.dump(f) 45 | 46 | while True: 47 | question = input("Ask a question:\n") 48 | result = rag.run( 49 | { 50 | "embedder": {"text": question}, 51 | "prompt": {"query": question}, 52 | }, include_outputs_from=["prompt"] 53 | ) 54 | 
print(result["generator"]["replies"][0]) 55 | -------------------------------------------------------------------------------- /rag.yaml: -------------------------------------------------------------------------------- 1 | components: 2 | embedder: 3 | init_parameters: 4 | api_key: 5 | env_vars: 6 | - NVIDIA_API_KEY 7 | strict: true 8 | type: env_var 9 | api_url: https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l 10 | model: snowflake/arctic-embed-l 11 | prefix: '' 12 | suffix: '' 13 | truncate: null 14 | type: haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder 15 | generator: 16 | init_parameters: 17 | api_key: 18 | env_vars: 19 | - NVIDIA_API_KEY 20 | strict: true 21 | type: env_var 22 | api_url: https://integrate.api.nvidia.com/v1 23 | model: meta/llama-3.1-70b-instruct 24 | model_arguments: 25 | max_tokens: 1024 26 | type: haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator 27 | prompt: 28 | init_parameters: 29 | template: "Answer the question given the context.\nQuestion: {{ query }}\nContext:\n\ 30 | {% for document in documents %}\n {{ document.content }}\n{% endfor %}\n\ 31 | Answer:" 32 | type: haystack.components.builders.prompt_builder.PromptBuilder 33 | retriever: 34 | init_parameters: 35 | document_store: 36 | init_parameters: 37 | api_key: null 38 | content_field: content 39 | duplicate_documents: overwrite 40 | embedding_dim: 1024 41 | embedding_field: embedding 42 | grpc_port: 6334 43 | hnsw_config: null 44 | host: qdrant 45 | https: null 46 | index: Document 47 | init_from: null 48 | location: null 49 | metadata: {} 50 | name_field: name 51 | on_disk: false 52 | on_disk_payload: null 53 | optimizers_config: null 54 | path: null 55 | payload_fields_to_index: null 56 | port: 6333 57 | prefer_grpc: false 58 | prefix: null 59 | progress_bar: true 60 | quantization_config: null 61 | recreate_index: false 62 | replication_factor: null 63 | return_embedding: false 64 | scroll_size: 10000 
65 | shard_number: null 66 | similarity: cosine 67 | timeout: null 68 | url: null 69 | use_sparse_embeddings: false 70 | wait_result_from_api: true 71 | wal_config: null 72 | write_batch_size: 100 73 | write_consistency_factor: null 74 | type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore 75 | filters: null 76 | return_embedding: false 77 | scale_score: true 78 | top_k: 10 79 | type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever 80 | connections: 81 | - receiver: retriever.query_embedding 82 | sender: embedder.embedding 83 | - receiver: prompt.documents 84 | sender: retriever.documents 85 | - receiver: generator.prompt 86 | sender: prompt.prompt 87 | max_loops_allowed: 100 88 | metadata: {} 89 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | haystack-ai 2 | qdrant-haystack 3 | nvidia-haystack 4 | pydantic==1.9.0 5 | pypdf 6 | hayhooks 7 | --------------------------------------------------------------------------------
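Beyond the interactive docs at `localhost:1416/docs`, the deployed pipeline can also be called programmatically. The sketch below is not part of the repo: the payload structure mirrors exactly what `rag.py` passes to `rag.run()`, but the endpoint path (`/rag/run`) and the response shape are assumptions about how hayhooks exposes a pipeline deployed from `rag.yaml` — check the generated OpenAPI docs for the real route before relying on it.

```python
# Sketch: call the hayhooks-deployed RAG pipeline over HTTP.
# The payload mirrors rag.py's rag.run() inputs; the URL path and the
# response structure are ASSUMPTIONS -- verify at localhost:1416/docs.
import json
import urllib.request


def build_run_payload(question: str) -> dict:
    """Duplicate the inputs rag.py feeds into rag.run()."""
    return {
        "embedder": {"text": question},
        "prompt": {"query": question},
    }


def ask(question: str, base_url: str = "http://localhost:1416") -> str:
    """POST the question to the deployed pipeline (hypothetical route)."""
    req = urllib.request.Request(
        f"{base_url}/rag/run",  # assumed path for a pipeline named "rag"
        data=json.dumps(build_run_payload(question)).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        result = json.load(resp)
    # Same access pattern rag.py uses on the in-process result dict.
    return result["generator"]["replies"][0]


if __name__ == "__main__":
    print(build_run_payload("What is ChipNeMo?"))
```

Keeping the payload builder separate from the HTTP call makes it easy to reuse the same input dict for local `rag.run()` debugging and for requests against the deployed service.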