├── .gitignore
├── ChipNeMo.pdf
├── Dockerfile
├── README.md
├── docker-compose.yml
├── indexing.py
├── rag.py
├── rag.yaml
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
--------------------------------------------------------------------------------
/ChipNeMo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/rag-with-nvidia-nims/04c871c14173f9b5d496bf576b01a869f71c53f8/ChipNeMo.pdf
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM deepset/hayhooks:v0.0.15
2 |
3 | EXPOSE 1416
4 |
5 | RUN pip install pypdf qdrant-haystack nvidia-haystack
6 |
7 | CMD ["hayhooks", "run", "--host", "0.0.0.0"]
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Build Air-Gapped RAG with Nvidia NIMs and Haystack
2 |
3 | 📚 This repository is accompanied by our article ["Building RAG Applications with NVIDIA NIM and Haystack on K8s"](https://haystack.deepset.ai/blog/haystack-nvidia-nim-rag-guide)
4 |
5 | > Info: This repo is set up to use models hosted and accessible via https://build.nvidia.com/
6 | >
7 | > These models are already available, and you can use them by creating an API key on the platform.
8 | > The project is set up so that you can switch these models to your own NIM deployments by setting the `model` name and `api_url` in the `NvidiaGenerator`, `NvidiaDocumentEmbedder` and `NvidiaTextEmbedder` components (see the sketch below).
9 | >
10 | > 👩🏻🍳 We also provide a notebook in the Haystack Cookbooks that provides the same code and setup, but expects self-hosted NIMs.
11 | >
12 | >
13 |
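For illustration, here is a minimal sketch of that change. The `api_url` values below are placeholders for wherever your NIM containers are exposed, not real endpoints; the model names are the ones used elsewhere in this repo:

```python
from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder
from haystack_integrations.components.generators.nvidia import NvidiaGenerator

# Placeholder URLs: point these at your own NIM deployments (e.g. K8s services).
embedder = NvidiaTextEmbedder(
    model="snowflake/arctic-embed-l",
    api_url="http://<your-embedding-nim-host>:8000/v1",
)

generator = NvidiaGenerator(
    model="meta/llama-3.1-70b-instruct",
    api_url="http://<your-llm-nim-host>:8000/v1",
    model_arguments={"max_tokens": 1024},
)
```

The same two keyword arguments apply to `NvidiaDocumentEmbedder` in `indexing.py`.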
14 | ## Run with Docker
15 |
16 | 1. `pip install -r requirements.txt`
17 | 2. Create a `.env` file and add your `NVIDIA_API_KEY` (if you're using hosted models via https://build.nvidia.com/)
18 | 3. `docker-compose up`
19 | 4. `hayhooks deploy rag.yaml`
20 | 5. Go to `localhost:1416/docs` to interact with your RAG pipeline (or call it over HTTP directly, as sketched below)
21 |
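Once the pipeline is deployed, you can also query it over HTTP instead of using the docs UI. This is only a sketch, not part of the repo: the exact endpoint and request schema depend on your hayhooks version, so check `localhost:1416/docs` for the generated route. It assumes the pipeline is deployed under the name `rag` and accepts the pipeline inputs keyed by component name:

```python
import requests

question = "What is ChipNeMo?"

# Assumption: hayhooks exposes the deployed pipeline at POST /rag and takes the
# Haystack pipeline inputs per component. Verify the actual schema at
# http://localhost:1416/docs before relying on this.
response = requests.post(
    "http://localhost:1416/rag",
    json={
        "embedder": {"text": question},
        "prompt": {"query": question},
    },
)
response.raise_for_status()
print(response.json())
```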
22 | ## File Structure
23 |
24 | - `indexing.py`: This script preprocesses, embeds, and writes `ChipNeMo.pdf` into a Qdrant database
25 | - `rag.py`: This script runs a RAG pipeline with a NIM LLM and a NIM retrieval (embedding) model.
26 | - `Dockerfile`: This is used by the docker-compose file to install dependencies
27 | - `docker-compose.yml`: This is the Docker Compose file we use to spin up containers for hayhooks (Haystack pipeline deployment) and Qdrant
28 | - `rag.yaml`: This is the RAG pipeline from `rag.py`, serialized to YAML. We use it to deploy the pipeline with hayhooks (see the serialization sketch below)
29 | - The Haystack Cookbook notebook mentioned above shows you how to set up your components to use self-hosted NIMs.
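
To make the relationship between `rag.py` and `rag.yaml` concrete, here is a small sketch (not part of the repo) of the serialization round trip: `Pipeline.dump` writes the YAML that hayhooks consumes, and `Pipeline.load` rebuilds the same pipeline. Running the loaded pipeline locally assumes `NVIDIA_API_KEY` is set and the Qdrant host in the YAML is reachable from where you run it:

```python
from haystack import Pipeline

# rag.py serializes its pipeline with `rag.dump(f)`, producing rag.yaml.
# Here we go the other way: rebuild the pipeline from the YAML file and run it.
with open("rag.yaml", "r") as f:
    pipeline = Pipeline.load(f)

question = "What is ChipNeMo?"
result = pipeline.run({
    "embedder": {"text": question},
    "prompt": {"query": question},
})
print(result["generator"]["replies"][0])
```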
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | qdrant:
3 | image: qdrant/qdrant:latest
4 | restart: always
5 | container_name: qdrant
6 | ports:
7 | - 6333:6333
8 | - 6334:6334
9 | expose:
10 | - 6333
11 | - 6334
12 | - 6335
13 | volumes:
14 | - ./qdrant_data:/qdrant_data
15 |
16 | hayhooks:
17 | build: .
18 | container_name: hayhooks
19 | ports:
20 | - "1416:1416"
21 | env_file:
22 | - ".env"
23 |
24 | qdrant-setup:
25 | build: .
26 | container_name: qdrant-setup
27 | depends_on:
28 | - qdrant
29 | env_file:
30 | - ".env"
31 | volumes:
32 | - .:/hayhooks/
33 | entrypoint: ["python", "/hayhooks/indexing.py"]
--------------------------------------------------------------------------------
/indexing.py:
--------------------------------------------------------------------------------
1 | from haystack import Pipeline
2 | from haystack.utils.auth import Secret
3 | from haystack.components.converters import PyPDFToDocument
4 | from haystack.components.writers import DocumentWriter
5 | from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
6 | from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
7 | from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder
8 |
9 | document_store = QdrantDocumentStore(embedding_dim=1024, host="qdrant")
10 |
11 | converter = PyPDFToDocument()
12 |
13 | cleaner = DocumentCleaner()
14 |
15 | splitter = DocumentSplitter(split_by='word', split_length=100)
16 |
17 | embedder = NvidiaDocumentEmbedder(model="snowflake/arctic-embed-l",
18 | api_url="https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l",
19 | batch_size=1)
20 |
21 | writer = DocumentWriter(document_store)
22 |
23 | indexing = Pipeline()
24 | indexing.add_component("converter", converter)
25 | indexing.add_component("cleaner", cleaner)
26 | indexing.add_component("splitter", splitter)
27 | indexing.add_component("embedder", embedder)
28 | indexing.add_component("writer", writer)
29 |
30 | indexing.connect("converter", "cleaner")
31 | indexing.connect("cleaner", "splitter")
32 | indexing.connect("splitter", "embedder")
33 | indexing.connect("embedder", "writer")
34 |
35 | if __name__=="__main__":
36 | indexing.run({"converter": {"sources": ["/hayhooks/ChipNeMo.pdf"]}})
--------------------------------------------------------------------------------
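Not part of the repo, but a handy sanity check after the `qdrant-setup` container has finished running `indexing.py`: connect to the same Qdrant collection and count the indexed documents. This sketch assumes you run it on the host machine, where docker-compose publishes Qdrant on `localhost:6333`:

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# Same collection ("Document") and embedding size as indexing.py, reached via
# the published port instead of the in-network hostname "qdrant".
document_store = QdrantDocumentStore(embedding_dim=1024, host="localhost")

print(f"Documents indexed: {document_store.count_documents()}")
```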
/rag.py:
--------------------------------------------------------------------------------
1 | from haystack import Pipeline
2 | from haystack.utils.auth import Secret
3 | from haystack.components.builders import PromptBuilder
4 | from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder
5 | from haystack_integrations.components.generators.nvidia import NvidiaGenerator
6 | from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
7 | from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
8 |
9 | document_store = QdrantDocumentStore(embedding_dim=1024)
10 |
11 | embedder = NvidiaTextEmbedder(model="snowflake/arctic-embed-l",
12 | api_url="https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l")
13 |
14 | retriever = QdrantEmbeddingRetriever(document_store=document_store)
15 |
16 | prompt = """Answer the question given the context.
17 | Question: {{ query }}
18 | Context:
19 | {% for document in documents %}
20 | {{ document.content }}
21 | {% endfor %}
22 | Answer:"""
23 | prompt_builder = PromptBuilder(template=prompt)
24 |
25 | generator = NvidiaGenerator(
26 | model="meta/llama-3.1-70b-instruct",
27 | api_url="https://integrate.api.nvidia.com/v1",
28 | model_arguments={
29 | "max_tokens": 1024
30 | }
31 | )
32 |
33 | rag = Pipeline()
34 | rag.add_component("embedder", embedder)
35 | rag.add_component("retriever", retriever)
36 | rag.add_component("prompt", prompt_builder)
37 | rag.add_component("generator", generator)
38 |
39 | rag.connect("embedder.embedding", "retriever.query_embedding")
40 | rag.connect("retriever.documents", "prompt.documents")
41 | rag.connect("prompt", "generator")
42 |
43 | with open("rag.yaml", "w") as f:
44 | rag.dump(f)
45 |
46 | while True:
47 | question = input("Ask a question:\n")
48 | result = rag.run(
49 | {
50 | "embedder": {"text": question},
51 | "prompt": {"query": question},
52 | }, include_outputs_from=["prompt"]
53 | )
54 | print(result["generator"]["replies"][0])
55 |
--------------------------------------------------------------------------------
/rag.yaml:
--------------------------------------------------------------------------------
1 | components:
2 | embedder:
3 | init_parameters:
4 | api_key:
5 | env_vars:
6 | - NVIDIA_API_KEY
7 | strict: true
8 | type: env_var
9 | api_url: https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l
10 | model: snowflake/arctic-embed-l
11 | prefix: ''
12 | suffix: ''
13 | truncate: null
14 | type: haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder
15 | generator:
16 | init_parameters:
17 | api_key:
18 | env_vars:
19 | - NVIDIA_API_KEY
20 | strict: true
21 | type: env_var
22 | api_url: https://integrate.api.nvidia.com/v1
23 | model: meta/llama3-70b-instruct
24 | model_arguments:
25 | max_tokens: 1024
26 | type: haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator
27 | prompt:
28 | init_parameters:
29 | template: "Answer the question given the context.\nQuestion: {{ query }}\nContext:\n\
30 | {% for document in documents %}\n {{ document.content }}\n{% endfor %}\n\
31 | Answer:"
32 | type: haystack.components.builders.prompt_builder.PromptBuilder
33 | retriever:
34 | init_parameters:
35 | document_store:
36 | init_parameters:
37 | api_key: null
38 | content_field: content
39 | duplicate_documents: overwrite
40 | embedding_dim: 1024
41 | embedding_field: embedding
42 | grpc_port: 6334
43 | hnsw_config: null
44 | host: qdrant
45 | https: null
46 | index: Document
47 | init_from: null
48 | location: null
49 | metadata: {}
50 | name_field: name
51 | on_disk: false
52 | on_disk_payload: null
53 | optimizers_config: null
54 | path: null
55 | payload_fields_to_index: null
56 | port: 6333
57 | prefer_grpc: false
58 | prefix: null
59 | progress_bar: true
60 | quantization_config: null
61 | recreate_index: false
62 | replication_factor: null
63 | return_embedding: false
64 | scroll_size: 10000
65 | shard_number: null
66 | similarity: cosine
67 | timeout: null
68 | url: null
69 | use_sparse_embeddings: false
70 | wait_result_from_api: true
71 | wal_config: null
72 | write_batch_size: 100
73 | write_consistency_factor: null
74 | type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore
75 | filters: null
76 | return_embedding: false
77 | scale_score: true
78 | top_k: 10
79 | type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever
80 | connections:
81 | - receiver: retriever.query_embedding
82 | sender: embedder.embedding
83 | - receiver: prompt.documents
84 | sender: retriever.documents
85 | - receiver: generator.prompt
86 | sender: prompt.prompt
87 | max_loops_allowed: 100
88 | metadata: {}
89 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | haystack-ai
2 | qdrant-haystack
3 | nvidia-haystack
4 | pydantic==1.9.0
5 | pypdf
6 | hayhooks
7 |
--------------------------------------------------------------------------------