├── .github ├── CODEOWNERS └── workflows │ ├── CI_project.yml │ ├── project.yml │ └── publish_integrations.yml ├── .gitignore ├── README.md ├── draft-integration.md ├── images ├── arize-demo.gif ├── azure-cosmosdb-collection.png ├── azure-cosmosdb-quickstart.png ├── burr-ui.png ├── chainlit-haystack.png ├── opik-demo.gif ├── ray-pipeline-concurrent.gif ├── traceloop-monitoring.png └── traceloop-tracing.png ├── integrations ├── amazon-bedrock.md ├── amazon-sagemaker.md ├── anthropic.md ├── apify.md ├── arize-phoenix.md ├── arize.md ├── assemblyai.md ├── astradb.md ├── azure-ai-search.md ├── azure-cosmos-db.md ├── azure.md ├── browserbase.md ├── burr.md ├── cerebras.md ├── chroma-documentstore.md ├── cohere.md ├── context-ai.md ├── couchbase-document-store.md ├── deepeval.md ├── deepl.md ├── docling.md ├── duckduckgo-api-websearch.md ├── elasticsearch-document-store.md ├── elevenlabs.md ├── fastembed.md ├── fastrag.md ├── flow-judge.md ├── github.md ├── google-ai.md ├── google-vertex-ai.md ├── groq.md ├── huggingface.md ├── instructor-embedder.md ├── jina.md ├── lancedb.md ├── langfuse.md ├── llama_cpp.md ├── llamafile.md ├── lmformatenforcer.md ├── marqo-document-store.md ├── mastodon-fetcher.md ├── mcp.md ├── meta_llama.md ├── milvus-document-store.md ├── mistral.md ├── mixedbread-ai.md ├── mongodb.md ├── monsterapi.md ├── needle.md ├── neo4j-document-store.md ├── notion-extractor.md ├── nvidia.md ├── ollama.md ├── opea.md ├── openai.md ├── openrouter.md ├── opensearch-document-store.md ├── opik.md ├── optimum.md ├── pgvector-documentstore.md ├── pinecone-document-store.md ├── qdrant-document-store.md ├── ragas.md ├── ray.md ├── sambanova.md ├── snowflake.md ├── stackit.md ├── titanml-takeoff.md ├── traceloop.md ├── trafilatura.md ├── unstructured-file-converter.md ├── uptrain.md ├── vllm.md ├── voyage.md ├── weaviate-document-store.md └── weights-and-bias-tracer.md └── logos ├── anthropic.png ├── apify.png ├── arize-phoenix.png ├── arize.png ├── assemblyai.png ├── astradb.png ├── aws.png ├── azure-ai.png ├── azure-cosmos-db.png ├── azure.png ├── browserbase.png ├── burr.png ├── cerebras.png ├── chainlit.png ├── chroma.png ├── cohere.png ├── context.svg ├── couchbase.svg ├── deepeval.png ├── deepl.svg ├── docling.png ├── duckduckgo.png ├── elastic.png ├── elevenlabs.png ├── flow-ai.png ├── github.png ├── googleai.png ├── groq.png ├── huggingface.png ├── intel-labs.png ├── jina.png ├── lancedb.png ├── langfuse.png ├── llama_cpp.png ├── llamafile.png ├── lmformatenforcer.png ├── marqo.png ├── mastodon.png ├── mcp.png ├── meta.png ├── meta_llama.png ├── milvus.png ├── mistral.svg ├── mixedbread-ai.png ├── mongodb.png ├── monsterapi.png ├── needle.png ├── neo4j.png ├── notion.png ├── nvidia.png ├── ollama.png ├── opea.png ├── openai.png ├── openrouter.png ├── opensearch.png ├── opik.png ├── pinecone.png ├── qdrant.png ├── ragas.png ├── ray.png ├── sambanova.png ├── snowflake.png ├── stackit.svg ├── titanml.png ├── traceloop.png ├── trafilatura.png ├── unstructured.svg ├── uptrain.png ├── vertexai.png ├── vllm.png ├── voyage_ai.jpg ├── weaviate.png └── weights_and_bias.png /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/about-codeowners/ for syntax 2 | 3 | # DevRel Team will be the default owner for everything 4 | # in the repo. Unless a later match takes precedence, 5 | # @deepset-ai/devrel will be requested for review 6 | # when someone opens a pull request. 
7 | * @deepset-ai/devrel
8 | 
--------------------------------------------------------------------------------
/.github/workflows/CI_project.yml:
--------------------------------------------------------------------------------
1 | name: Open Source / Add issues to Github project
2 | 
3 | on:
4 |   issues:
5 |     types:
6 |       - opened
7 | 
8 | jobs:
9 |   add-to-project:
10 |     name: Add new issues to the project for triage
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/add-to-project@v0.5.0
14 |         with:
15 |           project-url: https://github.com/orgs/deepset-ai/projects/5
16 |           github-token: ${{ secrets.GH_PROJECT_PAT }}
17 | 
--------------------------------------------------------------------------------
/.github/workflows/project.yml:
--------------------------------------------------------------------------------
1 | name: Track issues with Github project
2 | 
3 | on:
4 |   issues:
5 |     types:
6 |       - opened
7 | 
8 | jobs:
9 |   add-to-project:
10 |     name: Add new issues to project for triage
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/add-to-project@v1.0.2
14 |         with:
15 |           project-url: https://github.com/orgs/deepset-ai/projects/5
16 |           github-token: ${{ secrets.GH_PROJECT_PAT }}
17 | 
--------------------------------------------------------------------------------
/.github/workflows/publish_integrations.yml:
--------------------------------------------------------------------------------
1 | name: Publish integrations on Haystack Home
2 | 
3 | on:
4 |   workflow_dispatch:
5 |   push:
6 |     branches:
7 |       - main
8 | 
9 | 
10 | jobs:
11 |   publish-integrations:
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - name: trigger-hook
16 |         run: |
17 |           curl -X POST ${{ secrets.VERCEL_DEPLOY_HOOK }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .env
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Haystack Integrations
2 | 
3 | This repository is an index of Haystack integrations that can be used with a Haystack Pipeline or Agent.
4 | 
5 | These integrations are maintained by their respective owners or authors. You can browse them on the [Haystack Integrations](https://haystack.deepset.ai/integrations) page, where you will find information on the author(s), installation, and usage of each tool.
6 | 
7 | ## What are Haystack Integrations?
8 | 
9 | Haystack Integrations are Document Stores, Model Providers, Custom Components, Monitoring Tools, or Evaluation Frameworks, provided either as external packages or as additional technologies that can be used with Haystack. Some integrations are maintained by the deepset team, while others are community contributions owned by the authors of the integration. Read more about Haystack Integrations in [Introduction to Integrations](https://docs.haystack.deepset.ai/docs/integrations).
10 | 
11 | ## How to contribute
12 | To contribute, open a PR that adds an `.md` file to the `integrations/` directory.
13 | 👉 You can start off with the [draft integration page](https://github.com/deepset-ai/haystack-integrations/blob/main/draft-integration.md).
14 | A few things to include in the file 👇
15 | The frontmatter has to include the following:
16 | ```
17 | ---
18 | layout: integration (required)
19 | name: Name of your integration (required)
20 | description: A short description (this will appear on the front page element of your integration on the website) (required)
21 | authors:
22 |   - name: Name of Author 1 (required)
23 |     socials:
24 |       github: include if desired
25 |       twitter: include if desired
26 |       linkedin: include if desired (full URL)
27 |   - name: Name of Author 2
28 |     socials:
29 |       github: include if desired
30 |       twitter: include if desired
31 |       linkedin: include if desired (full URL)
32 | pypi: URL of the PyPI package, if one exists
33 | repo: URL of the GitHub repo, if one exists
34 | report_issue: URL where people can report an issue with the integration
35 | type: Document Store OR Model Provider OR Data Ingestion OR Monitoring Tool OR Evaluation Framework OR Custom Component OR Tool Integration OR something new! (required)
36 | toc: true (optional)
37 | logo: /logos/your-logo.png (optional)
38 | version: Haystack 2.0
39 | ---
40 | ```
41 | Note that at least one of the `pypi` or `repo` fields must be present for us to merge the integration.
42 | 
43 | Then, please add as much information and instruction about your integration as possible in the body of your `.md` file.
44 | 
45 | Open a Pull Request, and congrats: if all goes well, you will see your integration on the integrations page in no time 🥳
--------------------------------------------------------------------------------
/draft-integration.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Your integration name
4 | description: The description of your integration
5 | authors:
6 |   - name: Author Name
7 |     socials:
8 |       github: author-github-username
9 |       twitter: author-twitter-username
10 |       linkedin: author-linkedin-url
11 | pypi: https://pypi.org/project/your-project
12 | repo: https://github.com/your-repo
13 | type: Type of your integration (e.g., Model Provider or Document Store)
14 | report_issue: https://github.com/your-repo/issues
15 | logo: /logos/your-logo.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | ### **Table of Contents**
20 | - [Overview](#overview)
21 | - [Installation](#installation)
22 | - [Usage](#usage)
23 | - [License](#license)
24 | 
25 | ## Overview
26 | Add more information about this integration, such as its features and links to its documentation.
27 | 
28 | ## Installation
29 | 
30 | ```bash
31 | pip install your-package
32 | ```
33 | 
34 | ## Usage
35 | ### Components
36 | This integration introduces X components:
37 | 
38 | - The `ComponentName`:
39 | 
40 | ### Use XYZ
41 | 
42 | ```python
43 | Example code block (must actually run, include all imports, etc.)
44 | ```
45 | 
46 | ## License
47 | 
48 | Info about your integration license
49 | 
--------------------------------------------------------------------------------
/images/arize-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/arize-demo.gif
--------------------------------------------------------------------------------
/images/azure-cosmosdb-collection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/azure-cosmosdb-collection.png
--------------------------------------------------------------------------------
/images/azure-cosmosdb-quickstart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/azure-cosmosdb-quickstart.png
--------------------------------------------------------------------------------
/images/burr-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/burr-ui.png
--------------------------------------------------------------------------------
/images/chainlit-haystack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/chainlit-haystack.png
--------------------------------------------------------------------------------
/images/opik-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/opik-demo.gif
--------------------------------------------------------------------------------
/images/ray-pipeline-concurrent.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/ray-pipeline-concurrent.gif
--------------------------------------------------------------------------------
/images/traceloop-monitoring.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/traceloop-monitoring.png
--------------------------------------------------------------------------------
/images/traceloop-tracing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/images/traceloop-tracing.png
--------------------------------------------------------------------------------
/integrations/amazon-sagemaker.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Amazon SageMaker
4 | description: Use models from Hugging Face, Anthropic, AI21 Labs, Cohere, Meta, and Amazon via Amazon SageMaker with Haystack
5 | authors:
6 |   - name: deepset
7 |     socials:
8 |       github: deepset-ai
9 |       twitter: deepset_ai
10 |       linkedin: https://www.linkedin.com/company/deepset-ai/
11 | pypi: https://pypi.org/project/amazon-sagemaker-haystack
12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_sagemaker
13 | type: Model Provider
14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
15 | logo: /logos/aws.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | 
20 | ### Table of Contents
21 | 
22 | - [Overview](#overview)
23 | - [Installation](#installation)
24 | - [Usage](#usage)
25 | 
26 | ## Overview
27 | 
28 | [Amazon SageMaker](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) is a comprehensive, fully managed machine learning service
29 | that allows data scientists and developers to build, train, and deploy ML models efficiently. More information can be found on the
30 | [documentation page](https://docs.haystack.deepset.ai/docs/sagemakergenerator).
31 | 
32 | ## Installation
33 | 
34 | Install the Amazon SageMaker integration:
35 | ```bash
36 | pip install amazon-sagemaker-haystack
37 | ```
38 | 
39 | ## Usage
40 | 
41 | Once installed, you will have access to a [SagemakerGenerator](https://docs.haystack.deepset.ai/docs/sagemakergenerator) that supports models from various providers. To learn more
42 | about which models are supported, check out [SageMaker's documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/jumpstart-foundation-models.html).
43 | 
44 | To use this integration for text generation, initialize a `SagemakerGenerator` with the model name and AWS credentials:
45 | 
46 | ```python
47 | import os
48 | from haystack_integrations.components.generators.amazon_sagemaker import SagemakerGenerator
49 | 
50 | os.environ["AWS_ACCESS_KEY_ID"] = "..."
51 | os.environ["AWS_SECRET_ACCESS_KEY"] = "..."
52 | # This one is optional
53 | os.environ["AWS_REGION_NAME"] = "..."
54 | 
55 | model = "jumpstart-dft-hf-llm-falcon-7b-instruct-bf16"  # Your SageMaker endpoint name
56 | 
57 | generator = SagemakerGenerator(model=model)
58 | result = generator.run("Who is the best American actor?")
59 | for reply in result["replies"]:
60 |     print(reply)
61 | ```
62 | Output:
63 | ```shell
64 | 'There is no definitive "best" American actor, as acting skill and talent are subjective.
65 | However, some of the most acclaimed and influential American actors include Tom Hanks,
66 | Daniel Day-Lewis, Denzel Washington, Meryl Streep, Robert De Niro, Al Pacino, Marlon Brando,
67 | Jack Nicholson, Leonardo DiCaprio and Johnny Depp. Choosing a single "best" actor comes
68 | down to personal preference.'
69 | ```
70 | 
71 | Note that different models may require different parameters. One notable example is the Llama 2 family of models,
72 | which should be initialized with `{'accept_eula': True}` as a custom attribute:
73 | 
74 | ```python
75 | generator = SagemakerGenerator(model="jumpstart-dft-meta-textgenerationneuron-llama-2-7b", aws_custom_attributes={"accept_eula": True})
76 | ```
--------------------------------------------------------------------------------
/integrations/anthropic.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Anthropic
4 | description: Use Anthropic Models with Haystack
5 | authors:
6 |   - name: deepset
7 |     socials:
8 |       github: deepset-ai
9 |       twitter: deepset_ai
10 |       linkedin: https://www.linkedin.com/company/deepset-ai/
11 | pypi: https://pypi.org/project/anthropic-haystack/
12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/anthropic
13 | type: Model Provider
14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
15 | logo: /logos/anthropic.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | 
20 | ### **Table of Contents**
21 | 
22 | - [Overview](#overview)
23 | - [Installation](#installation)
24 | - [Usage](#usage)
25 | 
26 | ## Overview
27 | 
28 | This integration supports Anthropic Claude models provided through Anthropic’s own inference infrastructure. For a full list of available models, check out [the Anthropic Claude documentation](https://www.anthropic.com/claude).
29 | 
30 | You can use Anthropic models with [`AnthropicGenerator`](https://docs.haystack.deepset.ai/docs/anthropicgenerator) and [`AnthropicChatGenerator`](https://docs.haystack.deepset.ai/docs/anthropicchatgenerator).
31 | 
32 | Currently available models are:
33 | 
34 | - `claude-2.1`
35 | - `claude-3-haiku-20240307`
36 | - `claude-3-sonnet-20240229` (default)
37 | - `claude-3-opus-20240229`
38 | 
39 | ## Installation
40 | 
41 | ```bash
42 | pip install anthropic-haystack
43 | ```
44 | 
45 | ## Usage
46 | 
47 | Based on your use case, you can choose between [`AnthropicGenerator`](https://docs.haystack.deepset.ai/docs/anthropicgenerator) and [`AnthropicChatGenerator`](https://docs.haystack.deepset.ai/docs/anthropicchatgenerator) to work with Anthropic models. To learn more about the difference, visit the [Generators vs Chat Generators](https://docs.haystack.deepset.ai/docs/generators-vs-chat-generators) guide.
48 | Before using, make sure to set the `ANTHROPIC_API_KEY` environment variable.
49 | 
50 | ### Using `AnthropicChatGenerator`
51 | 
52 | Below is an example RAG pipeline that answers a predefined question using content fetched from the URL of the Anthropic prompt engineering guide. We fetch the contents of the URL and generate an answer with the `AnthropicChatGenerator`.
53 | 
54 | ```python
55 | # To run this example, you will need to set an `ANTHROPIC_API_KEY` environment variable.
56 | 57 | from haystack import Pipeline 58 | from haystack.components.builders import ChatPromptBuilder 59 | from haystack.components.converters import HTMLToDocument 60 | from haystack.components.fetchers import LinkContentFetcher 61 | from haystack.components.generators.utils import print_streaming_chunk 62 | from haystack.dataclasses import ChatMessage 63 | from haystack.utils import Secret 64 | 65 | from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator 66 | 67 | messages = [ 68 | ChatMessage.from_system("You are a prompt expert who answers questions based on the given documents."), 69 | ChatMessage.from_user( 70 | "Here are the documents:\n" 71 | "{% for d in documents %} \n" 72 | " {{d.content}} \n" 73 | "{% endfor %}" 74 | "\nAnswer: {{query}}" 75 | ), 76 | ] 77 | 78 | rag_pipeline = Pipeline() 79 | rag_pipeline.add_component("fetcher", LinkContentFetcher()) 80 | rag_pipeline.add_component("converter", HTMLToDocument()) 81 | rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(variables=["documents"])) 82 | rag_pipeline.add_component( 83 | "llm", 84 | AnthropicChatGenerator( 85 | api_key=Secret.from_env_var("ANTHROPIC_API_KEY"), 86 | streaming_callback=print_streaming_chunk, 87 | ), 88 | ) 89 | 90 | 91 | rag_pipeline.connect("fetcher", "converter") 92 | rag_pipeline.connect("converter", "prompt_builder") 93 | rag_pipeline.connect("prompt_builder.prompt", "llm.messages") 94 | 95 | question = "When should we use prompt engineering and when should we fine-tune?" 96 | rag_pipeline.run( 97 | data={ 98 | "fetcher": {"urls": ["https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview"]}, 99 | "prompt_builder": {"template_variables": {"query": question}, "template": messages}, 100 | } 101 | ) 102 | ``` 103 | 104 | ### Using `AnthropicGenerator` 105 | 106 | Below is an example of using `AnthropicGenerator`: 107 | 108 | ```python 109 | from haystack_integrations.components.generators.anthropic import AnthropicGenerator 110 | 111 | client = AnthropicGenerator() 112 | response = client.run("What's Natural Language Processing? Be brief.") 113 | print(response) 114 | 115 | >>{'replies': ['Natural language processing (NLP) is a branch of artificial intelligence focused on enabling 116 | >>computers to understand, interpret, and manipulate human language. 
The goal of NLP is to read, decipher, 117 | >> understand, and make sense of the human languages in a manner that is valuable.'], 'meta': {'model': 118 | >> 'claude-2.1', 'index': 0, 'finish_reason': 'end_turn', 'usage': {'input_tokens': 18, 'output_tokens': 58}}} 119 | ``` 120 | -------------------------------------------------------------------------------- /integrations/arize-phoenix.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Arize Phoenix 4 | description: Trace your Haystack pipelines with Arize Phoenix 5 | authors: 6 | - name: Arize AI 7 | socials: 8 | github: Arize-ai 9 | twitter: ArizePhoenix 10 | linkedin: https://www.linkedin.com/company/arizeai/ 11 | pypi: https://pypi.org/project/openinference-instrumentation-haystack/ 12 | repo: https://github.com/Arize-ai/phoenix 13 | type: Monitoring Tool 14 | report_issue: https://github.com/Arize-ai/openinference/issues 15 | logo: /logos/arize-phoenix.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### **Table of Contents** 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | - [Resources](#resources) 26 | 27 | ## Overview 28 | 29 | **Arize Phoenix** is Arize's open-source platform that offers developers the quickest way to troubleshoot, evaluate, and experiment with LLM applications. 30 | 31 | For a detailed integration guide, see the [documentation for Phoenix + Haystack](https://docs.arize.com/phoenix/tracing/integrations-tracing/haystack) 32 | 33 | ## Installation 34 | 35 | ```bash 36 | pip install openinference-instrumentation-haystack haystack-ai opentelemetry-sdk opentelemetry-exporter-otlp arize-phoenix 37 | ``` 38 | 39 | ## Usage 40 | 41 | To trace any Haystack pipeline with Phoenix, simply initialize OpenTelemetry and the `HaystackInstrumentor`. Haystack pipelines that run within the same environment send traces to Phoenix. 42 | 43 | First, start a Phoenix instance to send traces to. 44 | 45 | ```sh 46 | python -m phoenix.server.main serve 47 | ``` 48 | 49 | Now let's connect our Haystack pipeline to Phoenix using OpenTelemetry. 50 | 51 | ```python 52 | from openinference.instrumentation.haystack import HaystackInstrumentor 53 | from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( 54 | OTLPSpanExporter, 55 | ) 56 | from opentelemetry.sdk import trace as trace_sdk 57 | from opentelemetry.sdk.trace.export import SimpleSpanProcessor 58 | 59 | endpoint = "http://localhost:6006/v1/traces" # The URL to your Phoenix instance 60 | tracer_provider = trace_sdk.TracerProvider() 61 | tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint))) 62 | 63 | HaystackInstrumentor().instrument(tracer_provider=tracer_provider) 64 | ``` 65 | 66 | Now, you can run a Haystack pipeline within the same environment, resulting in the following trace: 67 | 68 | > To run the example below, export your OpenAI Key to the `OPENAI_API_KEY` environment variable. 
69 | 
70 | ![Arize Phoenix Demo](https://raw.githubusercontent.com/deepset-ai/haystack-integrations/main/images/arize-demo.gif)
71 | 
72 | ```python
73 | from haystack import Document, Pipeline
74 | from haystack.components.builders.prompt_builder import PromptBuilder
75 | from haystack.components.generators import OpenAIGenerator
76 | from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
77 | from haystack.document_stores.in_memory import InMemoryDocumentStore
78 | 
79 | document_store = InMemoryDocumentStore()
80 | document_store.write_documents([
81 |     Document(content="My name is Jean and I live in Paris."),
82 |     Document(content="My name is Mark and I live in Berlin."),
83 |     Document(content="My name is Giorgio and I live in Rome.")
84 | ])
85 | 
86 | prompt_template = """
87 | Given these documents, answer the question.
88 | Documents:
89 | {% for doc in documents %}
90 |     {{ doc.content }}
91 | {% endfor %}
92 | Question: {{question}}
93 | Answer:
94 | """
95 | 
96 | retriever = InMemoryBM25Retriever(document_store=document_store)
97 | prompt_builder = PromptBuilder(template=prompt_template)
98 | llm = OpenAIGenerator()
99 | 
100 | rag_pipeline = Pipeline()
101 | rag_pipeline.add_component("retriever", retriever)
102 | rag_pipeline.add_component("prompt_builder", prompt_builder)
103 | rag_pipeline.add_component("llm", llm)
104 | rag_pipeline.connect("retriever", "prompt_builder.documents")
105 | rag_pipeline.connect("prompt_builder", "llm")
106 | 
107 | question = "Who lives in Paris?"
108 | results = rag_pipeline.run(
109 |     {
110 |         "retriever": {"query": question},
111 |         "prompt_builder": {"question": question},
112 |     }
113 | )
114 | ```
115 | 
116 | ## Resources
117 | 
118 | - Check out the Phoenix [GitHub repository](https://github.com/Arize-ai/phoenix)
119 | - For an in-depth guide on how to host your own Phoenix instance, see the [Phoenix documentation](https://docs.arize.com/phoenix/deployment)
120 | - Try out free hosted Phoenix instances at [phoenix.arize.com](https://phoenix.arize.com/)
121 | - Check out the [Phoenix documentation](https://docs.arize.com/phoenix)
122 | 
--------------------------------------------------------------------------------
/integrations/arize.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Arize AI
4 | description: Trace and Monitor your Haystack pipelines with Arize AI
5 | authors:
6 |   - name: Arize AI
7 |     socials:
8 |       github: Arize-ai
9 |       twitter: arizeai
10 |       linkedin: https://www.linkedin.com/company/arizeai/
11 | pypi: https://pypi.org/project/openinference-instrumentation-haystack/
12 | repo: https://github.com/Arize-ai/openinference
13 | type: Monitoring Tool
14 | report_issue: https://github.com/Arize-ai/openinference/issues
15 | logo: /logos/arize.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | 
20 | ### **Table of Contents**
21 | 
22 | - [Overview](#overview)
23 | - [Installation](#installation)
24 | - [Usage](#usage)
25 | 
26 | ## Overview
27 | 
28 | Arize is an AI observability and evaluation platform designed to help you troubleshoot, evaluate, and experiment with LLM and ML applications. Developers use Arize to get applications working quickly, evaluate performance, detect and prevent production issues, and curate datasets.
29 | 
30 | - [Documentation for Arize AI + Haystack](https://docs.arize.com/arize/large-language-models/tracing/auto-instrumentation/haystack)
31 | 
32 | ## Installation
33 | 
34 | ```bash
35 | pip install openinference-instrumentation-haystack haystack-ai arize-otel opentelemetry-sdk opentelemetry-exporter-otlp
36 | ```
37 | 
38 | ## Usage
39 | 
40 | To trace any Haystack pipeline with Arize, simply initialize OpenTelemetry and the `HaystackInstrumentor`. Haystack pipelines that run within the same environment send traces to Arize.
41 | 
42 | ```python
43 | from openinference.instrumentation.haystack import HaystackInstrumentor
44 | # Import open-telemetry dependencies
45 | from arize_otel import register_otel, Endpoints
46 | 
47 | # Setup OTEL via our convenience function
48 | register_otel(
49 |     endpoints = Endpoints.ARIZE,
50 |     space_id = "", # from the space settings page
51 |     api_key = "", # from the space settings page
52 |     model_id = "", # name this to whatever you would like
53 | )
54 | 
55 | # Turn on the Haystack instrumentor so pipeline runs are traced
56 | HaystackInstrumentor().instrument()
57 | ```
58 | 
59 | Now, you can run a Haystack pipeline within the same environment, resulting in the following trace:
60 | 
61 | > To run the example below, export your OpenAI Key to the `OPENAI_API_KEY` environment variable.
62 | 
63 | ![Arize Demo](https://raw.githubusercontent.com/deepset-ai/haystack-integrations/main/images/arize-demo.gif)
64 | 
65 | ```python
66 | from haystack import Document, Pipeline
67 | from haystack.components.builders.prompt_builder import PromptBuilder
68 | from haystack.components.generators import OpenAIGenerator
69 | from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
70 | from haystack.document_stores.in_memory import InMemoryDocumentStore
71 | 
72 | document_store = InMemoryDocumentStore()
73 | document_store.write_documents([
74 |     Document(content="My name is Jean and I live in Paris."),
75 |     Document(content="My name is Mark and I live in Berlin."),
76 |     Document(content="My name is Giorgio and I live in Rome.")
77 | ])
78 | 
79 | prompt_template = """
80 | Given these documents, answer the question.
81 | Documents:
82 | {% for doc in documents %}
83 |     {{ doc.content }}
84 | {% endfor %}
85 | Question: {{question}}
86 | Answer:
87 | """
88 | 
89 | retriever = InMemoryBM25Retriever(document_store=document_store)
90 | prompt_builder = PromptBuilder(template=prompt_template)
91 | llm = OpenAIGenerator()
92 | 
93 | rag_pipeline = Pipeline()
94 | rag_pipeline.add_component("retriever", retriever)
95 | rag_pipeline.add_component("prompt_builder", prompt_builder)
96 | rag_pipeline.add_component("llm", llm)
97 | rag_pipeline.connect("retriever", "prompt_builder.documents")
98 | rag_pipeline.connect("prompt_builder", "llm")
99 | 
100 | question = "Who lives in Paris?"
101 | results = rag_pipeline.run(
102 |     {
103 |         "retriever": {"query": question},
104 |         "prompt_builder": {"question": question},
105 |     }
106 | )
107 | ```
--------------------------------------------------------------------------------
/integrations/astradb.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: AstraDB
4 | description: A Document Store for storage and retrieval with AstraDB
5 | authors:
6 |   - name: Nicholas Brackley
7 |     socials:
8 |       github: hc33brackles
9 |   - name: deepset
10 |     socials:
11 |       github: deepset-ai
12 |       twitter: deepset_ai
13 |       linkedin: https://www.linkedin.com/company/deepset-ai/
14 | pypi: https://pypi.org/project/astra-haystack/
15 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/astra
16 | type: Document Store
17 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
18 | logo: /logos/astradb.png
19 | version: Haystack 2.0
20 | toc: true
21 | ---
22 | 
23 | ### **Table of Contents**
24 | - [Overview](#overview)
25 | - [Installation](#installation)
26 | - [Usage](#usage)
27 | - [Note](#note)
28 | - [License](#license)
29 | 
30 | ## Overview
31 | 
32 | 
33 | DataStax Astra DB is a serverless vector database built on Apache Cassandra, and it supports vector-based search and auto-scaling. You can deploy it on AWS, GCP, or Azure and easily expand to one or more regions within those clouds for multi-region availability, low-latency data access, data sovereignty, and to avoid cloud vendor lock-in. For more information, see the [DataStax documentation](https://docs.datastax.com/en/astra-serverless/docs/).
34 | 
35 | 
36 | This integration allows you to use AstraDB for document storage and retrieval in your Haystack pipelines. This page provides instructions on how to initialize an AstraDB instance and connect it with Haystack.
37 | 
38 | ## Components
39 | 
40 | - [`AstraDocumentStore`](https://docs.haystack.deepset.ai/docs/astradocumentstore). This component serves as a persistent data store for your Haystack documents, and supports a number of embedding models and vector dimensions.
41 | - [`AstraEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/astraretriever). This is an embedding-based Retriever compatible with the Astra Document Store.
42 | 
43 | 
44 | ## Initialization
45 | 
46 | First, you need to [sign up for a free DataStax account](https://astra.datastax.com/signup). Follow these instructions for [creating an AstraDB database](https://docs.datastax.com/en/astra/astra-db-vector/databases/create-database.html#create-a-serverless-non-vector-database) in the DataStax console. Make sure you create a collection, note your keyspace name, and generate an access token, since you'll need those later.
47 | 
48 | ## Installation
49 | 
50 | ```console
51 | pip install astra-haystack
52 | ```
53 | ## Usage
54 | 
55 | This package includes Astra Document Store and Astra Retriever classes that integrate with Haystack, allowing you to easily perform document retrieval or RAG with AstraDB and include those functions in Haystack pipelines.
56 | 
57 | In order to connect AstraDB with Haystack, you'll need these pieces of information from your DataStax console:
58 | - Database API Endpoint
59 | - Application Token
60 | - Astra collection name (otherwise `documents` will be used)
61 | 
62 | ### How to use the `AstraDocumentStore`
63 | 
64 | ```python
65 | from haystack import Document
66 | from haystack_integrations.document_stores.astra import AstraDocumentStore
67 | 
68 | # Make sure ASTRA_DB_API_ENDPOINT and ASTRA_DB_APPLICATION_TOKEN environment variables are set
69 | document_store = AstraDocumentStore()
70 | 
71 | document_store.write_documents([
72 |     Document(content="This is first"),
73 |     Document(content="This is second")
74 | ])
75 | print(document_store.count_documents())
76 | ```
77 | 
78 | ### How to use the `AstraEmbeddingRetriever`
79 | 
80 | ```python
81 | from haystack import Document, Pipeline
82 | from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
83 | from haystack_integrations.components.retrievers.astra import AstraEmbeddingRetriever
84 | from haystack_integrations.document_stores.astra import AstraDocumentStore
85 | 
86 | 
87 | # Make sure ASTRA_DB_API_ENDPOINT and ASTRA_DB_APPLICATION_TOKEN environment variables are set
88 | document_store = AstraDocumentStore()
89 | 
90 | model = "sentence-transformers/all-mpnet-base-v2"
91 | 
92 | documents = [Document(content="There are over 7,000 languages spoken around the world today."),
93 |              Document(content="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."),
94 |              Document(content="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves.")]
95 | 
96 | document_embedder = SentenceTransformersDocumentEmbedder(model=model)
97 | document_embedder.warm_up()
98 | documents_with_embeddings = document_embedder.run(documents)
99 | 
100 | document_store.write_documents(documents_with_embeddings.get("documents"))
101 | query_pipeline = Pipeline()
102 | query_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder(model=model))
103 | query_pipeline.add_component("retriever", AstraEmbeddingRetriever(document_store=document_store))
104 | query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
105 | 
106 | query = "How many languages are there?"
107 | 
108 | result = query_pipeline.run({"text_embedder": {"text": query}})
109 | 
110 | print(result['retriever']['documents'][0])
111 | ```
112 | 
113 | ### Note
114 | Please note that the current version of the Astra JSON API does not support the following filter operators:
115 | `$lt`, `$lte`, `$gt`, `$gte`, `$nin`, `$not`, `$neq`
116 | It also does not support filtering on `None` values: because results are stored as JSON documents, `None` values are never inserted.
117 | 
118 | ### License
119 | 
120 | `astra-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
121 | 
--------------------------------------------------------------------------------
/integrations/azure-ai-search.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Azure AI Search
4 | description: Use Azure AI Search with Haystack
5 | authors:
6 |   - name: deepset
7 |     socials:
8 |       github: deepset-ai
9 |       twitter: deepset_ai
10 |       linkedin: https://www.linkedin.com/company/deepset-ai/
11 | pypi: https://pypi.org/project/azure-ai-search-haystack
12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/azure-ai-search
13 | type: Document Store
14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
15 | logo: /logos/azure-ai.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | 
20 | ### **Table of Contents**
21 | - [Overview](#overview)
22 | - [Installation](#installation)
23 | - [Usage](#usage)
24 | 
25 | ## Overview
26 | 
27 | `AzureAISearchDocumentStore` integrates [Azure AI Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search), an enterprise-ready search and retrieval system, with [Haystack](https://haystack.deepset.ai/) by [deepset](https://www.deepset.ai).
28 | 
29 | This integration lets you use search indexes in Azure AI Search as a document store to build RAG-based applications on Azure, with native LLM integrations. To retrieve data from the document store, the integration supports three types of retrieval techniques:
30 | 
31 | 1. **Embedding Retrieval**: For vector-based searches.
32 | 2. **BM25 Retrieval**: Keyword retrieval utilizing the BM25 algorithm.
33 | 3. **Hybrid Retrieval**: A combination of vector and BM25 retrieval methods.
34 | 
35 | ## Installation
36 | 
37 | Install the Azure AI Search integration:
38 | 
39 | ```bash
40 | pip install "azure-ai-search-haystack"
41 | ```
42 | 
43 | ## Usage
44 | 
45 | To use the `AzureAISearchDocumentStore`, you need an active [Azure subscription](https://azure.microsoft.com/en-us/products/ai-services/ai-search) with a deployed Azure AI Search service. For authentication, provide the search service endpoint in the `AZURE_AI_SEARCH_ENDPOINT` environment variable and the API key in `AZURE_AI_SEARCH_API_KEY`. If the API key is not provided, `DefaultAzureCredential` will attempt to authenticate you through the browser.
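For example, on a Unix-like shell you might export these variables before running the snippet below (the values shown are placeholders for your own service endpoint and key):

```bash
export AZURE_AI_SEARCH_ENDPOINT="https://your-search-service.search.windows.net"
export AZURE_AI_SEARCH_API_KEY="your-api-key"
```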
46 | 
47 | ```python
48 | from haystack_integrations.document_stores.azure_ai_search import AzureAISearchDocumentStore
49 | from haystack import Document
50 | 
51 | document_store = AzureAISearchDocumentStore(
52 |     metadata_fields={"version": float, "label": str},
53 |     index_name="document-store-example",
54 | )
55 | 
56 | documents = [
57 |     Document(
58 |         content="This is an introduction to using Python for data analysis.",
59 |         meta={"version": 1.0, "label": "chapter_one"},
60 |     ),
61 |     Document(
62 |         content="Learn how to use Python libraries for machine learning.",
63 |         meta={"version": 1.5, "label": "chapter_two"},
64 |     ),
65 |     Document(
66 |         content="Advanced Python techniques for data visualization.",
67 |         meta={"version": 2.0, "label": "chapter_three"},
68 |     ),
69 | ]
70 | document_store.write_documents(documents)
71 | 
72 | filters = {
73 |     "operator": "AND",
74 |     "conditions": [
75 |         {"field": "meta.version", "operator": ">", "value": 1.2},
76 |         {"field": "meta.label", "operator": "in", "value": ["chapter_one", "chapter_three"]},
77 |     ],
78 | }
79 | 
80 | results = document_store.filter_documents(filters)
81 | print(results)
82 | ```
83 | 
84 | You can supply any parameter that `SearchIndex` supports via `index_creation_kwargs` during the initialization of the `AzureAISearchDocumentStore` to customize index creation. Additionally, the `AzureAISearchDocumentStore` supports semantic ranking, which can be enabled by including the `SemanticSearch` configuration in `index_creation_kwargs` during initialization and utilizing it through one of the retrievers. For further details, refer to the [Azure AI tutorial](https://learn.microsoft.com/en-us/azure/search/search-get-started-semantic?tabs=dotnet) on this feature.
85 | 
--------------------------------------------------------------------------------
/integrations/azure.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Azure
4 | description: Use OpenAI models deployed through Azure services with Haystack
5 | authors:
6 |   - name: deepset
7 |     socials:
8 |       github: deepset-ai
9 |       twitter: deepset_ai
10 |       linkedin: https://www.linkedin.com/company/deepset-ai
11 | pypi: https://pypi.org/project/haystack-ai/
12 | repo: https://github.com/deepset-ai/haystack
13 | type: Model Provider
14 | report_issue: https://github.com/deepset-ai/haystack/issues
15 | logo: /logos/azure.png
16 | version: Haystack 2.0
17 | toc: true
18 | ---
19 | 
20 | ### Table of Contents
21 | 
22 | - [Overview](#overview)
23 | - [Installation](#installation)
24 | - [Usage](#usage)
25 |   - [Embedding Models](#embedding-models)
26 |   - [Generative Models (LLMs)](#generative-models-llms)
27 | 
28 | ## Overview
29 | 
30 | [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) provides REST API access to OpenAI's powerful language models including the GPT-4, GPT-4 Turbo with Vision, GPT-3.5-Turbo, and Embeddings model series. To get access to Azure OpenAI endpoints, visit [Azure OpenAI Service REST API reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference).
31 | 
32 | ## Installation
33 | 
34 | Install Haystack:
35 | 
36 | ```bash
37 | pip install haystack-ai
38 | ```
39 | 
40 | ## Usage
41 | 
42 | To work with Azure components, you will need an Azure OpenAI API key or an [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id), as well as an Azure OpenAI endpoint.
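For example, here is a minimal sketch of initializing a generator against your Azure resource using Haystack's `Secret` class (the endpoint and deployment names are placeholders for your own resource; see the components below for details on credential handling):

```python
from haystack.components.generators import AzureOpenAIGenerator
from haystack.utils import Secret

# api_key and azure_ad_token resolve from the AZURE_OPENAI_API_KEY and
# AZURE_OPENAI_AD_TOKEN environment variables by default; here the API
# key is wired up explicitly for illustration.
llm = AzureOpenAIGenerator(
    azure_endpoint="https://example-resource.openai.azure.com/",  # placeholder
    azure_deployment="gpt-35-turbo",                              # placeholder
    api_key=Secret.from_env_var("AZURE_OPENAI_API_KEY"),
)
```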
43 | 
44 | ### Components
45 | 
46 | - [AzureOpenAIGenerator](https://docs.haystack.deepset.ai/docs/azureopenaigenerator)
47 | - [AzureOpenAIChatGenerator](https://docs.haystack.deepset.ai/docs/azureopenaichatgenerator)
48 | - [AzureOpenAITextEmbedder](https://docs.haystack.deepset.ai/docs/azureopenaitextembedder)
49 | - [AzureOpenAIDocumentEmbedder](https://docs.haystack.deepset.ai/docs/azureopenaidocumentembedder)
50 | 
51 | All components use the `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_AD_TOKEN` environment variables by default. Otherwise, you can pass `api_key` and `azure_ad_token` at initialization using the `Secret` class. Read more about [Secret Handling](https://docs.haystack.deepset.ai/docs/secret-management#structured-secret-handling).
52 | 
53 | ### Embedding Models
54 | 
55 | You can leverage embedding models from Azure OpenAI through two components: [AzureOpenAITextEmbedder](https://docs.haystack.deepset.ai/docs/azureopenaitextembedder) and [AzureOpenAIDocumentEmbedder](https://docs.haystack.deepset.ai/docs/azureopenaidocumentembedder).
56 | 
57 | To create semantic embeddings for documents, use `AzureOpenAIDocumentEmbedder` in your indexing pipeline. For generating embeddings for queries, use `AzureOpenAITextEmbedder`. Once you've selected the suitable component for your specific use case, initialize the component with the required parameters.
58 | 
59 | Below is an example indexing pipeline with `InMemoryDocumentStore`, `AzureOpenAIDocumentEmbedder` and `DocumentWriter`:
60 | 
61 | ```python
62 | import os
63 | 
64 | from haystack import Document, Pipeline
65 | from haystack.document_stores.in_memory import InMemoryDocumentStore
66 | from haystack.components.embedders import AzureOpenAIDocumentEmbedder
67 | from haystack.components.writers import DocumentWriter
68 | 
69 | os.environ["AZURE_OPENAI_API_KEY"] = "Your Azure OpenAI API key"
70 | os.environ["AZURE_OPENAI_AD_TOKEN"] = "Your Azure Active Directory Token"
71 | 
72 | document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
73 | 
74 | documents = [Document(content="My name is Wolfgang and I live in Berlin"),
75 |              Document(content="I saw a black horse running"),
76 |              Document(content="Germany has many big cities")]
77 | 
78 | indexing_pipeline = Pipeline()
79 | indexing_pipeline.add_component("embedder", AzureOpenAIDocumentEmbedder(azure_endpoint="https://example-resource.openai.azure.com/", azure_deployment="text-embedding-ada-002"))
80 | indexing_pipeline.add_component("writer", DocumentWriter(document_store=document_store))
81 | indexing_pipeline.connect("embedder", "writer")
82 | 
83 | indexing_pipeline.run({"embedder": {"documents": documents}})
84 | ```
85 | 
86 | ### Generative Models (LLMs)
87 | 
88 | You can leverage Azure OpenAI models through two components: [AzureOpenAIGenerator](https://docs.haystack.deepset.ai/docs/azureopenaigenerator) and [AzureOpenAIChatGenerator](https://docs.haystack.deepset.ai/docs/azureopenaichatgenerator).
89 | 
90 | To use OpenAI models deployed through Azure services for text generation, initialize an `AzureOpenAIGenerator` with `azure_deployment` and `azure_endpoint`. You can then use the `AzureOpenAIGenerator` instance in a pipeline after the `PromptBuilder`.
91 | 
92 | Below is an example generative question answering pipeline using RAG with `PromptBuilder` and `AzureOpenAIGenerator`:
93 | 
94 | ```python
95 | import os
96 | 
97 | from haystack import Pipeline
98 | from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
99 | from haystack.components.builders.prompt_builder import PromptBuilder
100 | from haystack.components.generators import AzureOpenAIGenerator
101 | 
102 | os.environ["AZURE_OPENAI_API_KEY"] = "Your Azure OpenAI API key"
103 | os.environ["AZURE_OPENAI_AD_TOKEN"] = "Your Azure Active Directory Token"
104 | 
105 | template = """
106 | Given the following information, answer the question.
107 | 
108 | Context:
109 | {% for document in documents %}
110 |     {{ document.content }}
111 | {% endfor %}
112 | 
113 | Question: What's the official language of {{ country }}?
114 | """
115 | pipe = Pipeline()
116 | 
117 | # document_store is the store populated in the indexing example above
118 | pipe.add_component("retriever", InMemoryBM25Retriever(document_store=document_store))
119 | pipe.add_component("prompt_builder", PromptBuilder(template=template))
120 | pipe.add_component("llm", AzureOpenAIGenerator(azure_endpoint="https://example-resource.openai.azure.com/", azure_deployment="gpt-35-turbo"))
121 | pipe.connect("retriever", "prompt_builder.documents")
122 | pipe.connect("prompt_builder", "llm")
123 | 
124 | pipe.run({
125 |     "retriever": {"query": "France"},
126 |     "prompt_builder": {
127 |         "country": "France"
128 |     }
129 | })
130 | ```
--------------------------------------------------------------------------------
/integrations/browserbase.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Browserbase
4 | description: Use Browserbase headless browsers with Haystack
5 | authors:
6 |   - name: Browserbase
7 |     socials:
8 |       github: https://github.com/browserbase
9 |       twitter: https://twitter.com/browserbasehq
10 |       linkedin: https://www.linkedin.com/company/browserbasehq
11 | pypi: https://pypi.org/project/browserbase-haystack
12 | repo: https://github.com/browserbase/haystack
13 | report_issue: https://github.com/browserbase/haystack/issues
14 | type: Data Ingestion
15 | logo: /logos/browserbase.png
16 | version: Haystack 2.0
17 | ---
18 | 
19 | # Browserbase Haystack Fetcher
20 | 
21 | [Browserbase](https://browserbase.com) is a developer platform to reliably run, manage, and monitor headless browsers.
22 | 
23 | Power your AI data retrieval with:
24 | - [Serverless Infrastructure](https://docs.browserbase.com/under-the-hood) providing reliable browsers to extract data from complex UIs
25 | - [Stealth Mode](https://docs.browserbase.com/features/stealth-mode) with included fingerprinting tactics and automatic captcha solving
26 | - [Session Debugger](https://docs.browserbase.com/features/sessions) to inspect your Browser Session with network timeline and logs
27 | - [Live Debug](https://docs.browserbase.com/guides/session-debug-connection/browser-remote-control) to quickly debug your automation
28 | 
29 | ## Installation and setup
30 | 
31 | - Get an API key and Project ID from [browserbase.com](https://browserbase.com) and set them in environment variables (`BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`).
32 | - Install the required dependencies:
33 | 
34 | ```
35 | pip install browserbase-haystack
36 | ```
37 | 
38 | ## Usage
39 | 
40 | You can load webpages into Haystack using `BrowserbaseFetcher`. Optionally, you can set the `text_content` parameter to convert the pages to a text-only representation.
41 | 42 | ### Standalone 43 | 44 | ```py 45 | from browserbase_haystack import BrowserbaseFetcher 46 | 47 | browserbase_fetcher = BrowserbaseFetcher() 48 | browserbase_fetcher.run(urls=["https://example.com"], text_content=False) 49 | ``` 50 | 51 | ### In a pipeline 52 | 53 | ```py 54 | from haystack import Pipeline 55 | from haystack.components.generators import OpenAIGenerator 56 | from haystack.components.builders import PromptBuilder 57 | from browserbase_haystack import BrowserbaseFetcher 58 | 59 | prompt_template = ( 60 | "Tell me the titles of the given pages. Pages: {{ documents }}" 61 | ) 62 | prompt_builder = PromptBuilder(template=prompt_template) 63 | llm = OpenAIGenerator() 64 | 65 | browserbase_fetcher = BrowserbaseFetcher() 66 | 67 | pipe = Pipeline() 68 | pipe.add_component("fetcher", browserbase_fetcher) 69 | pipe.add_component("prompt_builder", prompt_builder) 70 | pipe.add_component("llm", llm) 71 | 72 | pipe.connect("fetcher.documents", "prompt_builder.documents") 73 | pipe.connect("prompt_builder.prompt", "llm.prompt") 74 | result = pipe.run(data={"fetcher": {"urls": ["https://example.com"]}}) 75 | ``` 76 | 77 | ### Parameters 78 | 79 | - `urls` Required. A list of URLs to fetch. 80 | - `text_content` Retrieve only text content. Default is `False`. 81 | - `session_id` Optional. Provide an existing Session ID. 82 | - `proxy` Optional. Enable/Disable Proxies. 83 | -------------------------------------------------------------------------------- /integrations/cerebras.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Cerebras 4 | description: Use LLMs served by Cerebras API 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: Haystack_AI 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/haystack-ai/ 12 | repo: https://github.com/deepset-ai/haystack 13 | type: Model Provider 14 | report_issue: https://github.com/deepset-ai/haystack/issues 15 | logo: /logos/cerebras.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### **Table of Contents** 21 | 22 | - [Overview](#overview) 23 | - [Usage](#usage) 24 | 25 | ## Overview 26 | 27 | [Cerebras](https://cerebras.ai/) is the go-to platform for fast and effortless AI training and inference. 28 | 29 | ## Usage 30 | 31 | [Cerebras API](https://cerebras.ai/inference) is OpenAI compatible, making it easy to use in Haystack via OpenAI Generators. 32 | 33 | ### Using `Generator` 34 | 35 | Here's an example of using `llama3.1-8b` served via Cerebras to perform question answering on a web page. 36 | You need to set the environment variable `CEREBRAS_API_KEY` and choose a [compatible model](https://inference-docs.cerebras.ai/introduction). 
37 | 
38 | ```python
39 | from haystack import Pipeline
40 | from haystack.utils import Secret
41 | from haystack.components.fetchers import LinkContentFetcher
42 | from haystack.components.converters import HTMLToDocument
43 | from haystack.components.builders import PromptBuilder
44 | from haystack.components.generators import OpenAIGenerator
45 | 
46 | fetcher = LinkContentFetcher()
47 | converter = HTMLToDocument()
48 | prompt_template = """
49 | According to the contents of this website:
50 | {% for document in documents %}
51 |   {{document.content}}
52 | {% endfor %}
53 | Answer the given question: {{query}}
54 | Answer:
55 | """
56 | prompt_builder = PromptBuilder(template=prompt_template)
57 | llm = OpenAIGenerator(
58 |     api_key=Secret.from_env_var("CEREBRAS_API_KEY"),
59 |     api_base_url="https://api.cerebras.ai/v1",
60 |     model="llama3.1-8b"
61 | )
62 | pipeline = Pipeline()
63 | pipeline.add_component("fetcher", fetcher)
64 | pipeline.add_component("converter", converter)
65 | pipeline.add_component("prompt", prompt_builder)
66 | pipeline.add_component("llm", llm)
67 | 
68 | pipeline.connect("fetcher.streams", "converter.sources")
69 | pipeline.connect("converter.documents", "prompt.documents")
70 | pipeline.connect("prompt.prompt", "llm.prompt")
71 | 
72 | result = pipeline.run({"fetcher": {"urls": ["https://cerebras.ai/inference"]},
73 |                        "prompt": {"query": "Why should I use Cerebras for serving LLMs?"}})
74 | 
75 | print(result["llm"]["replies"][0])
76 | ```
77 | 
78 | ### Using `ChatGenerator`
79 | 
80 | See an example of engaging in a multi-turn conversation with `llama3.1-8b`.
81 | You need to set the environment variable `CEREBRAS_API_KEY` and choose a [compatible model](https://inference-docs.cerebras.ai/introduction).
82 | 
83 | ```python
84 | from haystack.components.generators.chat import OpenAIChatGenerator
85 | from haystack.dataclasses import ChatMessage
86 | from haystack.utils import Secret
87 | 
88 | generator = OpenAIChatGenerator(
89 |     api_key=Secret.from_env_var("CEREBRAS_API_KEY"),
90 |     api_base_url="https://api.cerebras.ai/v1",
91 |     model="llama3.1-8b",
92 |     generation_kwargs = {"max_tokens": 512}
93 | )
94 | 
95 | messages = []
96 | 
97 | while True:
98 |     msg = input("Enter your message or Q to exit\n🧑 ")
99 |     if msg=="Q":
100 |         break
101 |     messages.append(ChatMessage.from_user(msg))
102 |     response = generator.run(messages=messages)
103 |     assistant_resp = response['replies'][0]
104 |     print("🤖 "+assistant_resp.text)
105 |     messages.append(assistant_resp)
106 | ```
--------------------------------------------------------------------------------
/integrations/chroma-documentstore.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Chroma
4 | description: A Document Store for storage and retrieval with Chroma
5 | authors:
6 |   - name: Massimiliano Pippi
7 |     socials:
8 |       github: masci
9 |   - name: deepset
10 |     socials:
11 |       github: deepset-ai
12 |       twitter: deepset_ai
13 |       linkedin: https://www.linkedin.com/company/deepset-ai/
14 | pypi: https://pypi.org/project/chroma-haystack
15 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/chroma
16 | type: Document Store
17 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
18 | logo: /logos/chroma.png
19 | version: Haystack 2.0
20 | toc: true
21 | ---
22 | 
23 | [![PyPI - Version](https://img.shields.io/pypi/v/chroma-haystack.svg)](https://pypi.org/project/chroma-haystack)
24 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/chroma-haystack.svg)](https://pypi.org/project/chroma-haystack)
25 | [![test](https://github.com/masci/chroma-haystack/actions/workflows/test.yml/badge.svg)](https://github.com/masci/chroma-haystack/actions/workflows/test.yml)
26 | 
27 | -----
28 | 
29 | **Table of Contents**
30 | 
31 | - [Chroma Document Store for Haystack](#chroma-document-store-for-haystack)
32 | - [Installation](#installation)
33 | - [Examples](#examples)
34 | - [License](#license)
35 | 
36 | ## Installation
37 | Use `pip` to install Chroma:
38 | 
39 | ```console
40 | pip install chroma-haystack
41 | ```
42 | ## Usage
43 | Once installed, initialize your Chroma database to use it with Haystack:
44 | 
45 | ```python
46 | from haystack_integrations.document_stores.chroma import ChromaDocumentStore
47 | 
48 | # Chroma runs in-memory, so reuse the same instance across your pipelines
49 | document_store = ChromaDocumentStore()
50 | ```
51 | 
52 | ### Writing Documents to ChromaDocumentStore
53 | To write documents to `ChromaDocumentStore`, create an indexing pipeline.
54 | 
55 | ```python
56 | from haystack import Pipeline
57 | from haystack.components.converters import TextFileToDocument
58 | from haystack.components.writers import DocumentWriter
59 | 
60 | file_paths = ["my_document.txt"]  # paths to your text files
61 | 
62 | indexing = Pipeline()
63 | indexing.add_component("converter", TextFileToDocument())
64 | indexing.add_component("writer", DocumentWriter(document_store))
65 | indexing.connect("converter", "writer")
66 | indexing.run({"converter": {"sources": file_paths}})
67 | ```
68 | 
69 | ## Examples
70 | You can find a code example showing how to use the Document Store and the Retriever under the `example/` folder of [this repo](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/chroma).
71 | 
72 | ## License
73 | 
74 | `chroma-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/context-ai.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: integration
3 | name: Context AI
4 | description: A component to log conversations for analytics by Context.ai
5 | authors:
6 |   - name: Alec Barber
7 |     socials:
8 |       github: BarberAlec
9 |       linkedin: https://www.linkedin.com/in/alec-barber/
10 |   - name: Alex Gamble
11 |     socials:
12 |       github: agamble
13 |       linkedin: https://www.linkedin.com/in/alex-gamble-13682086/
14 |   - name: Henry Scott-Green
15 |     socials:
16 |       linkedin: https://www.linkedin.com/in/hcscottgreen/
17 |   - name: Amishapriya Singh
18 |     socials:
19 |       github: amisha29sh
20 |       linkedin: https://www.linkedin.com/in/amisha29/
21 | pypi: https://pypi.org/project/context-haystack/
22 | repo: https://github.com/contextco/context-haystack
23 | type: Monitoring Tool
24 | report_issue: https://github.com/contextco/context-haystack/issues
25 | logo: /logos/context.svg
26 | version: Haystack 2.0
27 | toc: true
28 | ---
29 | ### **Table of Contents**
30 | - [Overview](#overview)
31 | - [Installation](#installation)
32 | - [Usage](#usage)
33 | - [License](#license)
34 | 
35 | ## Overview
36 | [Context.ai](https://context.ai) is an evaluations and analytics tool for products powered by LLMs.
37 | 
38 | With Context.ai, you can understand how your users are interacting with natural language interfaces. This helps you see where your customers are having great experiences, and also proactively detect potential areas of improvement. You can test the performance impact of changes with evaluations before you ship them to production, and identify where inappropriate conversations are taking place.
With evaluations, you can test the performance impact of changes before you ship them to production, and you can identify where inappropriate conversations are taking place. 39 | 40 | Log in to the [Context Dashboard](https://with.context.ai) to create a token and see your analytics. 41 | 42 | ## Installation 43 | 44 | ```bash 45 | pip install --upgrade context-haystack 46 | ``` 47 | 48 | ## Usage 49 | ### Components 50 | The `ContextAIAnalytics` component allows you to seamlessly integrate with Context.ai, uploading your messages to the Context AI platform. 51 | 52 | When running your pipeline, you must include a `thread_id` in the parameters; each unique `thread_id` identifies a conversation. You can optionally include `metadata`, with `user_id` and `model` reserved for special analytics. 53 | 54 | Use an instance of the `ContextAIAnalytics` component at each stage of your pipeline where you wish to log a message. In the example below, the outputs of the `prompt_builder` and the `llm` components are captured. 55 | 56 | ### Example 57 | ```python 58 | import uuid 59 | import os 60 | 61 | from haystack.components.generators.chat import OpenAIChatGenerator 62 | from haystack.components.builders import ChatPromptBuilder 63 | from haystack import Pipeline 64 | from haystack.dataclasses import ChatMessage 65 | 66 | from context_haystack.context import ContextAIAnalytics 67 | 68 | 69 | model = "gpt-3.5-turbo" 70 | os.environ["GETCONTEXT_TOKEN"] = "GETCONTEXT_TOKEN" 71 | os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY" 72 | 73 | prompt_builder = ChatPromptBuilder() 74 | llm = OpenAIChatGenerator(model=model) 75 | prompt_analytics = ContextAIAnalytics() 76 | assistant_analytics = ContextAIAnalytics() 77 | 78 | pipe = Pipeline() 79 | pipe.add_component("prompt_builder", prompt_builder) 80 | pipe.add_component("llm", llm) 81 | pipe.add_component("prompt_analytics", prompt_analytics) 82 | pipe.add_component("assistant_analytics", assistant_analytics) 83 | 84 | pipe.connect("prompt_builder.prompt", "llm.messages") 85 | pipe.connect("prompt_builder.prompt", "prompt_analytics") 86 | pipe.connect("llm.replies", "assistant_analytics") 87 | 88 | # thread_id is unique to each conversation 89 | context_parameters = {"thread_id": uuid.uuid4(), "metadata": {"model": model, "user_id": "1234"}} 90 | location = "Berlin" 91 | messages = [ChatMessage.from_system("Always respond in German even if some input data is in other languages."), 92 | ChatMessage.from_user("Tell me about {{location}}")] 93 | 94 | response = pipe.run( 95 | data={ 96 | "prompt_builder": {"template_variables":{"location": location}, "prompt_source": messages}, 97 | "prompt_analytics": context_parameters, 98 | "assistant_analytics": context_parameters, 99 | } 100 | ) 101 | 102 | print(response) 103 | ``` 104 | 105 | ## License 106 | `context-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
107 | -------------------------------------------------------------------------------- /integrations/deepeval.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: DeepEval 4 | description: Use the DeepEval evaluation framework to calculate model-based metrics 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/deepeval-haystack 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/deepeval 13 | type: Evaluation Framework 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/deepeval.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | - [DeepEvalEvaluator](#deepevalevaluator) 26 | 27 | ## Overview 28 | 29 | [DeepEval](https://github.com/confident-ai/deepeval) (by [Confident AI](https://www.confident-ai.com/)) is an open-source framework for model-based evaluation of LLM applications, quantifying their performance on aspects such as faithfulness, answer relevancy, and contextual recall. More information can be found on the [documentation page](https://docs.haystack.deepset.ai/docs/deepevalevaluator). 30 | 31 | ## Installation 32 | 33 | Install the DeepEval integration: 34 | ```bash 35 | pip install deepeval-haystack 36 | ``` 37 | 38 | ## Usage 39 | 40 | Once installed, you will have access to a [DeepEvalEvaluator](https://docs.haystack.deepset.ai/docs/deepevalevaluator) that supports a variety of model-based evaluation metrics: 41 | - Answer Relevancy 42 | - Faithfulness 43 | - Contextual Precision 44 | - Contextual Recall 45 | - Contextual Relevance 46 | 47 | Alongside evaluation scores, DeepEval's evaluators provide the reasoning behind each evaluation.
48 | 49 | ### DeepEvalEvaluator 50 | 51 | To use this integration for calculating model-based evaluation metrics, initialize a `DeepEvalEvaluator` with the metric name and metric input parameters: 52 | 53 | ```python 54 | from haystack import Pipeline 55 | from haystack_integrations.components.evaluators.deepeval import DeepEvalEvaluator, DeepEvalMetric 56 | 57 | QUESTIONS = [ 58 | "Which is the most popular global sport?", 59 | "Who created the Python language?", 60 | ] 61 | CONTEXTS = [ 62 | [ 63 | "The popularity of sports can be measured in various ways, including TV viewership, social media presence, number of participants, and economic impact.", 64 | "Football is undoubtedly the world's most popular sport with major events like the FIFA World Cup and sports personalities like Ronaldo and Messi, drawing a followership of more than 4 billion people.", 65 | ], 66 | [ 67 | "Python, created by Guido van Rossum in the late 1980s, is a high-level general-purpose programming language.", 68 | "Its design philosophy emphasizes code readability, and its language constructs aim to help programmers write clear, logical code for both small and large-scale software projects.", 69 | ], 70 | ] 71 | RESPONSES = [ 72 | "Football is the most popular sport with around 4 billion followers worldwide", 73 | "Python language was created by Guido van Rossum.", 74 | ] 75 | 76 | pipeline = Pipeline() 77 | evaluator = DeepEvalEvaluator( 78 | metric=DeepEvalMetric.FAITHFULNESS, 79 | metric_params={"model": "gpt-4"}, 80 | ) 81 | pipeline.add_component("evaluator", evaluator) 82 | 83 | # Each metric expects a specific set of parameters as input. Refer to the 84 | # DeepEvalMetric class' documentation for more details. 85 | results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) 86 | 87 | for output in results["evaluator"]["results"]: 88 | print(output) 89 | ``` 90 | -------------------------------------------------------------------------------- /integrations/deepl.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: DeepL 4 | description: Use DeepL translation services with Haystack 5 | authors: 6 | - name: Dribia Data Research 7 | socials: 8 | github: dribia 9 | linkedin: https://www.linkedin.com/company/dribia 10 | pypi: https://pypi.org/project/deepl-haystack/ 11 | repo: https://github.com/dribia/deepl-haystack 12 | type: Custom Component 13 | report_issue: https://github.com/dribia/deepl-haystack/issues 14 | logo: /logos/deepl.svg 15 | version: Haystack 2.0 16 | toc: true 17 | --- 18 | ### **Table of Contents** 19 | - [Overview](#overview) 20 | - [Installation](#installation) 21 | - [Usage](#usage) 22 | - [Components](#components) 23 | - [Examples](#examples) 24 | - [Standalone](#standalone) 25 | - [Pipeline](#pipeline) 26 | - [License](#license) 27 | 28 | ## Overview 29 | 30 | [DeepL](https://www.deepl.com/) is a powerful translation services provider, offering high-quality translations 31 | in multiple languages. This integration allows you to use DeepL's translation services with Haystack. 32 | 33 | ## Installation 34 | 35 | ```console 36 | pip install deepl-haystack 37 | ``` 38 | 39 | ## Usage 40 | 41 | ### Components 42 | 43 | The DeepL Haystack integration introduces two components that can be used to 44 | obtain translations using the [DeepL API](https://www.deepl.com/en/pro-api). 45 | 46 | - The `DeepLTextTranslator` to translate plain text (Python strings). 
- The `DeepLDocumentTranslator` to translate Haystack `Document` objects. 48 | 49 | ### API Key 50 | 51 | To use the DeepL Haystack integration, you'll need to provide a DeepL API key. 52 | You can get one by signing up at the [DeepL API website](https://www.deepl.com/en/pro#developer). 53 | 54 | Once obtained, **make sure to export it as an environment variable named `DEEPL_API_KEY`** 55 | in your working environment before running the examples below. Both the `DeepLTextTranslator` 56 | and the `DeepLDocumentTranslator` component constructors will expect this variable to be set. 57 | 58 | An alternative way to provide the API key, although not recommended, would be to pass it through the 59 | `api_key` parameter of the components' constructor, using the Haystack 60 | [Secret](https://docs.haystack.deepset.ai/reference/utils-api#secret) utility. 61 | 62 | ## Examples 63 | 64 | ### Standalone 65 | 66 | The following example shows how to translate a simple text: 67 | 68 | ```python 69 | from deepl_haystack import DeepLTextTranslator 70 | 71 | translator = DeepLTextTranslator(source_lang="EN", target_lang="ES") 72 | 73 | translated_text = translator.run("Hello, world!") 74 | print(translated_text) 75 | # {'translation': '¡Hola, mundo!', 'meta': {'source_lang': 'EN', 'target_lang': 'ES'}} 76 | ``` 77 | 78 | Here, instead, we show how to translate a list of `Document` objects: 79 | 80 | ```python 81 | from haystack.dataclasses import Document 82 | 83 | from deepl_haystack import DeepLDocumentTranslator 84 | 85 | translator = DeepLDocumentTranslator(source_lang="EN", target_lang="ES") 86 | 87 | documents_to_translate = [ 88 | Document(content="Hello, world!"), 89 | Document(content="Goodbye, Joe!", meta={"name": "Joe"}), 90 | ] 91 | 92 | translated_documents = translator.run(documents_to_translate) 93 | print( 94 | "\n".join( 95 | [f"{doc.content}, {doc.meta}" for doc in translated_documents["documents"]] 96 | ) 97 | ) 98 | # ¡Hola, mundo!, {'source_lang': 'EN', 'target_lang': 'ES'} 99 | # ¡Adiós, Joe!, {'name': 'Joe', 'source_lang': 'EN', 'target_lang': 'ES'} 100 | ``` 101 | 102 | ### Pipeline 103 | 104 | To use the DeepL components in a Haystack pipeline, 105 | you can use them like any other Haystack component. 106 | 107 | ```python 108 | from haystack import Pipeline 109 | from haystack.components.converters import TextFileToDocument 110 | from haystack.components.writers import DocumentWriter 111 | from haystack.dataclasses.byte_stream import ByteStream 112 | from haystack.document_stores.in_memory import InMemoryDocumentStore 113 | 114 | from deepl_haystack import DeepLDocumentTranslator 115 | 116 | document_store = InMemoryDocumentStore() 117 | 118 | pipeline = Pipeline() 119 | pipeline.add_component(instance=TextFileToDocument(), name="converter") 120 | pipeline.add_component( 121 | instance=DeepLDocumentTranslator(target_lang="ES"), 122 | name="translator", 123 | ) 124 | pipeline.add_component( 125 | instance=DocumentWriter(document_store=document_store), name="document_store" 126 | ) 127 | pipeline.connect("converter", "translator") 128 | pipeline.connect("translator", "document_store") 129 | pipeline.run({"converter": {"sources": [ByteStream.from_string("Hello world!")]}}) 130 | print(document_store.filter_documents()) 131 | # [Document(id=..., content: '¡Hola, mundo!', meta: {'source_lang': 'EN', 'language': 'ES'})] 132 | ``` 133 | 134 | ### License 135 | 136 | `deepl-haystack` is distributed under the terms of the 137 | [MIT](https://opensource.org/license/mit) license.
138 | -------------------------------------------------------------------------------- /integrations/docling.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Docling 4 | description: Use Docling to locally parse and chunk PDF, DOCX, and other document types in Haystack 5 | authors: 6 | - name: DS4SD 7 | socials: 8 | github: DS4SD 9 | pypi: https://pypi.org/project/docling-haystack 10 | repo: https://github.com/DS4SD/docling-haystack 11 | type: Data Ingestion 12 | report_issue: https://github.com/DS4SD/docling/issues 13 | logo: /logos/docling.png 14 | version: Haystack 2.0 15 | toc: true 16 | --- 17 | ### **Table of Contents** 18 | - [Overview](#overview) 19 | - [Installation](#installation) 20 | - [Usage](#usage) 21 | - [License](#license) 22 | 23 | ## Overview 24 | 25 | [Docling](https://github.com/DS4SD/docling) locally parses PDF, DOCX, HTML, and other 26 | document formats into a rich standardized representation (incl. layout, tables etc.), 27 | which it can then export to Markdown, JSON, and others. 28 | 29 | Check out the [Docling docs](https://docling-project.github.io/docling/) for more details. 30 | 31 | This integration introduces Docling support, enabling Haystack users to: 32 | - use various document types in LLM applications with ease and speed, and 33 | - leverage Docling's rich format for advanced, document-native grounding. 34 | 35 | ## Installation 36 | 37 | ```bash 38 | pip install docling-haystack 39 | ``` 40 | 41 | ## Usage 42 | 43 | ### Components 44 | 45 | This integration introduces `DoclingConverter`, a component which reads document 46 | file paths (local or URL) and outputs Haystack `Document` objects. 47 | 48 | `DoclingConverter` supports two different export modes, see `export_type` initialization 49 | argument further below. 50 | 51 | ### Use Docling Converter 52 | 53 | #### Docling Converter Initialization 54 | 55 | `DoclingConverter` creation can be parametrized via the following `__init__()` 56 | arguments, most of which refer to the initialization and usage of the underlying Docling 57 | [`DocumentConverter`](https://docling-project.github.io/docling/usage/) and 58 | [chunker](https://docling-project.github.io/docling/concepts/chunking/) instances: 59 | 60 | - `converter`: The Docling `DocumentConverter` to use; if not set, a system default is 61 | used. 62 | - `convert_kwargs`: Any parameters to pass to Docling conversion; if not set, a system 63 | default is used. 64 | - `export_type`: The export mode to use: `ExportType.DOC_CHUNKS` (default) chunks each 65 | input document (see `chunker`) and captures each individual chunk as a separate 66 | Haystack `Document`, while `ExportType.MARKDOWN` captures each input document as a 67 | separate Haystack `Document` (in which case splitting is likely required downstream). 68 | - `md_export_kwargs`: Any parameters to pass to Markdown export (in case of 69 | `ExportType.MARKDOWN`). 70 | - `chunker`: The Docling chunker instance to use; if not set, a system default is used 71 | (in case of `ExportType.DOC_CHUNKS`). 72 | - `meta_extractor`: The extractor instance to use for populating the output document 73 | metadata; if not set, a system default is used. 
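Putting the arguments above together, here is a minimal initialization sketch. Treat it as an illustration rather than the definitive API: the `ExportType` import path and the `HybridChunker` tokenizer argument follow the project README and Docling's chunking docs at the time of writing, so double-check them against the linked documentation.

```python
from docling.chunking import HybridChunker  # Docling's hybrid chunker (import path per Docling docs)
from docling_haystack.converter import DoclingConverter, ExportType  # ExportType location is an assumption

# Default mode: chunk each input document and emit one Haystack Document per chunk
chunking_converter = DoclingConverter(
    export_type=ExportType.DOC_CHUNKS,
    chunker=HybridChunker(tokenizer="sentence-transformers/all-MiniLM-L6-v2"),
)

# Alternative mode: capture each input document as a single Markdown Document,
# leaving any splitting to downstream components
markdown_converter = DoclingConverter(export_type=ExportType.MARKDOWN)
```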
74 | 75 | #### Standalone 76 | 77 | ```python 78 | from docling_haystack.converter import DoclingConverter 79 | 80 | converter = DoclingConverter() 81 | documents = converter.run(paths=["https://arxiv.org/pdf/2408.09869"])["documents"] 82 | 83 | print(repr(documents[2].content)) 84 | # -> Abstract\nThis technical report introduces Docling [...] 85 | ``` 86 | 87 | #### In a Pipeline 88 | 89 | Check out [this notebook](https://docling-project.github.io/docling/examples/rag_haystack/) 90 | illustrating usage in a complete example with indexing and RAG pipelines. 91 | 92 | ### License 93 | 94 | MIT License. 95 | -------------------------------------------------------------------------------- /integrations/duckduckgo-api-websearch.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: DuckDuckGo 4 | description: Uses DuckDuckGo API for web searches 5 | authors: 6 | - name: Giovanni Alzetta 7 | socials: 8 | github: https://github.com/GivAlz/ 9 | pypi: https://pypi.org/project/duckduckgo-api-haystack/ 10 | repo: https://github.com/GivAlz/duckduckgo-api-haystack 11 | type: Data Ingestion 12 | logo: /logos/duckduckgo.png 13 | version: Haystack 2.0 14 | toc: true 15 | --- 16 | 17 | Implements a *WebSearch*-style component backed by the freely available DuckDuckGo API. 18 | 19 | ### **Table of Contents** 20 | - [Overview](#Overview) 21 | - [Installation](#Installation) 22 | - [Usage](#Usage) 23 | - [License](#License) 24 | 25 | ## Overview 26 | 27 | `DuckduckgoApiWebSearch` performs web searches using the DuckDuckGo search engine. 28 | 29 | This repository provides a Python module similar to `SearchApiWebSearch` and `SerperDevWebSearch`, 30 | but utilizes the free DuckDuckGo API. 31 | 32 | When you pass a query to `DuckduckgoApiWebSearch`, it returns a list of URLs that are most relevant to your search. 33 | The results are based on page snippets (the brief text displayed beneath the page titles in search results) rather 34 | than the content of the entire page. 35 | 36 | While the functionality is comparable to the aforementioned APIs, there are two important considerations: 37 | - *Rate limitations*: The API may impose some restrictions on the number of queries allowed. 38 | - *Result quality*: The quality of search results may vary. 39 | 40 | ## Installation 41 | 42 | ```bash 43 | pip install duckduckgo-api-haystack 44 | ``` 45 | 46 | ## Usage 47 | 48 | The `DuckduckgoApiWebSearch` class allows you to perform web searches using the DuckDuckGo search engine. 49 | Here's how to use it: 50 | 51 | 1. Import and initialize the `DuckduckgoApiWebSearch` class: 52 | 53 | ```python 54 | from duckduckgo_api_haystack import DuckduckgoApiWebSearch 55 | 56 | websearch = DuckduckgoApiWebSearch(top_k=10) 57 | ``` 58 | 59 | 2.
Perform a search: 60 | 61 | ```python 62 | results = websearch.run(query="What is frico?") 63 | 64 | # Access the search results 65 | documents = results["documents"] 66 | links = results["links"] 67 | ``` 68 | 69 | ### Configuration Options 70 | 71 | You can customize the search behavior by passing parameters to the `DuckduckgoApiWebSearch` constructor: 72 | 73 | ```python 74 | websearch = DuckduckgoApiWebSearch( 75 | top_k=10, # Maximum number of documents to return 76 | max_results=10, # Maximum number of documents to consider in the search 77 | region="wt-wt", # Region for search results (default: no region) 78 | safesearch="moderate", # SafeSearch setting ("on", "moderate", or "off") 79 | timelimit=None, # Time limit for results (e.g., "d" for day, "w" for week, "m" for month) 80 | backend="auto", # Search backend ("auto", "html", or "lite") 81 | allowed_domain="", # Restrict search to a specific domain 82 | timeout=10, # Timeout for each search request (in seconds) 83 | use_answers=False, # Include DuckDuckGo's answer box in results 84 | proxy=None # Web address of proxy server (if needed) 85 | ) 86 | ``` 87 | 88 | For more details on the configuration options, refer to the [duckduckgo_search documentation](https://github.com/deedy5/duckduckgo_search). 89 | 90 | ### License 91 | 92 | Apache 2.0 93 | -------------------------------------------------------------------------------- /integrations/elasticsearch-document-store.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Elasticsearch 4 | description: Use an Elasticsearch database with Haystack 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/elasticsearch-haystack 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch 13 | type: Document Store 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/elastic.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | 26 | ## Overview 27 | 28 | The `ElasticsearchDocumentStore` is maintained in the [haystack-core-integrations](https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch) repo. It allows you to use [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/elasticsearch-intro.html) as data storage for your Haystack pipelines. 29 | 30 | For details on available methods, visit the [API Reference](https://docs.haystack.deepset.ai/v1.25/reference/document-store-api#elasticsearchdocumentstore-1). 31 | 32 | ## Installation 33 | 34 | To run an Elasticsearch instance locally, first follow the [installation](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html) and [start up](https://www.elastic.co/guide/en/elasticsearch/reference/current/starting-elasticsearch.html) guides.
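Alternatively, for quick local experiments, you can start a single-node development instance with Docker. The command below is a sketch: the image tag is an example, and it disables security, so use it only for local testing.

```bash
docker run -p 9200:9200 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  docker.elastic.co/elasticsearch/elasticsearch:8.13.0
```

Once Elasticsearch is running, install the Haystack integration: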
35 | 36 | ```bash 37 | pip install elasticsearch-haystack 38 | ``` 39 | 40 | ## Usage 41 | 42 | Once installed, you can start using your Elasticsearch database with Haystack by initializing it: 43 | 44 | ```python 45 | from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore 46 | 47 | document_store = ElasticsearchDocumentStore(hosts = "http://localhost:9200") 48 | ``` 49 | 50 | ### Writing Documents to ElasticsearchDocumentStore 51 | 52 | To write documents to your `ElasticsearchDocumentStore`, create an indexing pipeline with a [DocumentWriter](https://docs.haystack.deepset.ai/docs/documentwriter), or use the `write_documents()` function. 53 | For this step, you can use the available [TextFileToDocument](https://docs.haystack.deepset.ai/docs/textfiletodocument) and [DocumentSplitter](https://docs.haystack.deepset.ai/docs/documentsplitter), as well as other [Integrations](/integrations) that might help you fetch data from other resources. 54 | 55 | ### Indexing Pipeline 56 | 57 | ```python 58 | from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore 59 | from haystack import Pipeline 60 | from haystack.components.embedders import SentenceTransformersDocumentEmbedder 61 | from haystack.components.converters import TextFileToDocument 62 | from haystack.components.preprocessors import DocumentSplitter 63 | from haystack.components.writers import DocumentWriter 64 | 65 | document_store = ElasticsearchDocumentStore(hosts = "http://localhost:9200") 66 | converter = TextFileToDocument() 67 | splitter = DocumentSplitter() 68 | doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/multi-qa-mpnet-base-dot-v1") 69 | writer = DocumentWriter(document_store) 70 | 71 | indexing_pipeline = Pipeline() 72 | indexing_pipeline.add_component("converter", converter) 73 | indexing_pipeline.add_component("splitter", splitter) 74 | indexing_pipeline.add_component("doc_embedder", doc_embedder) 75 | indexing_pipeline.add_component("writer", writer) 76 | 77 | indexing_pipeline.connect("converter", "splitter") 78 | indexing_pipeline.connect("splitter", "doc_embedder") 79 | indexing_pipeline.connect("doc_embedder", "writer") 80 | 81 | indexing_pipeline.run({ 82 | "converter":{"sources":["filename.txt"]} 83 | }) 84 | ``` 85 | 86 | ### Using Elasticsearch in a Query Pipeline 87 | 88 | Once you have documents in your `ElasticsearchDocumentStore`, they are ready to be used with [ElasticsearchEmbeddingRetriever](https://docs.haystack.deepset.ai/docs/elasticsearchembeddingretriever) in the retrieval step of any Haystack pipeline, such as a Retrieval-Augmented Generation (RAG) pipeline. Learn more about [Retrievers](https://docs.haystack.deepset.ai/docs/retrievers) to make use of vector search within your LLM pipelines.
89 | 90 | ```python 91 | from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore 92 | from haystack import Pipeline 93 | from haystack.components.embedders import SentenceTransformersTextEmbedder 94 | from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever 95 | 96 | model = "sentence-transformers/multi-qa-mpnet-base-dot-v1" 97 | 98 | document_store = ElasticsearchDocumentStore(hosts = "http://localhost:9200") 99 | 100 | 101 | retriever = ElasticsearchEmbeddingRetriever(document_store=document_store) 102 | text_embedder = SentenceTransformersTextEmbedder(model=model) 103 | 104 | query_pipeline = Pipeline() 105 | query_pipeline.add_component("text_embedder", text_embedder) 106 | query_pipeline.add_component("retriever", retriever) 107 | query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") 108 | 109 | result = query_pipeline.run({"text_embedder": {"text": "historical places in Istanbul"}}) 110 | 111 | print(result) 112 | ``` 113 | -------------------------------------------------------------------------------- /integrations/flow-judge.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Flow Judge 4 | description: Evaluate Haystack pipelines using Flow Judge 5 | authors: 6 | - name: Flow AI 7 | socials: 8 | github: flowaicom 9 | twitter: flowaicom 10 | linkedin: https://www.linkedin.com/company/flowaicom/ 11 | pypi: https://pypi.org/project/flow-judge/ 12 | repo: https://github.com/flowaicom/flow-judge 13 | type: Evaluation Framework 14 | report_issue: https://github.com/flowaicom/flow-judge/issues 15 | logo: /logos/flow-ai.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | ### **Table of Contents** 20 | - [Overview](#overview) 21 | - [Installation](#installation) 22 | - [Usage](#usage) 23 | - [License](#license) 24 | 25 | ## Overview 26 | This integration allows you to evaluate Haystack pipelines using Flow Judge. 27 | 28 | Flow Judge is an open-source, lightweight (3.8B) language model optimized for LLM system evaluations and crafted for accuracy, speed, and customization. 29 | 30 | Read the technical report [here](https://www.flow-ai.com/blog/flow-judge). 31 | 32 | ## Installation 33 | 34 | For running Flow Judge with the vLLM engine: 35 | ```bash 36 | pip install flow-judge[vllm] 37 | pip install 'flash_attn>=2.6.3' --no-build-isolation 38 | ``` 39 | For running Flow Judge with transformers: 40 | ```bash 41 | pip install flow-judge[hf] 42 | ``` 43 | If using flash attention: 44 | ```bash 45 | pip install 'flash_attn>=2.6.3' --no-build-isolation 46 | ``` 47 | For running Flow Judge with Llamafile on macOS: 48 | ```bash 49 | pip install flow-judge[llamafile] 50 | pip install 'flash_attn>=2.6.3' --no-build-isolation 51 | ``` 52 | To learn more about the installation, visit the [Flow Judge Installation](https://pypi.org/project/flow-judge/) page. 53 | 54 | Finally, install Haystack: 55 | ```bash 56 | pip install haystack-ai 57 | ``` 58 | 59 | ## Usage 60 | The Flow Judge integration with Haystack lets you evaluate Haystack pipelines using Flow Judge, bringing its evaluations directly into your workflows so you can assess and improve your LLM systems with precision and efficiency. 61 | 62 | Flow Judge offers a set of built-in metrics and easy-to-create custom metrics.
63 | 64 | ### Available Built-in Metrics 65 | 66 | Built-in metrics come in three scoring scales (Binary, 3-point Likert, and 5-point Likert): 67 | - Response Correctness 68 | - Response Faithfulness 69 | - Response Relevance 70 | 71 | To check the available metrics, you can run: 72 | ```python 73 | from flow_judge.metrics import list_all_metrics 74 | list_all_metrics() 75 | ``` 76 | 77 | While these preset metrics provide a solid foundation for evaluation, the true power of Flow Judge lies in its ability to create custom metrics tailored to your specific requirements. This flexibility allows for a more nuanced and comprehensive assessment of your LLM systems. Please refer to our [tutorial](https://github.com/flowaicom/flow-judge/blob/main/examples/2_custom_evaluation_criteria.ipynb) on creating custom metrics for more details. 78 | 79 | ### Components 80 | This integration introduces the `HaystackFlowJudge` component, which is used just like other evaluator components in Haystack. 81 | 82 | For details about the use and parameters of this component, please refer to the [HaystackFlowJudge class](https://github.com/flowaicom/flow-judge/blob/main/flow_judge/integrations/haystack.py) and Haystack's [LLMEvaluator component](https://docs.haystack.deepset.ai/reference/evaluators-api#module-llm_evaluator). 83 | 84 | ### Use Flow Judge with Haystack 85 | We have created a comprehensive guide on how to effectively use Flow Judge with Haystack. You can access it [here](https://github.com/flowaicom/flow-judge/blob/main/examples/5_evaluate_haystack_rag_pipeline.ipynb). This tutorial demonstrates how to evaluate a RAG pipeline built with Haystack using Flow Judge. 86 | 87 | ### Quick Example 88 | The code snippet below provides a simpler example of how to integrate Flow Judge with Haystack. However, we recommend following the full tutorial for a deeper understanding of the concepts and implementation. 89 | 90 | ```python 91 | from flow_judge.integrations.haystack import HaystackFlowJudge 92 | from flow_judge.metrics.presets import RESPONSE_FAITHFULNESS_5POINT 93 | from flow_judge import Hf 94 | 95 | from haystack import Pipeline 96 | 97 | # Create a model using Hugging Face Transformers with Flash Attention 98 | model = Hf() # vLLM and Llamafile are also supported 99 | 100 | # Evaluation sample 101 | questions = ["What is the termination clause in the contract?"] 102 | contexts = ["This contract may be terminated by either party upon providing thirty (30) days written notice to the other party. In the event of a breach of contract, the non-breaching party may terminate the contract immediately."] 103 | answers = ["The contract can be terminated by either party with thirty days written notice."] 104 | 105 | # Define the HaystackFlowJudge evaluator; we will use the built-in faithfulness metric 106 | # For parameters refer to Haystack's [LLMEvaluator](https://docs.haystack.deepset.ai/reference/evaluators-api#module-llm_evaluator) and HaystackFlowJudge class.
107 | ff_evaluator = HaystackFlowJudge( 108 | metric=RESPONSE_FAITHFULNESS_5POINT, 109 | model=model, 110 | progress_bar=True, 111 | raise_on_failure=True, 112 | save_results=True, 113 | fail_on_parse_error=False 114 | ) 115 | 116 | # Set up the pipeline 117 | eval_pipeline = Pipeline() 118 | 119 | # Add components to the pipeline 120 | eval_pipeline.add_component("ff_evaluator", ff_evaluator) 121 | 122 | # Run the eval pipeline 123 | results = eval_pipeline.run( 124 | { 125 | "ff_evaluator": { 126 | 'query': questions, 127 | 'context': contexts, 128 | 'response': answers, 129 | } 130 | } 131 | ) 132 | 133 | # Print eval results 134 | for result in results['ff_evaluator']['results']: 135 | score = result['score'] 136 | feedback = result['feedback'] 137 | print(f"Score: {score}") 138 | print(f"Feedback: {feedback}\n") 139 | 140 | ``` 141 | 142 | ### License 143 | The code is licensed under the [Apache 2.0 license](https://github.com/flowaicom/flow-judge/blob/main/LICENSE). 144 | 145 | -------------------------------------------------------------------------------- /integrations/github.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: GitHub 4 | description: Interact with GitHub repositories, issues, and pull requests within Haystack 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/github-haystack 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/github 13 | type: Tool Integration 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/github.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### **Table of Contents** 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | 26 | ## Overview 27 | 28 | The GitHub integration for Haystack provides a set of components and tools to interact with GitHub repositories, issues, and pull requests. It enables you to view repository contents, manage issues, create pull requests, and more within your Haystack agents and pipelines. 29 | 30 | 31 | Some of the components and tools in this integration require GitHub authentication with a personal access token. 32 | For example, authentication is required to post a comment on GitHub, fork a repository, or open a pull request. You can create a [fine-grained personal access token](https://github.com/settings/personal-access-tokens) or a [classic personal access token](https://github.com/settings/tokens) on GitHub and then expose it via an environment variable called `GITHUB_API_KEY`.
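For example, in a Unix-like shell (the token value below is a placeholder):

```bash
export GITHUB_API_KEY="<your-personal-access-token>"
```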
33 | 34 | 35 | ## Installation 36 | 37 | Install the GitHub integration with pip: 38 | 39 | ```bash 40 | pip install github-haystack 41 | ``` 42 | 43 | ## Usage 44 | 45 | This integration comes with several components and tools: 46 | 47 | ### Components 48 | - `GitHubIssueViewer`: View issues and their details 49 | - `GitHubIssueCommenter`: Add comments to issues 50 | - `GitHubRepoViewer`: View repository contents and metadata 51 | - `GitHubRepoForker`: Fork repositories 52 | - `GitHubFileEditor`: Edit files in repositories 53 | - `GitHubPRCreator`: Create pull requests 54 | 55 | ### Tools 56 | - `GitHubIssueViewerTool`: View issues 57 | - `GitHubIssueCommenterTool`: Comment on issues 58 | - `GitHubRepoViewerTool`: View repository contents 59 | - `GitHubFileEditorTool`: Edit repository files 60 | - `GitHubPRCreatorTool`: Create pull requests 61 | 62 | ### Example Usage 63 | 64 | ```python 65 | from typing import List 66 | 67 | from haystack import Pipeline 68 | from haystack.components.agents import Agent 69 | from haystack.components.builders import ChatPromptBuilder 70 | from haystack.dataclasses import ChatMessage, Document 71 | from haystack.tools.from_function import tool 72 | 73 | from haystack_integrations.components.connectors.github import GitHubIssueViewer 74 | from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator 75 | from haystack_integrations.prompts.github import SYSTEM_PROMPT 76 | from haystack_integrations.tools.github import GitHubRepoViewerTool 77 | 78 | @tool 79 | def create_comment(comment: str) -> str: 80 | """ 81 | Use this to create a GitHub comment once you have finished your exploration. 82 | """ 83 | # A mock tool to showcase how the Agent uses tools. Use `GitHubIssueCommenterTool` instead of this one to actually post comments on GitHub.
84 | return comment 85 | 86 | repo_viewer_tool = GitHubRepoViewerTool() 87 | 88 | chat_generator = AnthropicChatGenerator(model="claude-3-5-sonnet-latest", generation_kwargs={"max_tokens": 8000}) 89 | 90 | agent = Agent( 91 | chat_generator=chat_generator, 92 | system_prompt=SYSTEM_PROMPT, 93 | tools=[repo_viewer_tool, create_comment], 94 | exit_conditions=["create_comment"], 95 | state_schema={"documents": {"type": List[Document]}}, 96 | ) 97 | 98 | issue_template = """ 99 | Issue from: {{ url }} 100 | {% for document in documents %} 101 | {% if loop.index == 1 %} 102 | **Title: {{ document.meta.title }}** 103 | {% endif %} 104 | 105 | {{document.content}} 106 | 107 | {% endfor %} 108 | """ 109 | 110 | issue_builder = ChatPromptBuilder(template=[ChatMessage.from_user(issue_template)], required_variables="*") 111 | 112 | issue_fetcher = GitHubIssueViewer() 113 | 114 | pipeline = Pipeline() 115 | 116 | pipeline.add_component("issue_fetcher", issue_fetcher) 117 | pipeline.add_component("issue_builder", issue_builder) 118 | pipeline.add_component("agent", agent) 119 | 120 | pipeline.connect("issue_fetcher.documents", "issue_builder.documents") 121 | pipeline.connect("issue_builder.prompt", "agent.messages") 122 | 123 | issue_url = "https://github.com///issues/1268"  # placeholder: fill in the repository owner and name 124 | 125 | result = pipeline.run({"url": issue_url}) 126 | print(result["agent"]["last_message"].tool_call_result.result) 127 | ``` 128 | -------------------------------------------------------------------------------- /integrations/groq.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Groq 4 | description: Use open Language Models served by Groq 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: Haystack_AI 10 | linkedin: https://www.linkedin.com/company/deepset-ai 11 | pypi: https://pypi.org/project/haystack-ai/ 12 | repo: https://github.com/deepset-ai/haystack 13 | type: Model Provider 14 | report_issue: https://github.com/deepset-ai/haystack/issues 15 | logo: /logos/groq.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### **Table of Contents** 21 | 22 | - [Overview](#overview) 23 | - [Usage](#usage) 24 | 25 | ## Overview 26 | 27 | Groq is an AI company that has developed the Language Processing Unit (LPU), a high-performance engine designed for fast inference of Large Language Models. 28 | 29 | To start using Groq, sign up for an API key [here](https://console.groq.com/). 30 | This will give you access to the Groq API, which offers rapid inference of open Language Models like Mixtral and Llama 3. 31 | 32 | ## Usage 33 | 34 | The Groq API is OpenAI-compatible, making it easy to use in Haystack via OpenAI Generators. 35 | 36 | 37 | ### Using `Generator` 38 | 39 | Here's an example of using Mixtral served via Groq to perform question answering on a web page. 40 | You need to set the environment variable `GROQ_API_KEY` and choose a [compatible model](https://console.groq.com/docs/models).
41 | 42 | ```python 43 | from haystack import Pipeline 44 | from haystack.utils import Secret 45 | from haystack.components.fetchers import LinkContentFetcher 46 | from haystack.components.converters import HTMLToDocument 47 | from haystack.components.builders import PromptBuilder 48 | from haystack.components.generators import OpenAIGenerator 49 | 50 | fetcher = LinkContentFetcher() 51 | converter = HTMLToDocument() 52 | prompt_template = """ 53 | According to the contents of this website: 54 | {% for document in documents %} 55 | {{document.content}} 56 | {% endfor %} 57 | Answer the given question: {{query}} 58 | Answer: 59 | """ 60 | prompt_builder = PromptBuilder(template=prompt_template) 61 | llm = OpenAIGenerator( 62 | api_key=Secret.from_env_var("GROQ_API_KEY"), 63 | api_base_url="https://api.groq.com/openai/v1", 64 | model="mixtral-8x7b-32768", 65 | generation_kwargs = {"max_tokens": 512} 66 | ) 67 | pipeline = Pipeline() 68 | pipeline.add_component("fetcher", fetcher) 69 | pipeline.add_component("converter", converter) 70 | pipeline.add_component("prompt", prompt_builder) 71 | pipeline.add_component("llm", llm) 72 | 73 | pipeline.connect("fetcher.streams", "converter.sources") 74 | pipeline.connect("converter.documents", "prompt.documents") 75 | pipeline.connect("prompt.prompt", "llm.prompt") 76 | 77 | result = pipeline.run({"fetcher": {"urls": ["https://wow.groq.com/why-groq/"]}, 78 | "prompt": {"query": "Why should I use Groq for serving LLMs?"}}) 79 | 80 | print(result["llm"]["replies"][0]) 81 | ``` 82 | 83 | ### Using `ChatGenerator` 84 | 85 | See an example of engaging in a multi-turn conversation with Llama 3. 86 | You need to set the environment variable `GROQ_API_KEY` and choose a [compatible model](https://console.groq.com/docs/models). 
87 | 88 | ```python 89 | from haystack.components.generators.chat import OpenAIChatGenerator 90 | from haystack.dataclasses import ChatMessage 91 | from haystack.utils import Secret 92 | 93 | generator = OpenAIChatGenerator( 94 | api_key=Secret.from_env_var("GROQ_API_KEY"), 95 | api_base_url="https://api.groq.com/openai/v1", 96 | model="llama3-8b-8192", 97 | generation_kwargs = {"max_tokens": 512} 98 | ) 99 | 100 | 101 | messages = [] 102 | 103 | while True: 104 | msg = input("Enter your message or Q to exit\n🧑 ") 105 | if msg=="Q": 106 | break 107 | messages.append(ChatMessage.from_user(msg)) 108 | response = generator.run(messages=messages) 109 | assistant_resp = response['replies'][0] 110 | print("🤖 "+assistant_resp.text) 111 | messages.append(assistant_resp) 112 | ``` 113 | -------------------------------------------------------------------------------- /integrations/lancedb.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: LanceDB Haystack 4 | description: A DocumentStore backed by LanceDB 5 | authors: 6 | - name: Alan Meeson 7 | socials: 8 | github: alanmeeson 9 | pypi: https://pypi.org/project/lancedb-haystack/ 10 | repo: https://github.com/alanmeeson/lancedb-haystack 11 | type: Document Store 12 | report_issue: https://github.com/alanmeeson/lancedb-haystack/issues 13 | logo: /logos/lancedb.png 14 | version: Haystack 2.0 15 | toc: true 16 | --- 17 | ### **Table of Contents** 18 | - [Overview](#overview) 19 | - [Installation](#installation) 20 | - [Usage](#usage) 21 | - [License](#license) 22 | 23 | ## Overview 24 | LanceDB-Haystack is an embedded [LanceDB](https://lancedb.github.io/lancedb/)-backed Document Store for [Haystack](https://github.com/deepset-ai/haystack/). 25 | 26 | ## Installation 27 | 28 | The simplest way to get LanceDB-Haystack is to install it from PyPI via pip: 29 | 30 | ```shell 31 | pip install lancedb-haystack 32 | ``` 33 | 34 | ## Usage 35 | 36 | ```python 37 | import pyarrow as pa 38 | from lancedb_haystack import LanceDBDocumentStore 39 | from lancedb_haystack import LanceDBEmbeddingRetriever, LanceDBFTSRetriever 40 | 41 | # Declare the metadata fields schema; this lets us filter using it. 42 | # See: https://arrow.apache.org/docs/python/api/datatypes.html 43 | metadata_schema = pa.struct([ 44 | ('title', pa.string()), 45 | ('publication_date', pa.timestamp('s')), 46 | ('page_number', pa.int32()), 47 | ('topics', pa.list_(pa.string())) 48 | ]) 49 | 50 | # Create the DocumentStore 51 | document_store = LanceDBDocumentStore( 52 | database='my_database', 53 | table_name="documents", 54 | metadata_schema=metadata_schema, 55 | embedding_dims=384 56 | ) 57 | 58 | # Create an embedding retriever 59 | embedding_retriever = LanceDBEmbeddingRetriever(document_store) 60 | 61 | # Create a Full Text Search retriever 62 | fts_retriever = LanceDBFTSRetriever(document_store) 63 | ``` 64 | 65 | See also [`examples/pipeline-usage.ipynb`](https://github.com/alanmeeson/lancedb-haystack/blob/main/examples/pipeline-usage.ipynb) for a full worked example, and the [API Reference](https://lancedb-haystack.readthedocs.io).
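Continuing from the snippet above, here is a minimal sketch of how the embedding retriever could be wired into a query pipeline. The socket names (`text_embedder.embedding` feeding `retriever.query_embedding`) follow the usual Haystack retriever convention and are an assumption here; consult the example notebook and API reference linked above for the exact interface.

```python
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder

# all-MiniLM-L6-v2 produces 384-dimensional embeddings, matching embedding_dims above
query_pipeline = Pipeline()
query_pipeline.add_component(
    "text_embedder",
    SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"),
)
query_pipeline.add_component("retriever", embedding_retriever)
query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

result = query_pipeline.run({"text_embedder": {"text": "What is LanceDB?"}})
print(result["retriever"]["documents"])
```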
66 | 67 | ### License 68 | 69 | [Apache License 2.0](https://github.com/alanmeeson/lancedb-haystack/blob/main/LICENSE) 70 | -------------------------------------------------------------------------------- /integrations/lmformatenforcer.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: LM Format Enforcer 4 | description: Use the LM Format Enforcer to enforce JSON Schema / Regex output of your local models. 5 | authors: 6 | - name: noamgat 7 | socials: 8 | github: noamgat 9 | twitter: noamgat 10 | pypi: https://pypi.org/project/lm-format-enforcer/ 11 | repo: https://github.com/noamgat/lm-format-enforcer 12 | type: Model Provider 13 | report_issue: https://github.com/noamgat/lm-format-enforcer/issues 14 | logo: /logos/lmformatenforcer.png 15 | version: Haystack 2.0 16 | --- 17 | 18 | Use the [LM Format Enforcer](https://github.com/noamgat/lm-format-enforcer) to enforce JSON Schema / Regex output of your local models in your Haystack pipelines. 19 | 20 | Language models are able to generate text, but when requiring a precise output format, they do not always perform as instructed. Various prompt engineering techniques have been introduced to improve the robustness of the generated text, but they are not always sufficient. [LM Format Enforcer](https://github.com/noamgat/lm-format-enforcer) solves these issues by filtering the tokens that the language model is allowed to generate at every timestep, thus ensuring that the output format is respected, while minimizing the limitations on the language model. 21 | 22 | ### What is the LM Format Enforcer? 23 | ![Solution at a glance](https://raw.githubusercontent.com/noamgat/lm-format-enforcer/main/docs/Intro.webp) 24 | 25 | ## Installation 26 | Install the format enforcer via pip: `pip install lm-format-enforcer` 27 | 28 | ## Usage 29 | - `LMFormatEnforcerLocalGenerator`: A Haystack Generator component that activates the format enforcer. 30 | 31 | Important note: LM Format Enforcer requires a LOCAL generator - currently only local Hugging Face transformers are supported, vLLM support is coming soon. 32 | 33 | ### Creating a CharacterLevelParser 34 | The `CharacterLevelParser` is the class that connects the output parsing to the format enforcing. Two main parsers are available: `JsonSchemaParser` for JSON Schemas, and `RegexParser` for regular expressions. 35 | 36 | We will start off by defining the format we want to decode, regardless of Haystack. 37 | 38 | ```python 39 | 40 | from pydantic import BaseModel 41 | from lmformatenforcer import JsonSchemaParser 42 | 43 | class AnswerFormat(BaseModel): 44 | first_name: str 45 | last_name: str 46 | year_of_birth: int 47 | num_seasons_in_nba: int 48 | 49 | parser = JsonSchemaParser(AnswerFormat.schema()) 50 | ``` 51 | 52 | ### Haystack Integration 53 | 54 | 55 | 56 | 57 | To activate the enforcer with Haystack V2, an `LMFormatEnforcerLocalGenerator` has to be used. 58 | 59 | Here is a simple example: 60 | ```python 61 | from haystack.components.generators.hugging_face.hugging_face_local import HuggingFaceLocalGenerator 62 | from lmformatenforcer.integrations.haystackv2 import LMFormatEnforcerLocalGenerator 63 | from haystack import Pipeline 64 | 65 | question = 'Please give me information about Michael Jordan.
You MUST answer using the following json schema: ' 66 | schema_json_str = AnswerFormat.schema_json() 67 | prompt = f'{question}{schema_json_str}' 68 | 69 | 70 | model = HuggingFaceLocalGenerator(model="meta-llama/Llama-2-7b-chat-hf") 71 | format_enforcer = LMFormatEnforcerLocalGenerator(model, parser) 72 | pipeline = Pipeline() 73 | pipeline.add_component(instance=format_enforcer, name='model') 74 | 75 | 76 | result = pipeline.run({ 77 | "model": {"prompt": prompt} 78 | }) 79 | print(result['model']['replies'][0]) 80 | 81 | ``` 82 | Inference will run with the format enforcer applied, and the output will look like this: 83 | 84 | ``` 85 | { 86 | "first_name": "Michael", 87 | "last_name": "Jordan", 88 | "year_of_birth": 1963, 89 | "num_seasons_in_nba": 15 90 | } 91 | ``` 92 | For a full example, see the [example notebook](https://github.com/noamgat/lm-format-enforcer/blob/main/samples/colab_haystackv2_integration.ipynb). 93 | 94 | -------------------------------------------------------------------------------- /integrations/marqo-document-store.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Marqo 4 | description: A Document Store for storing and retrieving documents with Marqo 5 | authors: 6 | - name: marqo-ai 7 | socials: 8 | github: marqo-ai 9 | pypi: https://pypi.org/project/marqo-haystack/ 10 | repo: https://github.com/marqo-ai/marqo-haystack 11 | type: Document Store 12 | report_issue: https://github.com/marqo-ai/marqo-haystack/issues 13 | logo: /logos/marqo.png 14 | version: Haystack 2.0 15 | toc: true 16 | --- 17 | 18 | ### **Table of Contents** 19 | - [Overview](#overview) 20 | - [Installation](#installation) 21 | - [Usage](#usage) 22 | 23 | ## Overview 24 | 25 | This integration allows you to use [Marqo DB](https://www.marqo.ai/) as the document store for your Haystack pipelines. This page provides simple instructions on how to start it up and how to initialize a `MarqoDocumentStore` that can be used in any Haystack pipeline. 26 | 27 | ## Installation 28 | 29 | ```console 30 | pip install marqo-haystack 31 | ``` 32 | ## Usage 33 | 34 | Once installed, you can start using your Marqo database with Haystack. The `MarqoDocumentStore` is compatible with the open-source Marqo Docker container and with the Marqo managed cloud offering. 35 | 36 | ### Getting Started Locally with the Marqo Docker Container 37 | 38 | #### For x86 machines 39 | ```bash 40 | docker pull marqoai/marqo:latest 41 | docker rm -f marqo 42 | docker run --name marqo -it --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest 43 | ``` 44 | #### For M1/M2 ARM machines 45 | ```bash 46 | docker rm -f marqo-os; docker run -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" marqoai/marqo-os:0.0.3-arm 47 | ``` 48 | 49 | Next, in a new terminal: 50 | ```bash 51 | docker rm -f marqo; docker run --name marqo --privileged \ 52 | -p 8882:8882 --add-host host.docker.internal:host-gateway \ 53 | -e "OPENSEARCH_URL=https://localhost:9200" \ 54 | marqoai/marqo:latest 55 | ``` 56 | 57 | ### Getting started with Marqo Cloud 58 | 59 | Log in or create an account at [https://cloud.marqo.ai](https://cloud.marqo.ai). Create a new index with the indexing mode set as "Text-optimised".
60 | 61 | ### Initializing a MarqoDocumentStore in Haystack 62 | 63 | ```python 64 | from marqo_haystack import MarqoDocumentStore 65 | 66 | document_store = MarqoDocumentStore() 67 | ``` 68 | 69 | If you are using the Docker container, this will use an index called `documents`; if it doesn't exist, it will be created. 70 | 71 | If you are using Marqo Cloud, you can connect to an existing index like so: 72 | 73 | ```python 74 | from marqo_haystack import MarqoDocumentStore 75 | 76 | document_store = MarqoDocumentStore( 77 | url="https://api.marqo.ai", 78 | api_key="XXXXXXXXXXXXX", 79 | collection_name="my-cloud-index" 80 | ) 81 | ``` 82 | 83 | ### Writing Documents to MarqoDocumentStore 84 | To write documents to `MarqoDocumentStore`, create an indexing pipeline. 85 | 86 | ```python 87 | from haystack.components.converters import TextFileToDocument 88 | from haystack.components.writers import DocumentWriter 89 | from haystack import Pipeline 90 | indexing = Pipeline() 91 | indexing.add_component("converter", TextFileToDocument()) 92 | indexing.add_component("writer", DocumentWriter(document_store)) 93 | indexing.connect("converter", "writer") 94 | indexing.run({"converter": {"sources": file_paths}}) 95 | ``` 96 | 97 | ### Using the MarqoRetriever 98 | To retrieve documents from your Marqo document store, create a querying pipeline. 99 | 100 | To send a single query use the `MarqoSingleRetriever`: 101 | 102 | ```python 103 | from marqo_haystack.retriever import MarqoSingleRetriever 104 | from haystack import Pipeline 105 | querying = Pipeline() 106 | querying.add_component("retriever", MarqoSingleRetriever(document_store)) 107 | results = querying.run({"retriever": {"query": "Who is Marco Polo?", "top_k": 3}}) 108 | ``` 109 | 110 | To send a list of queries use the `MarqoRetriever`: 111 | 112 | ```python 113 | from marqo_haystack.retriever import MarqoRetriever 114 | from haystack import Pipeline 115 | querying = Pipeline() 116 | querying.add_component("retriever", MarqoRetriever(document_store)) 117 | results = querying.run({"retriever": {"queries": ["Who is Marco Polo?", "Can Hippos swim?"], "top_k": 3}}) 118 | ``` 119 | 120 | -------------------------------------------------------------------------------- /integrations/mastodon-fetcher.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Mastodon Fetcher 4 | description: A custom component to fetch a Mastodon username's latest posts 5 | authors: 6 | - name: Tuana Çelik 7 | socials: 8 | github: tuanacelik 9 | twitter: tuanacelik 10 | linkedin: https://www.linkedin.com/in/tuanacelik 11 | pypi: https://pypi.org/project/mastodon-fetcher-haystack/ 12 | repo: https://github.com/tuanacelik/mastodon-fetcher-haystack 13 | type: Data Ingestion 14 | report_issue: https://github.com/tuanacelik/mastodon-fetcher-haystack/issues 15 | logo: /logos/mastodon.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | The `MastodonFetcher` is a simple custom component that fetches the `last_k_posts` of a given Mastodon username. 20 | You can see a demo of this custom component in the [🦄 Should I Follow?](https://huggingface.co/spaces/deepset/should-i-follow) space on Hugging Face 🤗. 21 | 22 | ### **Table of Contents** 23 | 24 | - [Overview](#overview) 25 | - [Installation](#installation) 26 | - [Usage](#usage) 27 | 28 | ## Overview 29 | This component expects `username` to be a complete Mastodon username. For example, "tuana@sigmoid.social".
If the provided username is correct and public, `MastodonFetcher` will return a list of `Document` objects whose contents are the user's latest posts. 30 | 31 | ## Installation 32 | ```bash 33 | pip install mastodon-fetcher-haystack 34 | ``` 35 | 36 | ## Usage 37 | You can use this component on its own, or in a pipeline. 38 | 39 | ### On its own: 40 | ```python 41 | from mastodon_fetcher_haystack.mastodon_fetcher import MastodonFetcher 42 | 43 | mastodon_fetcher = MastodonFetcher() 44 | mastodon_fetcher.run(username="tuana@sigmoid.social") 45 | ``` 46 | ### In a pipeline 47 | 48 | ```python 49 | from haystack import Pipeline 50 | from haystack.utils import Secret 51 | from mastodon_fetcher_haystack.mastodon_fetcher import MastodonFetcher 52 | from haystack.components.generators import OpenAIGenerator 53 | from haystack.components.builders import PromptBuilder 54 | 55 | mastodon_fetcher = MastodonFetcher() 56 | prompt_builder = PromptBuilder(template='YOUR_PROMPT_TEMPLATE') 57 | llm = OpenAIGenerator(api_key=Secret.from_token("YOUR_OPENAI_API_KEY")) 58 | 59 | pipe = Pipeline() 60 | pipe.add_component("fetcher", mastodon_fetcher) 61 | pipe.add_component("prompt_builder", prompt_builder) 62 | pipe.add_component("llm", llm) 63 | 64 | pipe.connect("fetcher.documents", "prompt_builder.documents") 65 | pipe.connect("prompt_builder.prompt", "llm.prompt") 66 | pipe.run(data={"fetcher": {"username": "tuana@sigmoid.social"}}) 67 | ``` 68 | 69 | ## Limitations 70 | 1. The way this component is set up is very particular about how it expects usernames. Make sure you provide the full username, e.g.: `username@instance` 71 | 2. By default, the Mastodon API allows requesting up to 40 posts. 72 | -------------------------------------------------------------------------------- /integrations/mongodb.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: MongoDB 4 | description: Use a MongoDB Atlas database with Haystack 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/mongodb-atlas-haystack/ 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/mongodb_atlas 13 | type: Document Store 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/mongodb.png 16 | toc: true 17 | version: Haystack 2.0 18 | --- 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | 26 | ## Overview 27 | 28 | [MongoDB](https://www.mongodb.com/) is a document database designed to ease application development and scaling. [MongoDB Atlas](https://www.mongodb.com/atlas) is a multi-cloud database service built by the people behind MongoDB. MongoDB Atlas simplifies deploying and managing your databases while offering the versatility you need to build resilient and performant global applications on the cloud providers of your choice. 29 | 30 | You can use MongoDB Atlas's **full-text** and **semantic search** features through [MongoDBAtlasFullTextRetriever](https://docs.haystack.deepset.ai/docs/mongodbatlasfulltextretriever) and [MongoDBAtlasEmbeddingRetriever](https://docs.haystack.deepset.ai/docs/mongodbatlasembeddingretriever).
For a detailed overview of all settings for the `MongoDBAtlasDocumentStore`, visit the [Haystack Documentation](https://docs.haystack.deepset.ai/docs/mongodbatlasdocumentstore).

## Installation

```bash
pip install mongodb-atlas-haystack
```

## Usage

To use the `MongoDBAtlasDocumentStore`, you must have a running MongoDB Atlas database.
For details, see [Get Started with Atlas](https://www.mongodb.com/docs/atlas/getting-started/).

Once your database is set up, set the environment variable `MONGO_CONNECTION_STRING` to the connection string of your MongoDB Atlas database.
The format should be similar to the following:
`"mongodb+srv://{mongo_atlas_username}:{mongo_atlas_password}@{mongo_atlas_host}/?{mongo_atlas_params_string}"`

Then you can initialize a [`MongoDBAtlasDocumentStore`](https://docs.haystack.deepset.ai/docs/mongodbatlasdocumentstore) for Haystack with the required configurations:

```python
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore

document_store = MongoDBAtlasDocumentStore(
    database_name="haystack_test",
    collection_name="test_collection",
    vector_search_index="test_vector_search_index",
)
```

### Example pipelines

Here is example code for an end-to-end RAG app built on MongoDB Atlas: an indexing pipeline that embeds the documents,
and a generative pipeline that can be used for question answering.

```python
from haystack import Pipeline, Document
from haystack.document_stores.types import DuplicatePolicy
from haystack.components.writers import DocumentWriter
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever

# Create some example documents
documents = [
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome."),
]

document_store = MongoDBAtlasDocumentStore(
    database_name="haystack_test",
    collection_name="test_collection",
    vector_search_index="test_vector_search_index",
)

# Define some more components
doc_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)
doc_embedder = SentenceTransformersDocumentEmbedder(model="intfloat/e5-base-v2")
query_embedder = SentenceTransformersTextEmbedder(model="intfloat/e5-base-v2")

# Pipeline that ingests document for retrieval
indexing_pipe = Pipeline()
indexing_pipe.add_component(instance=doc_embedder, name="doc_embedder")
indexing_pipe.add_component(instance=doc_writer, name="doc_writer")

indexing_pipe.connect("doc_embedder.documents", "doc_writer.documents")
indexing_pipe.run({"doc_embedder": {"documents": documents}})

# Build a RAG pipeline with a Retriever to get documents relevant to
# the query, a PromptBuilder to create a custom prompt and the OpenAIGenerator (LLM)
prompt_template = """
Given these documents, answer the question.\nDocuments:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}

\nQuestion: {{question}}
\nAnswer:
"""
rag_pipeline = Pipeline()
rag_pipeline.add_component(instance=query_embedder, name="query_embedder")
rag_pipeline.add_component(instance=MongoDBAtlasEmbeddingRetriever(document_store=document_store), name="retriever")
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=OpenAIGenerator(), name="llm")
rag_pipeline.connect("query_embedder", "retriever.query_embedding")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

# Ask a question on the data you just added.
question = "Where does Mark live?"
result = rag_pipeline.run(
    {
        "query_embedder": {"text": question},
        "prompt_builder": {"question": question},
    }
)
print(result)
```
--------------------------------------------------------------------------------
/integrations/monsterapi.md:
--------------------------------------------------------------------------------
---
layout: integration
name: MonsterAPI
description: Use open Language Models served by MonsterAPI
authors:
  - name: monsterapi
    socials:
      github: qblocks
      twitter: monsterapi
      linkedin: https://www.linkedin.com/company/monster-api/
pypi: https://pypi.org/project/haystack-ai
repo: https://github.com/deepset-ai/haystack
type: Model Provider
report_issue: https://github.com/deepset-ai/haystack/issues
logo: /logos/monsterapi.png
version: Haystack 2.0
toc: true
---

### **Table of Contents**

- [Overview](#overview)
- [Usage](#usage)

## Overview

MonsterAPI provides access to powerful language models designed for various text generation tasks. With the MonsterAPI integration, you can leverage these models within the Haystack framework for enhanced natural language processing capabilities.

To start using MonsterAPI, sign up for an API key [here](https://monsterapi.ai/). This key provides access to the MonsterAPI platform, which supports rapid inference and customization through various parameters.

## Usage

MonsterAPI's API is OpenAI-compatible, making it easy to use within Haystack via the OpenAI Generators.

### Using `Generator`

Here's an example of using a model served via MonsterAPI to perform question answering on a web page. You need to set the environment variable `MONSTER_API_KEY` and choose a [compatible model](https://developer.monsterapi.ai/).
```python
from haystack import Pipeline
from haystack.utils import Secret
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.converters import HTMLToDocument
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator

fetcher = LinkContentFetcher()
converter = HTMLToDocument()
prompt_template = """
According to the contents of this website:
{% for document in documents %}
  {{document.content}}
{% endfor %}
Answer the given question: {{query}}
Answer:
"""
prompt_builder = PromptBuilder(template=prompt_template)
llm = OpenAIGenerator(
    api_key=Secret.from_env_var("MONSTER_API_KEY"),
    api_base_url="https://llm.monsterapi.ai/v1/",
    model="microsoft/Phi-3-mini-4k-instruct",
    generation_kwargs={"max_tokens": 256}
)
pipeline = Pipeline()
pipeline.add_component("fetcher", fetcher)
pipeline.add_component("converter", converter)
pipeline.add_component("prompt", prompt_builder)
pipeline.add_component("llm", llm)

pipeline.connect("fetcher.streams", "converter.sources")
pipeline.connect("converter.documents", "prompt.documents")
pipeline.connect("prompt.prompt", "llm.prompt")

result = pipeline.run({"fetcher": {"urls": ["https://developer.monsterapi.ai/docs/"]},
                       "prompt": {"query": "What are the features of MonsterAPI?"}})

print(result["llm"]["replies"][0])
```

### Using `ChatGenerator`

Here's an example of engaging in a multi-turn conversation with a MonsterAPI model. You need to set the environment variable `MONSTER_API_KEY` and choose a [compatible model](https://developer.monsterapi.ai/).

```python
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

generator = OpenAIChatGenerator(
    api_key=Secret.from_env_var("MONSTER_API_KEY"),
    api_base_url="https://llm.monsterapi.ai/v1/",
    model="microsoft/Phi-3-mini-4k-instruct",
    generation_kwargs={"max_tokens": 256}
)

messages = []

while True:
    msg = input("Enter your message or Q to exit\n ")
    if msg == "Q":
        break
    messages.append(ChatMessage.from_user(msg))
    response = generator.run(messages=messages)
    assistant_resp = response["replies"][0]
    print(assistant_resp.text)
    messages.append(assistant_resp)
```
--------------------------------------------------------------------------------
/integrations/needle.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Needle
description: Use Needle document store and retriever in Haystack.
authors:
  - name: Needle Team
    socials:
      twitter: needlexAI
      linkedin: https://www.linkedin.com/company/needlexai
pypi: https://pypi.org/project/needle-haystack-ai
repo: https://github.com/JANHMS/needle-haystack
type: Document Store
report_issue: https://github.com/JANHMS/needle-haystack/issues
logo: /logos/needle.png
version: Haystack 2.0
---

# Needle RAG tools for Haystack

[![PyPI - Version](https://img.shields.io/pypi/v/needle-haystack-ai.svg)](https://pypi.org/project/needle-haystack-ai)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/needle-haystack-ai.svg)](https://pypi.org/project/needle-haystack-ai)

This package provides the `NeedleDocumentStore` and `NeedleEmbeddingRetriever` components for use in Haystack projects.

## Usage ⚡️

Get started by installing the package via `pip`.

```bash
pip install needle-haystack-ai
```

### API Keys

We will show you how to build a common RAG pipeline using the Needle tools and the OpenAI generator.
To use these tools, you must set the environment variables `NEEDLE_API_KEY` and `OPENAI_API_KEY`, respectively.

You can get your Needle API key from [Developer settings](https://needle-ai.com/dashboard/settings).

### Example Pipeline 🧱

In Needle, document stores are called collections. For detailed information, see our [docs](https://docs.needle-ai.com).
You can create a reference to your Needle collection using `NeedleDocumentStore` and use `NeedleEmbeddingRetriever` to retrieve documents from it.

```python
from needle_haystack import NeedleDocumentStore, NeedleEmbeddingRetriever

document_store = NeedleDocumentStore(collection_id="YOUR_COLLECTION_ID")
retriever = NeedleEmbeddingRetriever(document_store=document_store)
```

Use the retriever in a Haystack pipeline. Example:

```python
from haystack import Pipeline
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders import PromptBuilder

prompt_template = """
Given the following retrieved documents, generate a concise and informative answer to the query:

Query: {{query}}
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}

Answer:
"""

prompt_builder = PromptBuilder(template=prompt_template)
llm = OpenAIGenerator()

# Add components to pipeline
pipeline = Pipeline()
pipeline.add_component("retriever", retriever)
pipeline.add_component("prompt_builder", prompt_builder)
pipeline.add_component("llm", llm)

# Connect the components
pipeline.connect("retriever", "prompt_builder.documents")
pipeline.connect("prompt_builder", "llm")
```

Run your RAG pipeline:

```python
prompt = "What is the topic of the news?"

result = pipeline.run({
    "retriever": {"text": prompt},
    "prompt_builder": {"query": prompt}
})

# Print final answer
print(result["llm"]["replies"][0])
```

# Support 📞

For detailed guides, take a look at our [docs](https://docs.needle-ai.com). If you have questions or requests, you can contact us in our [Discord channel](https://discord.gg/JzJcHgTyZx).
--------------------------------------------------------------------------------
/integrations/notion-extractor.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Notion Extractor
description: A component to extract pages from Notion to Haystack Documents. Useful for indexing Pipelines.
authors:
  - name: Bogdan Kostić
    socials:
      github: bogdankostic
pypi: https://pypi.org/project/notion-haystack/
repo: https://github.com/bogdankostic/notion-haystack
type: Data Ingestion
report_issue: https://github.com/bogdankostic/notion-haystack/issues
version: Haystack 2.0
logo: /logos/notion.png
---
This Haystack component allows you to easily export your Notion pages to Haystack Documents by providing a Notion API token.

Given that the Notion API is subject to some [rate limits](https://developers.notion.com/reference/request-limits),
this component will automatically retry failed requests and wait for the rate limit to reset before retrying. This is
especially useful when exporting a large number of pages. Furthermore, this component uses `asyncio` to make requests in
parallel, which can significantly speed up the export process.

## Installation

```bash
pip install notion-haystack
```

## Usage

To use this component, you will need a Notion API token. You can follow the steps outlined in the [Notion documentation](https://developers.notion.com/docs/create-a-notion-integration#create-your-integration-in-notion)
to create a new Notion integration, connect it to your pages, and obtain your API token.

The following minimal example demonstrates how to export a list of pages to Haystack Documents:
```python
from notion_haystack import NotionExporter

exporter = NotionExporter(api_token="YOUR_API_TOKEN")
exported_pages = exporter.run(page_ids=["your_page_id"])

# exported_pages will be a list of Haystack Documents where each Document corresponds to a Notion page
```

The following example shows how to use the `NotionExporter` inside an indexing pipeline:
```python
from haystack import Pipeline

from notion_haystack import NotionExporter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore

document_store = InMemoryDocumentStore()
exporter = NotionExporter(api_token="YOUR_API_KEY")
splitter = DocumentSplitter()
writer = DocumentWriter(document_store=document_store)

indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=exporter, name="exporter")
indexing_pipeline.add_component(instance=splitter, name="splitter")
indexing_pipeline.add_component(instance=writer, name="writer")

indexing_pipeline.connect("exporter.documents", "splitter.documents")
indexing_pipeline.connect("splitter", "writer")

indexing_pipeline.run(data={"exporter": {"page_ids": ["your_page_id"]}})
```

The `NotionExporter` class takes the following arguments:
- `api_token`: Your Notion API token.
  You can find information on how to get an API token in [Notion's documentation](https://developers.notion.com/docs/create-a-notion-integration).
- `export_child_pages`: Whether to recursively export all child pages of the provided page ids. Defaults to `False`.
- `extract_page_metadata`: Whether to extract metadata from the page and add it as a frontmatter to the markdown.
  Extracted metadata includes title, author, path, URL, last editor, and last editing time of the page. Defaults to `False`.
- `exclude_title_containing`: If specified, pages with titles containing this string will be excluded. This might be
  useful, for example, to exclude pages that are archived. Defaults to `None`.

The `NotionExporter.run` method takes the following arguments:
- `page_ids`: A list of page ids to export. If `export_child_pages` is `True`, all child pages of these pages will be
  exported as well.
--------------------------------------------------------------------------------
/integrations/opea.md:
--------------------------------------------------------------------------------
---
layout: integration
name: OPEA
description: Use the OPEA framework for hardware abstraction and orchestration
authors:
  - name: OPEA-Project
    socials:
      github: opea-project
pypi: https://pypi.org/project/haystack-opea/
repo: https://github.com/opea-project/Haystack-OPEA
type: Distributed Computing
report_issue: https://github.com/opea-project/Haystack-OPEA/issues
logo: /logos/opea.png
version: Haystack 2.0
toc: true
---

### Table of Contents

- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)
  - [Embeddings](#embeddings)
  - [LLM Generation](#llm-generation)

## Overview

The `haystack-opea` integration connects Haystack to [OPEA](https://opea.dev/), a collection of containerized microservices for LLMs, embedding, retrieval and reranking. By delegating heavy compute to OPEA services, you can build flexible Retrieval-Augmented Generation (RAG) pipelines that scale across cloud, on-prem and edge deployments.

Key features:
- Hardware-agnostic LLM & embedding services.
- Easy orchestration of LLMs, embedders, retrievers, rankers, and more.
- Support for local development via Docker Compose or production clusters.

## Installation

Install from source:

```bash
git clone https://github.com/opea-project/Haystack-OPEA.git
cd Haystack-OPEA
pip install poetry
poetry install --with test
```

## Usage

Below are quickstart examples for embeddings and LLM generation. Make sure your OPEA backend is running, e.g. via the provided [Docker Compose](https://github.com/opea-project/Haystack-OPEA/blob/main/samples/compose.yaml) file. OPEA services can be configured to use a variety of model-serving backends such as TGI, vLLM, Ollama, and OVMS, and offer validated runtime settings for good performance on various hardware, including Intel Gaudi; see the [LLM](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/text-generation) section in the OPEA components library.
### Embeddings

```python
from haystack import Document
from haystack_opea import OPEATextEmbedder, OPEADocumentEmbedder

# Text embedding example
text_embedder = OPEATextEmbedder(api_url="http://localhost:6006")
text_embedder.warm_up()
result = text_embedder.run("I love pizza!")
print("Text embedding:", result["vectors"][0])

# Document embedding example
doc = Document(content="I love pizza!")
doc_embedder = OPEADocumentEmbedder(api_url="http://localhost:6006")
doc_embedder.warm_up()
out = doc_embedder.run([doc])
print("Document embedding:", out["documents"][0].embedding)
```

### LLM Generation

```python
from haystack_opea import OPEAGenerator

# Initialize the OPEA LLM service
generator = OPEAGenerator(
    api_url="http://localhost:9009",
    model_arguments={
        "temperature": 0.2,
        "top_p": 0.7,
        "max_tokens": 512,
    },
)
generator.warm_up()

# Run a simple prompt
response = generator.run(prompt="What is the capital of France?")
print("LLM reply:", response["replies"][0])
```

For more examples, see the `samples/` folder and the [official OPEA documentation](https://opea.dev/), as well as the [Components Library](https://github.com/opea-project/GenAIComps).
--------------------------------------------------------------------------------
/integrations/openrouter.md:
--------------------------------------------------------------------------------
---
layout: integration
name: OpenRouter
description: Use the OpenRouter API for text generation models.
authors:
  - name: deepset
    socials:
      github: deepset-ai
      twitter: deepset_ai
      linkedin: https://www.linkedin.com/company/deepset-ai/
pypi: https://pypi.org/project/openrouter-haystack
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/openrouter
type: Model Provider
report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
logo: /logos/openrouter.png
version: Haystack 2.0
toc: true
---

### **Table of Contents**
- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)
- [License](#license)

## Overview

`OpenRouterChatGenerator` lets you call any LLM available on [OpenRouter](https://openrouter.ai), including:

- OpenAI variants such as `openai/gpt-4o`
- Anthropic's `claude-3.5-sonnet`
- Community-hosted open-source models (Llama 2, Mixtral, etc.)

For more information on models available via the OpenRouter API, see [the OpenRouter docs](https://openrouter.ai/models).

In addition to basic chat completion, the component exposes OpenRouter-specific features:

* **Provider / model routing** – choose fallback models or provider ordering with the `generation_kwargs` parameter.
* **Extra HTTP headers** – add attribution or tracing headers via `extra_headers`.

In order to follow along with this guide, you'll need an OpenRouter API key. Add it as an environment variable, `OPENROUTER_API_KEY`.
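As a quick illustration of the `extra_headers` option, attribution headers can be attached like this. This is a minimal sketch, assuming the package is installed as shown below; `HTTP-Referer` and `X-Title` are the attribution headers described in OpenRouter's documentation, and the URL and title values are placeholders.

```python
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator

client = OpenRouterChatGenerator(
    extra_headers={
        "HTTP-Referer": "https://example.com",  # placeholder: your app's URL, used for attribution on OpenRouter
        "X-Title": "My Haystack App",           # placeholder: your app's name
    }
)

response = client.run([ChatMessage.from_user("Say hello in one sentence.")])
print(response["replies"][0].text)
```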
## Installation

```bash
pip install openrouter-haystack
```

## Usage
You can use [OpenRouterChatGenerator](https://docs.haystack.deepset.ai/docs/openrouterchatgenerator) as standalone, within a [pipeline](https://docs.haystack.deepset.ai/docs/pipelines) or with the [Agent component](https://docs.haystack.deepset.ai/docs/agent).

Here's an example of using it as a standalone component:

```python
import os
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator

os.environ["OPENROUTER_API_KEY"] = "YOUR_OPENROUTER_API_KEY"

client = OpenRouterChatGenerator()  # defaults to openai/gpt-4o-mini
response = client.run(
    [ChatMessage.from_user("What is the capital of Vietnam?")]
)
print(response["replies"])
```
```bash
{'replies': [ChatMessage(_role=<ChatRole.ASSISTANT: 'assistant'>, _content=[TextContent(text='The capital of Vietnam is Hanoi.')], _name=None, _meta={'model': 'openai/gpt-4o-mini', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 8, 'prompt_tokens': 13, 'total_tokens': 21, 'completion_tokens_details': CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), 'prompt_tokens_details': PromptTokensDetails(audio_tokens=None, cached_tokens=0)}})]}
```
`OpenRouterChatGenerator` also supports streaming responses if you pass a streaming callback:

```python
import os
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator

os.environ["OPENROUTER_API_KEY"] = "YOUR_OPENROUTER_API_KEY"

def show(chunk):  # simple streaming callback
    print(chunk.content, end="", flush=True)

client = OpenRouterChatGenerator(
    model="openrouter/auto",  # let OpenRouter pick a model
    streaming_callback=show,
    generation_kwargs={
        "provider": {"sort": "throughput"},  # pick the fastest provider
    }
)

response = client.run([ChatMessage.from_user("Summarize RAG in two lines.")])

print(response)
```

### License

`openrouter-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/opensearch-document-store.md:
--------------------------------------------------------------------------------
---
layout: integration
name: OpenSearch
description: A Document Store for storing and retrieval from OpenSearch
authors:
  - name: Thomas Stadelmann
    socials:
      github: tstadel
  - name: Julian Risch
    socials:
      github: julian-risch
  - name: deepset
    socials:
      github: deepset-ai
      twitter: deepset_ai
      linkedin: https://www.linkedin.com/company/deepset-ai/
pypi: https://pypi.org/project/opensearch-haystack
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/opensearch
type: Document Store
report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
logo: /logos/opensearch.png
version: Haystack 2.0
toc: true
---

### Table of Contents

- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)

## Overview

[![PyPI - Version](https://img.shields.io/pypi/v/opensearch-haystack.svg)](https://pypi.org/project/opensearch-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/opensearch-haystack.svg)](https://pypi.org/project/opensearch-haystack)
[![test](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml)

-----

## Installation
Use `pip` to install OpenSearch:

```console
pip install opensearch-haystack
```
## Usage
Once installed, initialize your OpenSearch database to use it with Haystack:

```python
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore

document_store = OpenSearchDocumentStore()
```

### Writing Documents to OpenSearchDocumentStore
To write documents to `OpenSearchDocumentStore`, create an indexing pipeline.

```python
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.writers import DocumentWriter

# file_paths: a list of paths to the text files you want to index
indexing = Pipeline()
indexing.add_component("converter", TextFileToDocument())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "writer")
indexing.run({"converter": {"sources": file_paths}})
```

### Hybrid Retriever

This integration also provides a hybrid retriever. The `OpenSearchHybridRetriever` combines the capabilities of a vector search and a keyword search. It uses the OpenSearch document store to retrieve documents based on both semantic and keyword-based queries.

You can use the `OpenSearchHybridRetriever` together with the `OpenSearchDocumentStore` to perform hybrid retrieval.
See the example below on how to index documents and use the hybrid retriever:

```python
from haystack import Document
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack_integrations.components.retrievers.opensearch import OpenSearchHybridRetriever
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore

# Initialize the document store
doc_store = OpenSearchDocumentStore(
    hosts=["http://localhost:9200"],
    index="document_store",
    embedding_dim=384,
)

# Create some sample documents
docs = [
    Document(content="Machine learning is a subset of artificial intelligence."),
    Document(content="Deep learning is a subset of machine learning."),
    Document(content="Natural language processing is a field of AI."),
    Document(content="Reinforcement learning is a type of machine learning."),
    Document(content="Supervised learning is a type of machine learning."),
]

# Embed the documents and add them to the document store
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()
docs = doc_embedder.run(docs)

# Write the documents to the OpenSearch document store
doc_store.write_documents(docs['documents'])

# Initialize a Haystack text embedder, in this case the SentenceTransformersTextEmbedder
embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

# Initialize the hybrid retriever
retriever = OpenSearchHybridRetriever(
    document_store=doc_store,
    embedder=embedder,
    top_k_bm25=3,
    top_k_embedding=3,
    join_mode="reciprocal_rank_fusion"
)

# Run the retriever
results = retriever.run(query="What is reinforcement learning?", filters_bm25=None, filters_embedding=None)

>> results
{'documents': [Document(id=..., content: 'Reinforcement learning is a type of machine learning.', score: 1.0),
 Document(id=..., content: 'Supervised learning is a type of machine learning.', score: 0.9760624679979518),
 Document(id=..., content: 'Deep learning is a subset of machine learning.', score: 0.4919354838709677),
 Document(id=..., content: 'Machine learning is a subset of artificial intelligence.', score: 0.4841269841269841)]}
```

You can learn more about the `OpenSearchHybridRetriever` in the [documentation](https://docs.haystack.deepset.ai/docs/opensearchhybridretriever).

### License

`opensearch-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/opik.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Opik
description: Trace and evaluate your Haystack pipelines with Opik
authors:
  - name: Comet ML
    socials:
      github: comet-ml
      twitter: Cometml
      linkedin: https://www.linkedin.com/company/comet-ml/
pypi: https://pypi.org/project/opik/
repo: https://github.com/comet-ml/opik
type: Monitoring Tool
report_issue: https://github.com/comet-ml/opik/issues
logo: /logos/opik.png
version: Haystack 2.0
toc: true
---
### **Table of Contents**
- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)
- [License](#license)

## Overview

[Opik](https://www.comet.com/site/products/opik/) is an open source tool that helps you trace, evaluate and monitor your LLM applications. With the Opik platform, you can:

- Debug your pipelines
- Automatically evaluate your pipelines with built-in metrics like hallucinations or context relevance
- Track the latency and cost of your pipeline runs
- Monitor your pipelines in production

You can learn more about the Haystack and Opik integration in Opik's [Haystack integration guide](https://www.comet.com/docs/opik/tracing/integrations/haystack).

## Installation

To use the Opik integration with Haystack, install the `opik` package:

```bash
pip install opik haystack-ai
```

## Usage

To use Opik, you will need to:

1. Enable content tracing in Haystack by setting the environment variable `HAYSTACK_CONTENT_TRACING_ENABLED` to `true`
2. Add the `OpikConnector` to your pipeline

An example pipeline that uses Opik is shown below:

```python
# Enable content tracing
import os
os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"

from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage

from opik.integrations.haystack import OpikConnector


pipe = Pipeline()

# Add the OpikConnector component to the pipeline
pipe.add_component(
    "tracer", OpikConnector("Chat example")
)

# Continue building the pipeline
pipe.add_component("prompt_builder", ChatPromptBuilder())
pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo"))

pipe.connect("prompt_builder.prompt", "llm.messages")
```

The `OpikConnector` component will automatically trace the pipeline and log it in Opik. It will also augment the response to include a `tracer` key that will contain the Opik `traceId`:

```python
messages = [
    ChatMessage.from_system(
        "Always respond in German even if some input data is in other languages."
    ),
    ChatMessage.from_user("Tell me about {{location}}"),
]

response = pipe.run(
    data={
        "prompt_builder": {
            "template_variables": {"location": "Berlin"},
            "template": messages,
        }
    }
)

print(response)
```

![Opik Gif](/images/opik-demo.gif)

## License

Opik is fully open source and is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/optimum.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Optimum
description: High-performance inference using Hugging Face Optimum
authors:
  - name: deepset
    socials:
      github: deepset-ai
      twitter: deepset_ai
      linkedin: https://www.linkedin.com/company/deepset-ai/
pypi: https://pypi.org/project/optimum-haystack
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/optimum
type: Model Provider
report_issue: https://github.com/deepset-ai/haystack/issues
logo: /logos/huggingface.png
version: Haystack 2.0
toc: true
---

### **Table of Contents**

- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)
  - [Components](#components)
- [License](#license)

## Overview

[Hugging Face Optimum](https://huggingface.co/docs/optimum/index) is an extension of the
[Transformers](https://huggingface.co/docs/transformers/index) library that provides a set
of performance optimization tools to train and run models on targeted hardware with maximum
efficiency. Using Optimum, you can leverage the [ONNX Runtime](https://onnxruntime.ai/)
to automatically export models from the [Hugging Face Model Hub](https://huggingface.co/docs/hub/en/models-the-hub) and deploy them in pipelines to achieve significant improvements in performance.

## Installation

```bash
pip install optimum-haystack
```

## Usage

### Components

This integration introduces two components: [OptimumTextEmbedder](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_text_embedder.py) and [OptimumDocumentEmbedder](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/optimum/src/haystack_integrations/components/embedders/optimum/optimum_document_embedder.py).

To create semantic embeddings for documents, use `OptimumDocumentEmbedder` in your indexing pipeline. For generating embeddings for queries, use `OptimumTextEmbedder`.
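For instance, the query-side embedder can be used on its own like this. This is a minimal sketch: the model and pooling settings simply mirror the indexing example below and are assumptions, not requirements, and the exact output key may vary by version.

```python
from haystack_integrations.components.embedders.optimum import (
    OptimumTextEmbedder,
    OptimumEmbedderPooling,
)

# Same model and pooling settings as the document embedder below (an assumption for consistency)
text_embedder = OptimumTextEmbedder(
    model="intfloat/e5-base-v2",
    normalize_embeddings=True,
    pooling_mode=OptimumEmbedderPooling.MEAN,
)
text_embedder.warm_up()  # exports and loads the model before the first call

result = text_embedder.run(text="What should I eat for a healthy diet?")
print(result["embedding"][:5])  # first few dimensions of the query embedding
```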
Below is the example indexing pipeline with `InMemoryDocumentStore`, `OptimumDocumentEmbedder` and `DocumentWriter`:

```python
from haystack import Document, Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.embedders.optimum import (
    OptimumDocumentEmbedder,
    OptimumEmbedderPooling,
)


document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

documents = [Document(content="I enjoy programming in Python"),
             Document(content="My city does not get snow in winter"),
             Document(content="Japanese diet is well known for being good for your health"),
             Document(content="Thomas is injured and can't play sports")]

indexing_pipeline = Pipeline()
indexing_pipeline.add_component("embedder", OptimumDocumentEmbedder(
    model="intfloat/e5-base-v2",
    normalize_embeddings=True,
    pooling_mode=OptimumEmbedderPooling.MEAN,
))
indexing_pipeline.add_component("writer", DocumentWriter(document_store=document_store))
indexing_pipeline.connect("embedder", "writer")

indexing_pipeline.run({"embedder": {"documents": documents}})
```

## License

`optimum-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/pgvector-documentstore.md:
--------------------------------------------------------------------------------
---
layout: integration
name: pgvector
description: A Document Store for storing and retrieval from pgvector
authors:
  - name: deepset
    socials:
      github: deepset-ai
      twitter: Haystack_AI
      linkedin: https://www.linkedin.com/company/deepset-ai/
pypi: https://pypi.org/project/pgvector-haystack/
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/pgvector
type: Document Store
report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
version: Haystack 2.0
toc: true
---

[![PyPI - Version](https://img.shields.io/pypi/v/pgvector-haystack.svg)](https://pypi.org/project/pgvector-haystack/)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pgvector-haystack.svg)](https://pypi.org/project/pgvector-haystack/)
[![test](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pgvector.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pgvector.yml)

-----

**Table of Contents**

- Pgvector Document Store for Haystack
  - [Installation](#installation)
  - [Usage](#usage)
  - [Examples](#examples)
  - [License](#license)

## Installation
`pgvector` is an extension for PostgreSQL that adds support for vector similarity search.

To quickly set up a PostgreSQL database with pgvector, you can use Docker:
```bash
docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector
```

For more information on how to install pgvector, visit the [pgvector GitHub repository](https://github.com/pgvector/pgvector).
Use `pip` to install `pgvector-haystack`:
```bash
pip install pgvector-haystack
```
## Usage

Define the connection string to your PostgreSQL database in the `PG_CONN_STR` environment variable. For example:
```bash
export PG_CONN_STR="postgresql://postgres:postgres@localhost:5432/postgres"
```

Once installed, initialize PgvectorDocumentStore:

```python
from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore

document_store = PgvectorDocumentStore(
    table_name="haystack_docs",
    embedding_dimension=768,
    vector_function="cosine_similarity",
    recreate_table=True,
    search_strategy="hnsw",
)
```

### Writing Documents to PgvectorDocumentStore
To write documents to `PgvectorDocumentStore`, create an indexing pipeline.

```python
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.writers import DocumentWriter
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

# file_paths: a list of paths to the text files you want to index
indexing = Pipeline()
indexing.add_component("converter", TextFileToDocument())
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "embedder")
indexing.connect("embedder", "writer")
indexing.run({"converter": {"sources": file_paths}})
```

### Retrieval from PgvectorDocumentStore
You can retrieve semantically similar documents to a given query using a simple pipeline that includes the [`PgvectorEmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/pgvectorembeddingretriever).

```python
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack_integrations.components.retrievers.pgvector import PgvectorEmbeddingRetriever
from haystack import Pipeline

querying = Pipeline()
querying.add_component("embedder", SentenceTransformersTextEmbedder())
querying.add_component("retriever", PgvectorEmbeddingRetriever(document_store=document_store, top_k=3))
querying.connect("embedder", "retriever")

results = querying.run({"embedder": {"text": "my query"}})
```

You can also retrieve Documents based on keyword matching with the `PgvectorKeywordRetriever`.

```python
from haystack_integrations.components.retrievers.pgvector import PgvectorKeywordRetriever

retriever = PgvectorKeywordRetriever(document_store=document_store, top_k=3)
results = retriever.run(query="my query")
```

## Examples
You can find a code example showing how to use the Document Store and the Retriever under the `examples/` folder of [this repo](https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/pgvector).

## License

`pgvector-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/qdrant-document-store.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Qdrant
description: Use the Qdrant vector database with Haystack
authors:
  - name: deepset
    socials:
      github: deepset-ai
      twitter: deepset_ai
      linkedin: https://www.linkedin.com/company/deepset-ai/
  - name: Qdrant
    socials:
      github: qdrant
      twitter: qdrant_engine
pypi: https://pypi.org/project/qdrant-haystack/
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/qdrant
type: Document Store
report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
logo: /logos/qdrant.png
version: Haystack 2.0
toc: true
---

An integration of the [Qdrant](https://qdrant.tech) vector database with [Haystack](https://haystack.deepset.ai/)
by [deepset](https://www.deepset.ai).

The library allows using Qdrant as a document store and provides a drop-in replacement for any other vector embedding store, so you can expect your application to work smoothly simply by switching the provider to `QdrantDocumentStore`.

## Installation

`qdrant-haystack` can be installed like any other Python library, using pip or poetry:

```bash
pip install qdrant-haystack
```

```bash
poetry add qdrant-haystack
```

## Usage

Once installed, you can start using `QdrantDocumentStore` like any other store that supports
embeddings.

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

document_store = QdrantDocumentStore(
    url="localhost",
    index="Document",
    embedding_dim=512,
    recreate_index=False,
    hnsw_config={"m": 16, "ef_construct": 64}  # Optional
)
```

The parameters accepted by `QdrantDocumentStore` complement those used in the
official [Python Qdrant client](https://github.com/qdrant/qdrant_client).

### Using local in-memory / disk-persisted mode

The Qdrant Python client supports a local in-memory/disk-persisted mode from version 1.1.1 onwards. This is
a good choice for test scenarios and quick experiments in which you do not plan to store
many vectors; in such cases, spinning up a Docker container is not even required.

Local mode is also supported by the `qdrant-haystack` integration.

#### In-memory storage

If you want transient storage, for example for automated tests launched
in your CI/CD pipeline, Qdrant local mode with in-memory storage may be the preferred
option. Enable it by passing `:memory:` as the first parameter when creating an
instance of `QdrantDocumentStore`.
```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

document_store = QdrantDocumentStore(
    ":memory:",
    index="Document",
    embedding_dim=512,
    recreate_index=False,
    hnsw_config={"m": 16, "ef_construct": 64}  # Optional
)
```

#### On disk storage

If you prefer to keep the vectors between different runs of your application, it is better to use on-disk storage and pass the path that should be used to persist the data.

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

document_store = QdrantDocumentStore(
    path="/home/qdrant/storage_local",
    index="Document",
    embedding_dim=512,
    recreate_index=False,
    hnsw_config={"m": 16, "ef_construct": 64}  # Optional
)
```

### Connecting to Qdrant Cloud cluster

If you prefer not to manage your own Qdrant instance, [Qdrant Cloud](https://cloud.qdrant.io/)
might be a better option.

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
from haystack.utils import Secret

document_store = QdrantDocumentStore(
    url="https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io",
    index="Document",
    api_key=Secret.from_env_var("QDRANT_API_KEY"),
    embedding_dim=512,
    recreate_index=False,
)
```

There is no difference in terms of functionality between local instances and cloud clusters.
--------------------------------------------------------------------------------
/integrations/sambanova.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Sambanova
description: Use open language models served by Sambanova
authors:
  - name: Sambanova Team
    socials:
      twitter: SambaNovaAI
      linkedin: https://www.linkedin.com/company/sambanova
pypi: https://pypi.org/project/haystack-ai/
repo: https://github.com/deepset-ai/haystack
type: Model Provider
report_issue: https://github.com/deepset-ai/haystack/issues
logo: /logos/sambanova.png
version: Haystack 2.0
toc: true
---

### **Table of Contents**

- [Overview](#overview)
- [Usage](#usage)

## Overview

**SambaNova** is an AI company that develops the SN40L Reconfigurable Dataflow Unit (RDU), a processor that provides native dataflow processing and high performance for fast inference of Large Language Models.

To start using SambaNova, sign up for an API key [here](https://cloud.sambanova.ai/).
This will give you access to the SambaNova Cloud API, which offers rapid inference of open Language Models like Llama 3 and Qwen.

## Usage

The SambaNova Cloud API is OpenAI-compatible, making it easy to use in Haystack via the OpenAI Generators.


### Using `Generator`

Here's an example of using Llama served via SambaNova to perform question answering using RAG with `PromptBuilder`.
You need to set the environment variable `SAMBANOVA_API_KEY` and choose a [compatible model](https://cloud.sambanova.ai/).
```python
from haystack import Document, Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

from haystack.components.generators import OpenAIGenerator
from haystack.utils import Secret
import os

os.environ["SAMBANOVA_API_KEY"] = "YOUR_SAMBANOVA_API_KEY"

document_store = InMemoryDocumentStore()
document_store.write_documents(
    [
        Document(content="The Function-Calling API enables dynamic, agentic workflows by allowing the model to suggest and select function calls based on user input. "
                 "This feature facilitates flexible agentic workflows that adapt to varied needs."),
        Document(content="Interact with multimodal models directly through the Inference API (OpenAI compatible) and Playground "
                 "for seamless text and image processing."),
        Document(
            content="New Python and Gradio code samples make it easier to build and deploy applications on SambaNova Cloud. These examples simplify "
            "integrating AI models, enabling faster prototyping and reducing setup time."
        ),
    ]
)

template = """
Given only the following information, answer the question.
Ignore your own knowledge.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{ query }}?
"""

llm = OpenAIGenerator(
    api_key=Secret.from_env_var("SAMBANOVA_API_KEY"),
    api_base_url="https://api.sambanova.ai/v1",
    model="Meta-Llama-3.3-70B-Instruct",
    generation_kwargs={"max_tokens": 512}
)

pipe = Pipeline()

pipe.add_component("retriever", InMemoryBM25Retriever(document_store=document_store))
pipe.add_component("prompt_builder", PromptBuilder(template=template))
pipe.add_component("llm", llm)
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder", "llm")

query = "Functionalities of Sambanova API?"

response = pipe.run({"prompt_builder": {"query": query}, "retriever": {"query": query}})

print(response["llm"]["replies"])
```

### Using `ChatGenerator`

See an example of engaging in a multi-turn conversation with Llama 3.3.
You need to set the environment variable `SAMBANOVA_API_KEY` and choose a [compatible model](https://cloud.sambanova.ai/).
```python
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret
import os

os.environ["SAMBANOVA_API_KEY"] = "YOUR_SAMBANOVA_API_KEY"

generator = OpenAIChatGenerator(
    api_key=Secret.from_env_var("SAMBANOVA_API_KEY"),
    api_base_url="https://api.sambanova.ai/v1",
    model="Meta-Llama-3.3-70B-Instruct",
    generation_kwargs={"max_tokens": 512}
)


messages = []

while True:
    msg = input("Enter your message or Q to exit\n🧑 ")
    if msg == "Q":
        break
    messages.append(ChatMessage.from_user(msg))
    response = generator.run(messages=messages)
    assistant_resp = response['replies'][0]
    print("🤖 " + assistant_resp.content)
    messages.append(assistant_resp)
```
--------------------------------------------------------------------------------
/integrations/snowflake.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Snowflake
description: A Snowflake integration that allows table retrieval from a Snowflake database.
authors:
  - name: Mohamed Sriha
    socials:
      github: medsriha
  - name: deepset
    socials:
      github: deepset-ai
      twitter: deepset_ai
      linkedin: https://www.linkedin.com/company/deepset-ai/
pypi: https://pypi.org/project/snowflake-haystack
repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/snowflake
report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues
type: Data Ingestion
logo: /logos/snowflake.png
version: Haystack 2.0
---

[![PyPI - Version](https://img.shields.io/pypi/v/snowflake-haystack.svg)](https://pypi.org/project/snowflake-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/snowflake-haystack.svg)](https://pypi.org/project/snowflake-haystack)
-----

**Table of Contents**

- [Snowflake table retriever for Haystack](#snowflake-table-retriever-for-haystack)
  - [Installation](#installation)
  - [Usage](#usage)
  - [Examples](#examples)
  - [License](#license)

## Installation
Use `pip` to install Snowflake:

```console
pip install snowflake-haystack
```
## Usage
Once installed, initialize the `SnowflakeTableRetriever` to use it with Haystack:

```python
from haystack.utils import Secret
from haystack_integrations.components.retrievers.snowflake import SnowflakeTableRetriever

# Provide your Snowflake credentials during initialization.
executor = SnowflakeTableRetriever(
    user="",
    account="",
    api_key=Secret.from_env_var("SNOWFLAKE_API_KEY"),
    warehouse="",
)
```

Ensure you have `select` access to the tables before querying the database. More details [here](https://docs.snowflake.com/en/user-guide/security-access-control-privileges):
```python
response = executor.run(query="""select * from database_name.schema_name.table_name""")
```
During component initialization, you can provide the schema and database name to avoid needing to provide them in the SQL query:
```python
executor = SnowflakeTableRetriever(
    ...
    schema_name="",
    database="",
)

response = executor.run(query="""select * from table_name""")
```
The Snowflake table retriever returns a Pandas dataframe and a Markdown version of the table:
```python

print(response["dataframe"].head(2))  # Pandas dataframe
#   Column 1   Column 2
# 0   Value1     Value2
# 1   Value1     Value2

print(response["table"])  # Markdown
# | Column 1 | Column 2 |
# |:----------|:----------|
# | Value1    | Value2    |
# | Value1    | Value2    |
```

Using `SnowflakeTableRetriever` within a pipeline:

```python
from haystack import Pipeline
from haystack.utils import Secret
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack_integrations.components.retrievers.snowflake import SnowflakeTableRetriever

executor = SnowflakeTableRetriever(
    user="",
    account="",
    api_key=Secret.from_env_var("SNOWFLAKE_API_KEY"),
    warehouse="",
)

pipeline = Pipeline()
pipeline.add_component("builder", PromptBuilder(template="Describe this table: {{ table }}"))
pipeline.add_component("snowflake", executor)
pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o"))

pipeline.connect("snowflake.table", "builder.table")
pipeline.connect("builder", "llm")

pipeline.run(data={"query": "select employee, salary from table limit 10;"})
```

## Examples
You can find a code example showing how to use the Snowflake Retriever under the `example/` folder of [this repo](https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/snowflake).

## License

`snowflake-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
--------------------------------------------------------------------------------
/integrations/titanml-takeoff.md:
--------------------------------------------------------------------------------
---
layout: integration
name: Titan Takeoff Inference Server
description: Use Titan Takeoff to run local open-source LLMs with Haystack. Titan Takeoff allows you to run the latest models from Meta, Mistral and Alphabet directly on your laptop.
authors:
  - name: Fergus Finn
    socials:
      github: fergusbarratt
      twitter: BarrattFergus
      linkedin: https://www.linkedin.com/in/fergusfinn/
  - name: Rod Rivera
    socials:
      github: rorcde
      twitter: rorcde
      linkedin: https://www.linkedin.com/in/aiengineer/
pypi: https://pypi.org/project/takeoff-haystack/
repo: https://github.com/titanml/takeoff-haystack
type: Model Provider
report_issue: https://github.com/titanml/takeoff-haystack/issues
logo: /logos/titanml.png
version: Haystack 2.0
toc: true
---
### **Table of Contents**
- [Overview](#overview)
- [Installation](#installation)
- [Usage](#usage)
- [Example](#example)

## Overview
You can use the Takeoff inference server to deploy local models efficiently in your Haystack pipelines. Takeoff is a state-of-the-art inference server focused on deploying openly available language models at scale. It can run LLMs on local machines with consumer GPUs, and on cloud infrastructure.
The TakeoffGenerator component in Haystack is a wrapper around the Takeoff server API, and can be used to serve Takeoff-deployed models efficiently in Haystack pipelines.

## Installation

```bash
pip install takeoff_haystack
```

## Usage
You can interact with Takeoff-deployed models using the `TakeoffGenerator` component in Haystack. To do so, you must have a Takeoff model deployed. For information on how to do so, please read the Takeoff docs [here](https://docs.titanml.co/docs/Docs/launching/).

The following example deploys a Llama-2-7B-Chat-AWQ model using Takeoff locally on port 3000. You can get a free license [here](https://www.titanml.co/contact).

```bash
docker run --gpus all -e TAKEOFF_MODEL_NAME=TheBloke/Llama-2-7B-Chat-AWQ \
                      -e TAKEOFF_DEVICE=cuda \
                      -e TAKEOFF_MAX_SEQUENCE_LENGTH=256 \
                      -it \
                      -p 3000:3000 tytn/takeoff-pro:0.11.0-gpu
```

## Example
### Daily News Digest Generation

Below is an example of using Takeoff models in a Haystack RAG pipeline. It summarizes headlines from popular technology news sites such as TechCrunch, The Verge, Engadget and others.

```python
from typing import Dict, List
from haystack import Document, Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
import feedparser
from takeoff_haystack import TakeoffGenerator

# Dict of website RSS feeds
urls = {
    'theverge': 'https://www.theverge.com/rss/frontpage/',
    'techcrunch': 'https://techcrunch.com/feed',
    'mashable': 'https://mashable.com/feeds/rss/all',
    'cnet': 'https://cnet.com/rss/news',
    'engadget': 'https://engadget.com/rss.xml',
    'zdnet': 'https://zdnet.com/news/rss.xml',
    'venturebeat': 'https://feeds.feedburner.com/venturebeat/SZYF',
    'readwrite': 'https://readwrite.com/feed/',
    'wired': 'https://wired.com/feed/rss',
    'gizmodo': 'https://gizmodo.com/rss',
}

# Configurable parameters
NUM_WEBSITES = 3
NUM_TITLES = 1

def get_titles(urls: Dict[str, str], num_sites: int, num_titles: int) -> List[str]:
    titles: List[str] = []
    sites = list(urls.keys())[:num_sites]

    for site in sites:
        feed = feedparser.parse(urls[site])
        entries = feed.entries[:num_titles]

        for entry in entries:
            titles.append(entry.title)

    return titles

titles = get_titles(urls, NUM_WEBSITES, NUM_TITLES)

document_store = InMemoryDocumentStore()
document_store.write_documents([Document(content=title) for title in titles])

template = """
HEADLINES:
{% for document in documents %}
    {{ document.content }}
{% endfor %}
REQUEST: {{ query }}
"""

pipe = Pipeline()
pipe.add_component("retriever", InMemoryBM25Retriever(document_store=document_store))
pipe.add_component("prompt_builder", PromptBuilder(template=template))
pipe.add_component("llm", TakeoffGenerator(base_url="http://localhost", port="3000"))
pipe.connect("retriever", "prompt_builder.documents")
pipe.connect("prompt_builder", "llm")

query = f"Summarize each of the {NUM_WEBSITES * NUM_TITLES} provided headlines in three words."
120 | response = pipe.run({"prompt_builder": {"query": query}, "retriever": {"query": query}}) 121 | print(response["llm"]["replies"]) 122 | ``` 123 | 124 | You should see a response like the following: 125 | ``` 126 | ['\n\n\nANSWER:\n\n1. Poker Roguelike - Exciting gameplay\n2. AI-powered news reader - Personalized feed\n3. Best laptops MWC 2024 - Powerful devices'] 127 | ``` 128 | -------------------------------------------------------------------------------- /integrations/traceloop.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Traceloop 4 | description: Evaluate and monitor the quality of your LLM apps and agents 5 | authors: 6 | - name: Traceloop Team 7 | socials: 8 | github: traceloop 9 | twitter: traceloopdev 10 | pypi: https://pypi.org/project/traceloop-sdk/ 11 | repo: https://github.com/traceloop/openllmetry 12 | type: Monitoring Tool 13 | report_issue: https://github.com/traceloop/openllmetry/issues 14 | logo: /logos/traceloop.png 15 | version: Haystack 2.0 16 | toc: true 17 | --- 18 | 19 | - [OpenLLMetry](#openllmetry) 20 | - [Installation](#installation) 21 | - [Example](#example) 22 | - [About Traceloop](#about-traceloop) 23 | 24 | # OpenLLMetry 25 | 26 | OpenLLMetry is an open-source Python package built and maintained by Traceloop that instruments your Haystack-based applications with OpenTelemetry. This gives you full visibility into your LLM app, right in your existing observability stack. You can also connect this to Traceloop to get quality evaluation metrics and LLM-specific capabilities like Prompt Playground. 27 | 28 | ![Traceloop screenshot](https://raw.githubusercontent.com/deepset-ai/haystack-integrations/main/images/traceloop-monitoring.png) 29 | 30 | More info is available in the [documentation](https://traceloop.com/docs/python-sdk). 31 | 32 | ## Installation 33 | 34 | ```bash 35 | pip install traceloop-sdk 36 | ``` 37 | 38 | ## Example 39 | 40 | Basic integration is as simple as adding one line to your code (after importing `Traceloop` from the `traceloop.sdk` module): 41 | 42 | ```python 43 | Traceloop.init(app_name="your_app_name") 44 | ``` 45 | 46 | When you run your code, you'll get a link to the Traceloop UI where you can see your app's traces. 47 | If you want to connect to a different observability platform, [follow the guide for exporting your traces](https://traceloop.com/docs/python-sdk/exporting). 48 | 49 | ### Use a Traceloop API Key 50 | 51 | If you have an account with Traceloop and would like to see your traces on your account dashboard: 52 | 53 | - Create an API key on Traceloop 54 | - Export the API key as an environment variable called `TRACELOOP_API_KEY` 55 | 56 | ### Trace Haystack Pipelines 57 | 58 | Once you've initialized a Traceloop app, any Haystack pipeline that you run in the same environment will get logged to the dashboard at the generated Traceloop URL. 59 | For example, below is a simple Haystack pipeline and its Traceloop logs. It requires the `OPENAI_API_KEY` environment variable to be set.
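For instance, you might export the required key (and, optionally, your Traceloop key) before running the script; the values below are placeholders:

```bash
export OPENAI_API_KEY="sk-..."     # used by OpenAIChatGenerator
export TRACELOOP_API_KEY="..."     # optional: send traces to your Traceloop dashboard
```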
60 | 61 | ```python 62 | from haystack.components.builders import ChatPromptBuilder 63 | from haystack.components.generators.chat import OpenAIChatGenerator 64 | from haystack.dataclasses import ChatMessage 65 | from haystack import Pipeline 66 | 67 | from traceloop.sdk import Traceloop 68 | 69 | Traceloop.init(app_name="haystack_app") 70 | 71 | prompt_builder = ChatPromptBuilder() 72 | llm = OpenAIChatGenerator() 73 | 74 | location = "Berlin" 75 | messages = [ChatMessage.from_system("Always respond in German even if some input data is in other languages."), 76 | ChatMessage.from_user("Tell me about {{location}}")] 77 | 78 | pipe = Pipeline() 79 | pipe.add_component("prompt_builder", prompt_builder) 80 | pipe.add_component("llm", llm) 81 | pipe.connect("prompt_builder.prompt", "llm.messages") 82 | 83 | pipe.run(data={"prompt_builder": {"template_variables": {"location": location}, "template": messages}}) 84 | ``` 85 | ```bash 86 | >> {'llm': {'replies': [ChatMessage(content='Berlin ist die Hauptstadt Deutschlands und die größte Stadt des Landes. 87 | >> Es ist eine lebhafte Metropole, die für ihre Geschichte, Kultur und einzigartigen Sehenswürdigkeiten bekannt ist. 88 | >> Berlin bietet eine vielfältige Kulturszene, beeindruckende architektonische Meisterwerke wie den Berliner Dom 89 | >> und das Brandenburger Tor, sowie weltberühmte Museen wie das Pergamonmuseum. Die Stadt hat auch eine pulsierende 90 | >> Clubszene und ist für ihr aufregendes Nachtleben berühmt. Berlin ist ein Schmelztiegel verschiedener Kulturen und 91 | >> zieht jedes Jahr Millionen von Touristen an.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, 92 | >> metadata={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 32, 93 | >> 'completion_tokens': 153, 'total_tokens': 185}})]}} 94 | ``` 95 | 96 | *(screenshot: the pipeline's trace in the Traceloop UI)* 97 | 98 | ## About Traceloop 99 | 100 | Traceloop is a platform for monitoring, evaluating and debugging LLM apps and agents. Deploy changes with confidence and get insights into your LLM executions. 101 | 102 | ### Key features 103 | 104 | - Manage your prompts in a single place with version support, gradual rollout, A/B testing, and more. 105 | - Evaluate the quality of your prompts and models with auto-generated test sets. 106 | - Monitor your LLM app's performance and get alerts when it's not behaving as expected. 107 | -------------------------------------------------------------------------------- /integrations/trafilatura.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Trafilatura 4 | description: Efficiently gather text and metadata on the Web for LLM and RAG 5 | authors: 6 | - name: Adrien Barbaresi 7 | socials: 8 | github: adbar 9 | twitter: adbarbaresi 10 | linkedin: https://www.linkedin.com/in/adrienbarbaresi 11 | pypi: https://pypi.org/project/trafilatura/ 12 | repo: https://github.com/adbar/trafilatura 13 | report_issue: https://github.com/adbar/trafilatura/issues 14 | logo: /logos/trafilatura.png 15 | type: Data Ingestion 16 | version: Haystack 2.0 17 | --- 18 | 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | - [Settings](#settings) 26 | 27 | 28 | ## Overview 29 | 30 | Trafilatura is a cutting-edge Python package and command-line tool designed to gather text on the Web and simplify the process of turning raw HTML into structured, meaningful data. Its extraction component is seamlessly integrated into Haystack.
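For a sense of what the extraction step does, here is a minimal sketch of calling Trafilatura directly via its core `fetch_url` and `extract` functions (the URL is a placeholder):

```python
from trafilatura import fetch_url, extract

# Download a page and extract its main text, dropping navigation, ads, and other boilerplate.
downloaded = fetch_url("https://example.com/article")  # placeholder URL
text = extract(downloaded)
print(text)
```

Within Haystack, you normally won't call these functions yourself; the converter described below does it for you.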
31 | 32 | Going from HTML bulk to essential parts can alleviate many problems related to text quality by focusing on the actual content and avoiding the noise, which is beneficial for LLM applications. 33 | 34 | 35 | ## Installation 36 | 37 | ```bash 38 | pip install haystack-ai trafilatura 39 | ``` 40 | 41 | 42 | ## Usage 43 | 44 | Trafilatura powers the [`HTMLToDocument`](https://docs.haystack.deepset.ai/docs/htmltodocument) component in Haystack's converters. Here is how to use it: 45 | 46 | ```python 47 | from haystack.components.converters import HTMLToDocument 48 | 49 | converter = HTMLToDocument() 50 | results = converter.run(sources=["path/to/sample.html"]) 51 | documents = results["documents"] 52 | print(documents[0].content) 53 | # 'This is a text from the HTML file.' 54 | ``` 55 | 56 | 57 | ### Settings 58 | 59 | The `__init__` and `run` methods take an optional `extraction_kwargs` parameter, which is then passed to Trafilatura. It has to be a dictionary of arguments known to the package; here are some useful options in this context: 60 | 61 | - Choice of HTML elements 62 | - `include_comments=True` (comment sections at the bottom of articles) 63 | - `include_images=True` 64 | - `include_tables=True` (active by default) 65 | - `prune_xpath=["//p[@class='discarded']"]` (pruning the tree before extraction) 66 | - Optimization for precision or recall 67 | - `favor_precision=True` (if your results contain too much noise) 68 | - `favor_recall=True` (if parts of your documents are missing) 69 | 70 | For more information, see the [Python usage](https://trafilatura.readthedocs.io/en/latest/usage-python.html) and [function description](https://trafilatura.readthedocs.io/en/latest/corefunctions.html#extract) parts of the official documentation. 71 | -------------------------------------------------------------------------------- /integrations/unstructured-file-converter.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Unstructured File Converter 4 | description: Component to easily convert files and directories into Documents using the Unstructured API 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/unstructured-fileconverter-haystack/ 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/unstructured 13 | type: Data Ingestion 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/unstructured.svg 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | - [Overview](#overview) 20 | - [Installation](#installation) 21 | - [Usage](#usage) 22 | - [Connecting to the Unstructured API](#connecting-to-the-unstructured-api) 23 | - [Hosted API](#hosted-api) 24 | - [Local API (Docker)](#local-api-docker) 25 | - [Running Unstructured File Converter](#running-unstructured-file-converter) 26 | - [In isolation](#in-isolation) 27 | - [In a Haystack Pipeline](#in-a-haystack-pipeline) 28 | 29 | 30 | 31 | ## Overview 32 | A component for the Haystack LLM framework that converts files and directories into Documents using the Unstructured API. 33 | 34 | **[Unstructured](https://unstructured-io.github.io/unstructured/index.html)** provides ETL tools for LLMs, extracting text and other information from various file formats.
See [supported file types](https://docs.unstructured.io/api-reference/api-services/overview#supported-file-types) for more details. 35 | 36 | ## Installation 37 | To install the [Unstructured File Converter](https://docs.haystack.deepset.ai/docs/unstructuredfileconverter), run: 38 | 39 | ```bash 40 | pip install unstructured-fileconverter-haystack 41 | ``` 42 | 43 | ## Usage 44 | 45 | ### Connecting to the Unstructured API 46 | #### Hosted API 47 | 48 | The Unstructured API is available in a free version (the Free Unstructured API) and a paid version (the Unstructured Serverless API). 49 | 50 | For the Free Unstructured API, the API URL is `https://api.unstructured.io/general/v0/general`. For the Unstructured Serverless API, find your unique API URL in your Unstructured account. 51 | 52 | Note that the API keys for the free and paid versions are not interchangeable. 53 | 54 | Set the Unstructured API key as an environment variable: 55 | ```bash 56 | export UNSTRUCTURED_API_KEY=your_api_key 57 | ``` 58 | 59 | #### Local API (Docker) 60 | You can run a local instance of the Unstructured API using Docker: 61 | 62 | ```bash 63 | docker run -p 8000:8000 -d --rm --name unstructured-api quay.io/unstructured-io/unstructured-api:latest --port 8000 --host 0.0.0.0 64 | ``` 65 | 66 | When initializing the component, specify the localhost URL: 67 | ```python 68 | from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter 69 | 70 | converter = UnstructuredFileConverter(api_url="http://localhost:8000/general/v0/general") 71 | ``` 72 | 73 | ### Running Unstructured File Converter 74 | #### In isolation 75 | ```python 76 | import os 77 | from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter 78 | 79 | converter = UnstructuredFileConverter() 80 | documents = converter.run(paths=["a/file/path.pdf", "a/directory/path"])["documents"] 81 | ``` 82 | 83 | #### In a Haystack Pipeline 84 | ```python 85 | import os 86 | from haystack import Pipeline 87 | from haystack.components.writers import DocumentWriter 88 | from haystack.document_stores.in_memory import InMemoryDocumentStore 89 | from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter 90 | 91 | document_store = InMemoryDocumentStore() 92 | 93 | indexing = Pipeline() 94 | indexing.add_component("converter", UnstructuredFileConverter()) 95 | indexing.add_component("writer", DocumentWriter(document_store)) 96 | indexing.connect("converter", "writer") 97 | 98 | indexing.run({"converter": {"paths": ["a/file/path.pdf", "a/directory/path"]}}) 99 | ``` -------------------------------------------------------------------------------- /integrations/uptrain.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: UpTrain 4 | description: Use the UpTrain evaluation framework to calculate model-based metrics 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/uptrain-haystack 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain 13 | type: Evaluation Framework 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/uptrain.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | -
[Usage](#usage) 25 | - [UpTrainEvaluator](#uptrainevaluator) 26 | 27 | ## Overview 28 | 29 | [UpTrain](https://uptrain.ai/) is an open-source framework for model-based evaluation that evaluates your LLM applications by quantifying their performance on aspects such as correctness, tonality, hallucination, and fluency. More information can be found on the [documentation page](https://docs.haystack.deepset.ai/docs/uptrainevaluator). 30 | 31 | ## Installation 32 | 33 | Install the UpTrain integration: 34 | ```bash 35 | pip install uptrain-haystack 36 | ``` 37 | 38 | ## Usage 39 | 40 | Once installed, you will have access to an [UpTrainEvaluator](https://docs.haystack.deepset.ai/docs/uptrainevaluator) that supports a variety of model-based evaluation metrics: 41 | - "context_relevance" 42 | - "factual_accuracy" 43 | - "response_relevance" 44 | - "response_completeness" 45 | - "response_completeness_wrt_context" 46 | - "response_consistency" 47 | - "response_conciseness" 48 | - "critique_language" 49 | - "critique_tone" 50 | - "guideline_adherence" 51 | - "response_matching" 52 | 53 | ### UpTrainEvaluator 54 | 55 | To use this integration for calculating model-based evaluation metrics, initialize an `UpTrainEvaluator` with the metric name and metric input parameters: 56 | 57 | ```python 58 | # A valid OpenAI API key is required to run this example. 59 | 60 | from haystack import Pipeline 61 | from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric 62 | from haystack.utils import Secret 63 | 64 | QUESTIONS = [ 65 | "Which is the most popular global sport?", 66 | ] 67 | CONTEXTS = [ 68 | ["The popularity of sports can be measured in various ways, including TV viewership, social media presence, number of participants, and economic impact. Football is undoubtedly the world's most popular sport with major events like the FIFA World Cup and sports personalities like Ronaldo and Messi, drawing a followership of more than 4 billion people."] 69 | ] 70 | RESPONSES = [ 71 | "Football is the most popular sport with around 4 billion followers worldwide", 72 | ] 73 | 74 | pipeline = Pipeline() 75 | evaluator = UpTrainEvaluator( 76 | metric=UpTrainMetric.FACTUAL_ACCURACY, 77 | api="openai", 78 | api_key=Secret.from_env_var("OPENAI_API_KEY"), 79 | ) 80 | pipeline.add_component("evaluator", evaluator) 81 | 82 | # Each metric expects a specific set of parameters as input. Refer to the 83 | # UpTrainMetric class' documentation for more details. 84 | output = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}}) 85 | # Use a distinct loop variable so the pipeline output dict isn't shadowed. 86 | for result in output["evaluator"]["results"]: 87 | print(result) 88 | ``` 89 | Output: 90 | ```python 91 | [{'name': 'factual_accuracy', 'score': 1.0, 'explanation': "1. Football is the most popular sport.\nReasoning for yes: The context explicitly states that football is undoubtedly the world's most popular sport.\nReasoning for no: No arguments.\nJudgement: yes. as the context explicitly supports the fact.\n\n2. Football has around 4 billion followers worldwide.\nReasoning for yes: The context explicitly mentions that major events like the FIFA World Cup and sports personalities like Ronaldo and Messi draw a followership of more than 4 billion people.\nReasoning for no: No arguments.\nJudgement: yes.
as the context explicitly supports the fact.\n\n"}] 92 | ``` 93 | -------------------------------------------------------------------------------- /integrations/vllm.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: vLLM Invocation Layer 4 | description: Use the vLLM inference engine with Haystack 5 | authors: 6 | - name: Lukas Kreussel 7 | socials: 8 | github: LLukas22 9 | pypi: https://pypi.org/project/vllm-haystack/ 10 | repo: https://github.com/LLukas22/vLLM-haystack-adapter 11 | type: Model Provider 12 | report_issue: https://github.com/LLukas22/vLLM-haystack-adapter/issues 13 | logo: /logos/vllm.png 14 | version: Haystack 2.0 15 | toc: true 16 | --- 17 | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) 18 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) 19 | 20 | Simply use [vLLM](https://github.com/vllm-project/vllm) in your Haystack pipeline to utilize fast, self-hosted LLMs. 21 | 22 |

23 | *(vLLM and Haystack logos)* 24 | 25 | 26 | 27 |

28 | 29 | ### Table of Contents 30 | 31 | - [Overview](#overview) 32 | - [Installation](#installation) 33 | - [Usage](#usage) 34 | 35 | ## Overview 36 | 37 | [vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs. 38 | It is an open-source project that allows serving open models in production when you have GPU resources available. 39 | 40 | vLLM can be deployed as a server that implements the OpenAI API protocol, so integration with Haystack comes out of the box. 41 | This allows vLLM to be used with the [`OpenAIGenerator`](https://docs.haystack.deepset.ai/docs/openaigenerator) and [`OpenAIChatGenerator`](https://docs.haystack.deepset.ai/docs/openaichatgenerator) components in Haystack. 42 | 43 | For an end-to-end example of [vLLM + Haystack, see this notebook](https://colab.research.google.com/github/deepset-ai/haystack-cookbook/blob/main/notebooks/vllm_inference_engine.ipynb). 44 | 45 | 46 | ## Installation 47 | vLLM should be installed. 48 | - You can use `pip`: `pip install vllm` (more information in the [vLLM documentation](https://docs.vllm.ai/en/latest/getting_started/installation.html)) 49 | - For production use cases, there are many other options, including Docker ([docs](https://docs.vllm.ai/en/latest/serving/deploying_with_docker.html)) 50 | 51 | ## Usage 52 | You first need to run a vLLM OpenAI-compatible server. You can do that using [Python](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server) or [Docker](https://docs.vllm.ai/en/latest/serving/deploying_with_docker.html). 53 | 54 | Then, you can use the `OpenAIGenerator` and `OpenAIChatGenerator` components in Haystack to query the vLLM server. 55 | 56 | ```python 57 | from haystack.components.generators.chat import OpenAIChatGenerator 58 | from haystack.dataclasses import ChatMessage 59 | from haystack.utils import Secret 60 | 61 | generator = OpenAIChatGenerator( 62 | api_key=Secret.from_token("VLLM-PLACEHOLDER-API-KEY"), # for compatibility with the OpenAI API, a placeholder api_key is needed 63 | model="mistralai/Mistral-7B-Instruct-v0.1", 64 | api_base_url="http://localhost:8000/v1", 65 | generation_kwargs={"max_tokens": 512} 66 | ) 67 | 68 | response = generator.run(messages=[ChatMessage.from_user("Hi. 
Can you help me plan my next trip to Italy?")]) 69 | ``` 70 | -------------------------------------------------------------------------------- /integrations/weaviate-document-store.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Weaviate 4 | description: Use a Weaviate database with Haystack 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/weaviate-haystack/ 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/weaviate 13 | type: Document Store 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/weaviate.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | 20 | ### Table of Contents 21 | 22 | - [Overview](#overview) 23 | - [Installation](#installation) 24 | - [Usage](#usage) 25 | 26 | ## Overview 27 | 28 | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) 29 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) 30 | [![test](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) 31 | 32 | --- 33 | 34 | ## Installation 35 | 36 | Use `pip` to install the Weaviate integration: 37 | 38 | ```console 39 | pip install weaviate-haystack 40 | ``` 41 | 42 | ## Usage 43 | 44 | Once installed, initialize your Weaviate database to use it with Haystack. 45 | 46 | In this example, we use the temporary embedded version for simplicity. 47 | To use a self-hosted Docker container or Weaviate Cloud Service, take a look at the [docs](https://docs.haystack.deepset.ai/docs/weaviatedocumentstore). 48 | 49 | ```python 50 | from haystack_integrations.document_stores.weaviate import WeaviateDocumentStore 51 | from weaviate.embedded import EmbeddedOptions 52 | 53 | document_store = WeaviateDocumentStore(embedded_options=EmbeddedOptions()) 54 | ``` 55 | 56 | ### Writing Documents to WeaviateDocumentStore 57 | 58 | To write documents to `WeaviateDocumentStore`, create an indexing pipeline. 59 | 60 | ```python 61 | from haystack import Pipeline 62 | from haystack.components.converters import TextFileToDocument 63 | from haystack.components.writers import DocumentWriter 64 | indexing = Pipeline() 65 | indexing.add_component("converter", TextFileToDocument()) 66 | indexing.add_component("writer", DocumentWriter(document_store)) 67 | indexing.connect("converter", "writer") 68 | indexing.run({"converter": {"sources": file_paths}})  # file_paths: a list of text file paths 69 | ``` 70 | 71 | ### License 72 | 73 | `weaviate-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
74 | -------------------------------------------------------------------------------- /integrations/weights-and-bias-tracer.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: integration 3 | name: Weights & Biases Weave Tracer 4 | description: Send Haystack traces to Weights & Biases for monitoring and visualization 5 | authors: 6 | - name: deepset 7 | socials: 8 | github: deepset-ai 9 | twitter: deepset_ai 10 | linkedin: https://www.linkedin.com/company/deepset-ai/ 11 | pypi: https://pypi.org/project/weave-haystack/ 12 | repo: https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/weights_and_biases_weave 13 | type: Monitoring Tool 14 | report_issue: https://github.com/deepset-ai/haystack-core-integrations/issues 15 | logo: /logos/weights_and_bias.png 16 | version: Haystack 2.0 17 | toc: true 18 | --- 19 | ### **Table of Contents** 20 | - [Overview](#overview) 21 | - [Installation](#installation) 22 | - [Usage](#usage) 23 | - [License](#license) 24 | 25 | ## Overview 26 | 27 | This integration allows you to use the [Weights & Biases Weave framework](https://wandb.ai/site/weave/) for tracing and monitoring Haystack pipeline 28 | components. It provides a connector that sends Haystack traces to Weights & Biases for monitoring and visualization. 29 | 30 | ## Installation 31 | 32 | ```bash 33 | pip install weave-haystack 34 | ``` 35 | 36 | ## Usage 37 | 38 | ### Components 39 | This integration introduces one new component, a connector named [`WeaveConnector`](https://docs.haystack.deepset.ai/docs/weaveconnector), whose only responsibility is to send 40 | traces to Weights & Biases. 41 | 42 | Note that you need to have the `WANDB_API_KEY` environment variable set to your Weights & Biases API key. 43 | 44 | NOTE: If you don't have a Weights & Biases account, it will interactively ask you to set one up, and your input will then 45 | be stored in `~/.netrc`. 46 | 47 | In addition, you need to set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true` in order to 48 | enable Haystack tracing in your pipeline. 49 | 50 | To use this connector, simply add it to your pipeline without any connections, and it will automatically start 51 | sending traces to Weights & Biases. 52 | 53 | 54 | ```python 55 | import os 56 | 57 | from haystack import Pipeline 58 | from haystack.components.builders import ChatPromptBuilder 59 | from haystack.components.generators.chat import OpenAIChatGenerator 60 | from haystack.dataclasses import ChatMessage 61 | 62 | from haystack_integrations.components.connectors import WeaveConnector 63 | 64 | os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" 65 | messages = [ 66 | ChatMessage.from_system( 67 | "Always respond in German even if some input data is in other languages." 
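# ({{location}} in the user message below is a Jinja2 placeholder that ChatPromptBuilder fills in at run time)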
68 | ), 69 | ChatMessage.from_user("Tell me about {{location}}"), 70 | ] 71 | 72 | pipe = Pipeline() 73 | pipe.add_component("prompt_builder", ChatPromptBuilder(template=messages)) 74 | pipe.add_component("llm", OpenAIChatGenerator(model="gpt-4o-mini")) 75 | pipe.connect("prompt_builder.prompt", "llm.messages") 76 | 77 | connector = WeaveConnector(pipeline_name="test_pipeline") 78 | pipe.add_component("weave", connector) 79 | 80 | response = pipe.run( 81 | data={ 82 | "prompt_builder": { 83 | "location": "Berlin" 84 | } 85 | } 86 | ) 87 | print(response["llm"]["replies"][0]) 88 | ``` 89 | 90 | You should then head to `https://wandb.ai/<your_username>/projects` and see the complete trace for your pipeline under 91 | the pipeline name you specified when creating the `WeaveConnector`. 92 | 93 | ### License 94 | 95 | `weights_biases-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 96 | -------------------------------------------------------------------------------- /logos/anthropic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/anthropic.png -------------------------------------------------------------------------------- /logos/apify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/apify.png -------------------------------------------------------------------------------- /logos/arize-phoenix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/arize-phoenix.png -------------------------------------------------------------------------------- /logos/arize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/arize.png -------------------------------------------------------------------------------- /logos/assemblyai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/assemblyai.png -------------------------------------------------------------------------------- /logos/astradb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/astradb.png -------------------------------------------------------------------------------- /logos/aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/aws.png -------------------------------------------------------------------------------- /logos/azure-ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/azure-ai.png -------------------------------------------------------------------------------- /logos/azure-cosmos-db.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/azure-cosmos-db.png -------------------------------------------------------------------------------- /logos/azure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/azure.png -------------------------------------------------------------------------------- /logos/browserbase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/browserbase.png -------------------------------------------------------------------------------- /logos/burr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/burr.png -------------------------------------------------------------------------------- /logos/cerebras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/cerebras.png -------------------------------------------------------------------------------- /logos/chainlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/chainlit.png -------------------------------------------------------------------------------- /logos/chroma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/chroma.png -------------------------------------------------------------------------------- /logos/cohere.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/cohere.png -------------------------------------------------------------------------------- /logos/context.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 14 | 15 | 17 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /logos/couchbase.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logos/deepeval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/deepeval.png -------------------------------------------------------------------------------- /logos/docling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/docling.png -------------------------------------------------------------------------------- /logos/duckduckgo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/duckduckgo.png -------------------------------------------------------------------------------- /logos/elastic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/elastic.png -------------------------------------------------------------------------------- /logos/elevenlabs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/elevenlabs.png -------------------------------------------------------------------------------- /logos/flow-ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/flow-ai.png -------------------------------------------------------------------------------- /logos/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/github.png -------------------------------------------------------------------------------- /logos/googleai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/googleai.png -------------------------------------------------------------------------------- /logos/groq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/groq.png -------------------------------------------------------------------------------- /logos/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/huggingface.png -------------------------------------------------------------------------------- /logos/intel-labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/intel-labs.png -------------------------------------------------------------------------------- /logos/jina.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/jina.png -------------------------------------------------------------------------------- /logos/lancedb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/lancedb.png -------------------------------------------------------------------------------- /logos/langfuse.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/langfuse.png -------------------------------------------------------------------------------- /logos/llama_cpp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/llama_cpp.png -------------------------------------------------------------------------------- /logos/llamafile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/llamafile.png -------------------------------------------------------------------------------- /logos/lmformatenforcer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/lmformatenforcer.png -------------------------------------------------------------------------------- /logos/marqo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/marqo.png -------------------------------------------------------------------------------- /logos/mastodon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/mastodon.png -------------------------------------------------------------------------------- /logos/mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/mcp.png -------------------------------------------------------------------------------- /logos/meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/meta.png -------------------------------------------------------------------------------- /logos/meta_llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/meta_llama.png -------------------------------------------------------------------------------- /logos/milvus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/milvus.png -------------------------------------------------------------------------------- /logos/mistral.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logos/mixedbread-ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/mixedbread-ai.png -------------------------------------------------------------------------------- /logos/mongodb.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/mongodb.png -------------------------------------------------------------------------------- /logos/monsterapi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/monsterapi.png -------------------------------------------------------------------------------- /logos/needle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/needle.png -------------------------------------------------------------------------------- /logos/neo4j.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/neo4j.png -------------------------------------------------------------------------------- /logos/notion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/notion.png -------------------------------------------------------------------------------- /logos/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/nvidia.png -------------------------------------------------------------------------------- /logos/ollama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/ollama.png -------------------------------------------------------------------------------- /logos/opea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/opea.png -------------------------------------------------------------------------------- /logos/openai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/openai.png -------------------------------------------------------------------------------- /logos/openrouter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/openrouter.png -------------------------------------------------------------------------------- /logos/opensearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/opensearch.png -------------------------------------------------------------------------------- /logos/opik.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/opik.png -------------------------------------------------------------------------------- /logos/pinecone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/pinecone.png -------------------------------------------------------------------------------- /logos/qdrant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/qdrant.png -------------------------------------------------------------------------------- /logos/ragas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/ragas.png -------------------------------------------------------------------------------- /logos/ray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/ray.png -------------------------------------------------------------------------------- /logos/sambanova.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/sambanova.png -------------------------------------------------------------------------------- /logos/snowflake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/snowflake.png -------------------------------------------------------------------------------- /logos/stackit.svg: -------------------------------------------------------------------------------- 1 | 2 | 11 | 13 | 20 | 21 | 24 | 28 | 32 | 39 | 43 | 47 | 51 | 55 | 56 | 61 | 66 | 67 | -------------------------------------------------------------------------------- /logos/titanml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/titanml.png -------------------------------------------------------------------------------- /logos/traceloop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/traceloop.png -------------------------------------------------------------------------------- /logos/trafilatura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/trafilatura.png -------------------------------------------------------------------------------- /logos/uptrain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/uptrain.png 
-------------------------------------------------------------------------------- /logos/vertexai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/vertexai.png -------------------------------------------------------------------------------- /logos/vllm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/vllm.png -------------------------------------------------------------------------------- /logos/voyage_ai.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/voyage_ai.jpg -------------------------------------------------------------------------------- /logos/weaviate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/weaviate.png -------------------------------------------------------------------------------- /logos/weights_and_bias.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/haystack-integrations/2df56082975958276e39ac4fb1f20077c79c86a6/logos/weights_and_bias.png --------------------------------------------------------------------------------