634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
7 |
8 | Sinapsis Langchain
9 |
10 |
11 |
12 | Templates for seamless integration of the LangChain framework.
13 |
14 |
15 | 🐍 Installation •
16 | 🚀 Packages •
17 | 📚 Usage example •
18 | 📙 Documentation •
19 | 🔍 License
20 |
21 |
22 | The `sinapsis-langchain` module adds support for the Langchain framework, in particular, Langchain community data loaders and text splitters.
23 |
24 |
25 | We add support for the following packages:
26 |
27 | * sinapsis-langchain-readers
28 |
29 | * sinapsis-langchain-splitters
30 |
31 | 🐍 Installation
32 | Install using your package manager of choice. We encourage the use of uv
33 |
34 | Example with uv
:
35 |
36 | ```bash
37 | uv pip install sinapsis-langchain-readers --extra-index-url https://pypi.sinapsis.tech
38 | ```
39 | or with raw pip
:
40 | ```bash
41 | pip install sinapsis-langchain-readers --extra-index-url https://pypi.sinapsis.tech
42 | ```
43 |
44 | >[!NOTE]
45 | > Change the name of the package accordingly
46 |
47 |
48 | > [!IMPORTANT]
49 | > The langchain templates may require extra dependencies. For development, we recommend installing the package with all the optional dependencies:
50 | >
51 | ```bash
52 | uv pip install sinapsis-langchain-readers[all] --extra-index-url https://pypi.sinapsis.tech
53 | ```
54 | > [!IMPORTANT]
55 | > Some langchain templates require additional system dependencies. Please refer to the official [LangChain Document Loaders documentation](https://python.langchain.com/docs/integrations/document_loaders/) for additional requirements.
56 | >
57 |
58 |
59 | >[!NOTE]
60 | > Change the name of the package accordingly
61 |
62 | >[!TIP]
63 | > You can also install the full monorepo:
64 |
65 | >
66 | ```bash
67 | uv pip install sinapsis-langchain[all] --extra-index-url https://pypi.sinapsis.tech
68 | ```
69 |
70 |
71 | 🚀 Features
72 |
73 | Templates Supported
74 |
75 | The **Sinapsis Langchain** module provides wrapper templates for **Langchain's community data loaders**, making them seamlessly usable within Sinapsis.
76 | > [!NOTE]
77 | > Each loader template supports one attribute:
78 | > - **`add_document_as_text_packet`** (`bool`, default: `False`): Whether to add the loaded document as a text packet.
79 | > Other attributes can be dynamically assigned through the class initialization dictionary (`class init attributes`).
80 |
81 | > [!TIP]
82 | > Use CLI command ``` sinapsis info --all-template-names``` to show a list with all the available Template names installed with Sinapsis Langchain.
83 |
84 | > [!TIP]
85 | > Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
86 |
87 |
88 | For example, for ***WikipediaLoaderWrapper*** use ```sinapsis info --example-template-config WikipediaLoaderWrapper``` to produce the following example config:
89 |
90 | ```yaml
91 | agent:
92 | name: agent to load Wikipedia documents using WikipediaLoaderWrapper template
93 | templates:
94 | - template_name: InputTemplate
95 | class_name: InputTemplate
96 | attributes: {}
97 | - template_name: WikipediaLoaderWrapper
98 | class_name: WikipediaLoaderWrapper
99 | template_input: InputTemplate
100 | attributes:
101 | add_document_as_text_packet: false
102 | wikipedialoader_init:
103 | query: the query for wikipedia
104 | lang: en
105 | load_max_docs: 5000
106 | load_all_available_meta: False
107 | doc_content_chars_max: 4000
108 | ```
109 |
110 | A complete list of available document loader classes in LangChain can be found at:
111 | [LangChain Community Document Loaders](https://python.langchain.com/api_reference/community/document_loaders.html#langchain-community-document-loaders)
112 |
113 |
114 | 🚫 Excluded Loaders
115 |
116 | Some base classes and loaders that require additional configuration have been excluded; support for them will be included in future releases.
117 |
118 | - **Blob**
119 | - **BlobLoader**
120 | - **OracleTextSplitter**
121 | - **OracleDocLoader**
122 | - **TrelloLoaderExecute**
123 | - **TwitterTweetLoader**
124 | - **TrelloLoader**
125 | - **GoogleApiYoutubeLoader**
126 | - **GoogleApiClient**
127 | - **DiscordChatLoader**
128 | - **AssemblyAIAudioTranscriptLoader**
129 | - **ArcGISLoader**
130 |
131 | For all other supported loaders, refer to the LangChain API reference linked above.
132 |
133 | 📚 Usage example
134 |
135 |
136 | The following example demonstrates how to use the **WikipediaLoaderWrapper** template for loading documents from Wikipedia within Sinapsis. Below is the full YAML configuration, followed by a breakdown of each component.
137 |
138 | configuration
139 |
140 | ```yaml
141 | agent:
142 | name: my_test_agent
143 | description: "Wikipedia loader example"
144 |
145 | templates:
146 |
147 | - template_name: InputTemplate
148 | class_name: InputTemplate
149 | attributes: {}
150 |
151 | - template_name: WikipediaLoaderWrapper
152 | class_name: WikipediaLoaderWrapper
153 | template_input: InputTemplate
154 | attributes:
155 | add_document_as_text_packet: false
156 | wikipedialoader_init:
157 | query: GenAI
158 | lang: en
159 | load_max_docs: 1
160 | load_all_available_meta: false
161 | doc_content_chars_max: 4000
162 | ```
163 | To run, simply use:
164 |
165 | ```bash
166 | sinapsis run name_of_the_config.yml
167 | ```
168 |
169 |
170 |
171 |
172 | 📙 Documentation
173 |
174 | Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
175 |
176 | Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
177 |
178 | 🔍 License
179 |
180 | This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
181 |
182 | For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
183 |
184 |
185 |
186 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
# Image for the sinapsis-langchain templates, built on top of the sinapsis:base image.
FROM sinapsis:base
# Copy the whole build context into /app.
COPY . /app

# NOTE(review): the working directory is the virtualenv folder rather than /app —
# confirm that `uv sync` is meant to run from here.
WORKDIR /app/.venv
# Install the locked dependency set without updating the lockfile.
RUN uv sync --frozen
# Add the readers package with all of its optional extras from the Sinapsis package index.
RUN uv pip install sinapsis-langchain-readers[all] --extra-index-url https://pypi.sinapsis.tech
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | > [!IMPORTANT]
2 | > To build the sinapsis-langchain image you need
3 | the sinapsis:base image. To build it, please follow the instructions in
4 | the [sinapsis repo](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker).
--------------------------------------------------------------------------------
/docker/compose.yaml:
--------------------------------------------------------------------------------
services:

  # Base service: builds the sinapsis-langchain:base image from the repository root
  # (one level above this file) using docker/Dockerfile.
  sinapsis-langchain:
    image: sinapsis-langchain:base
    build:
      context: ../
      dockerfile: docker/Dockerfile
      # Expose the default SSH agent/key to the build.
      ssh:
        - default
    environment:
      PYTHONPATH: /lib/python3.10/:/app/.venv/lib/python3.10/site-packages

  # Runs the Wikipedia loader example config with the image defined above.
  sinapsis-langchain-readers:
    extends:
      service: sinapsis-langchain
    container_name: sinapsis-langchain-readers-wikipedia
    shm_size: '8gb'
    volumes:
      # Mount the packaged example configs so they can be edited without rebuilding the image.
      - "../packages/sinapsis_langchain_readers/src/sinapsis_langchain_readers/configs/:/app/src/sinapsis_langchain_readers/configs"
      # Share the host's sinapsis cache directory with the container.
      - "~/.cache/sinapsis/:/root/.cache/sinapsis"
    command: /app/.venv/bin/sinapsis run /app/src/sinapsis_langchain_readers/configs/wikipedia_loader.yml
    network_mode: "host"
23 |
24 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/.python-version:
--------------------------------------------------------------------------------
1 | 3.10.12
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
7 |
8 | Sinapsis Langchain Readers
9 |
10 |
11 |
12 | Templates for easy integration of LangChain document loaders within Sinapsis.
13 |
14 |
15 | 🐍 Installation •
16 | 🚀 Features •
17 | 📚 Usage example •
18 | 📙 Documentation •
19 | 🔍 License
20 |
21 |
22 | The `sinapsis-langchain-readers` module adds support for the LangChain library, in particular, LangChain community data loaders.
23 |
24 | 🐍 Installation
25 | Install using your package manager of choice. We encourage the use of uv
26 |
27 | Example with uv
:
28 |
29 | ```bash
30 | uv pip install sinapsis-langchain-readers --extra-index-url https://pypi.sinapsis.tech
31 | ```
32 | or with raw pip
:
33 | ```bash
34 | pip install sinapsis-langchain-readers --extra-index-url https://pypi.sinapsis.tech
35 | ```
36 |
37 |
38 |
39 | > [!IMPORTANT]
40 | > The langchain readers templates may require extra dependencies. For development, we recommend installing the package with all the optional dependencies:
41 | >
42 | ```bash
43 | uv pip install sinapsis-langchain-readers[all] --extra-index-url https://pypi.sinapsis.tech
44 | ```
45 | > [!IMPORTANT]
46 | > Some langchain templates require additional system dependencies. Please refer to the official [LangChain Document Loaders documentation](https://python.langchain.com/docs/integrations/document_loaders/) for additional requirements.
47 | >
48 |
49 |
50 | 🚀 Features
51 |
52 | Templates Supported
53 |
54 | The **Sinapsis Langchain** module provides wrapper templates for **LangChain's community data loaders**, making them seamlessly usable within Sinapsis.
55 | > [!NOTE]
56 | > Each loader template supports one attribute:
57 | > - **`add_document_as_text_packet`** (`bool`, default: `False`): Whether to add the loaded document as a text packet.
58 | > Other attributes can be dynamically assigned through the class initialization dictionary (`class init attributes`).
59 |
60 | > [!TIP]
61 | > Use CLI command ``` sinapsis info --all-template-names``` to show a list with all the available Template names installed with Sinapsis Langchain.
62 |
63 | > [!TIP]
64 | > Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
65 |
66 |
67 | For example, for ***WikipediaLoaderWrapper*** use ```sinapsis info --example-template-config WikipediaLoaderWrapper``` to produce the following example config:
68 |
69 | ```yaml
70 | agent:
71 | name: agent to load Wikipedia documents using WikipediaLoaderWrapper template
72 | templates:
73 | - template_name: InputTemplate
74 | class_name: InputTemplate
75 | attributes: {}
76 | - template_name: WikipediaLoaderWrapper
77 | class_name: WikipediaLoaderWrapper
78 | template_input: InputTemplate
79 | attributes:
80 | add_document_as_text_packet: false
81 | wikipedialoader_init:
82 | query: the query for wikipedia
83 | lang: en
84 | load_max_docs: 5000
85 | load_all_available_meta: False
86 | doc_content_chars_max: 4000
87 | ```
88 |
89 | A complete list of available document loader classes in LangChain can be found at:
90 | [LangChain Community Document Loaders](https://python.langchain.com/api_reference/community/document_loaders.html#langchain-community-document-loaders)
91 |
92 |
93 | 🚫 Excluded Loaders
94 |
95 | Some base classes and loaders that require additional configuration have been excluded; support for them will be included in future releases.
96 |
97 | - **Blob**
98 | - **BlobLoader**
99 | - **OracleTextSplitter**
100 | - **OracleDocLoader**
101 | - **TrelloLoaderExecute**
102 | - **TwitterTweetLoader**
103 | - **TrelloLoader**
104 | - **GoogleApiYoutubeLoader**
105 | - **GoogleApiClient**
106 | - **DiscordChatLoader**
107 | - **AssemblyAIAudioTranscriptLoader**
108 | - **ArcGISLoader**
109 |
110 | For all other supported loaders, refer to the LangChain API reference linked above.
111 |
112 | 📚 Usage example
113 |
114 |
115 | The following example demonstrates how to use the **WikipediaLoaderWrapper** template for loading documents from Wikipedia within Sinapsis. Below is the full YAML configuration, followed by a breakdown of each component.
116 |
117 | configuration
118 |
119 | ```yaml
120 | agent:
121 | name: my_test_agent
122 | description: "Wikipedia loader example"
123 |
124 | templates:
125 |
126 | - template_name: InputTemplate
127 | class_name: InputTemplate
128 | attributes: {}
129 |
130 | - template_name: WikipediaLoaderWrapper
131 | class_name: WikipediaLoaderWrapper
132 | template_input: InputTemplate
133 | attributes:
134 | add_document_as_text_packet: false
135 | wikipedialoader_init:
136 | query: GenAI
137 | lang: en
138 | load_max_docs: 1
139 | load_all_available_meta: false
140 | doc_content_chars_max: 4000
141 | ```
142 | To run, simply use:
143 |
144 | ```bash
145 | sinapsis run name_of_the_config.yml
146 | ```
147 |
148 |
149 |
150 |
151 | 📙 Documentation
152 |
153 | Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
154 |
155 | Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
156 |
157 | 🔍 License
158 |
159 | This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
160 |
161 | For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/pyproject.toml:
--------------------------------------------------------------------------------
# Core metadata for the LangChain document-loader templates package.
[project]
name = "sinapsis-langchain-readers"
version = "0.1.4"
description = "Package that provides support for Langchain community data loaders."
authors = [{ name = "SinapsisAI", email = "dev@sinapsis.tech" }]
readme = "README.md"
license-files = ["LICENSE"]
requires-python = ">=3.10"
# Only the always-required packages live here; loader-specific packages are optional extras.
dependencies = [
    "langchain-community>=0.3.5",
    "sinapsis>=0.1.1",
]
13 |
14 |
# Optional extras, grouped by the kind of source the LangChain loaders read from.
# Install one group with `sinapsis-langchain-readers[<group>]`, or every group with `[all]`.
[project.optional-dependencies]
langchain-webpages-readers = [
    "apify-client>=1.8.1",
    "arxiv>=2.1.3",
    "beautifulsoup4>=4.12.3",
    "browserbase>=1.0.0",
    "dgml-utils>=0.3.0",
    #"docugami-langchain",
    "firecrawl-py>=1.4.0",
    "geopandas>=1.0.1",
    "librosa>=0.11.0",
    "llvmlite>=0.44.0",
    "pyairtable>=2.3.5",
    "pydub>=0.25.1",
    "pymupdf>=1.24.13",
    "sodapy>=2.2.0",
    # Previously reachable only through the misspelled "langchain-wepages-readers" extra,
    # which meant `[all]` never installed it.
    "youtube-transcript-api>=1.0.0",
    "yt-dlp>=2024.11.4",
]
langchain-wikipedia-readers = ["wikipedia>=1.4.0"]
langchain-pdfs-readers = [
    "bibtexparser>=1.4.2",
    "pymupdf>=1.24.13",
    "pypdf>=5.1.0",
]
langchain-unstructured-readers = [
    "langchain-unstructured>=0.1.5",
    "unstructured[pdf]>=0.16.5",
    "unstructured-client>=0.25.9",
    "python-magic>=0.4.27",
]
langchain-cloud-readers = [
    "amazon-textract-caller>=0.2.4",
    "assemblyai>=0.35.1",
    "atlassian-python-api>=3.41.16",
    "azure-ai-generative>=1.0.0b11",
    "azure-storage-blob>=12.23.1",
    "azureml-fsspec>=1.3.1",
    "boto3>=1.35.57",
    "dropbox>=12.0.2",
    "google-api-python-client>=2.151.0",
    "google-auth-httplib2>=0.2.0",
    "google-auth-oauthlib>=1.2.1",
    "html2text>=2024.2.26",
    "langchain-community>=0.3.5",
    "langchain-google-bigtable>=0.4.1",
    "langchain-google-community[gcs]>=2.0.2",
    "langchain-openai>=0.2.6",
    "playwright>=1.48.0",
    "pyodps>=0.12.1",
    "python-dotenv>=1.0.1",
]
langchain-social-readers = [
    "mastodon-py>=1.8.1",
    "pandas>=2.2.3",
    "tweepy>=4.14.0",
]
langchain-productivity-tools-readers = [
    # NOTE(review): "4.30" may be a typo for "4.3.0" — confirm the intended minimum version.
    "lxml>=4.30",
    "py-trello>=0.20.1",
]
langchain-common-readers = ["bs4>=0.0.2", "jq>=1.8.0"]
langchain-database-readers = [
    "fauna>=2.3.0",
    "langchain>=0.3.7",
    "langchain-google-alloydb-pg>=0.8.0",
    "langchain-google-community[bigquery]>=2.0.2",
    "pyowm>=3.3.0",
]
langchain-productivity-tools = ["gitpython>=3.1.43"]

# Deprecated misspelling of "langchain-webpages-readers"; kept with its original contents
# so existing installs that request this extra keep working.
langchain-wepages-readers = [
    "youtube-transcript-api>=1.0.0",
]

# Aggregate extra: pulls in every loader group defined above.
all = [
    "sinapsis-langchain-readers[langchain-productivity-tools]",
    "sinapsis-langchain-readers[langchain-database-readers]",
    "sinapsis-langchain-readers[langchain-common-readers]",
    "sinapsis-langchain-readers[langchain-productivity-tools-readers]",
    "sinapsis-langchain-readers[langchain-social-readers]",
    "sinapsis-langchain-readers[langchain-cloud-readers]",
    "sinapsis-langchain-readers[langchain-unstructured-readers]",
    "sinapsis-langchain-readers[langchain-pdfs-readers]",
    "sinapsis-langchain-readers[langchain-wikipedia-readers]",
    "sinapsis-langchain-readers[langchain-webpages-readers]",
]
96 |
97 |
# Additional package index for uv; the READMEs pass the same URL via --extra-index-url.
[[tool.uv.index]]
url = "https://pypi.sinapsis.tech/"
# Development-only tooling (linting and git hooks); not part of the published dependencies.
[dependency-groups]
dev = [
    "ruff>=0.8.3",
    "pre-commit>=4.0.1",
]
105 |
# Ruff settings shared by formatting and linting.
[tool.ruff]
line-length = 120
show-fixes = true

# Lint rule families that are enabled, plus the single rule that is switched off.
[tool.ruff.lint]
select = [
    "ARG",
    "ANN",
    "BLE",
    "C4",
    "E",
    "F",
    "FIX",
    "FLY",
    "I",
    "PERF",
    "PIE",
    "RUF",
    "RSE",
    "SIM",
    "SLOT",
    "T10",
    "T20",
    "TD",
    "TID",
]
ignore = ['ANN401']
133 |
134 |
# The [project] table uses `license-files` (PEP 639), which older setuptools releases
# reject, so declare the minimum version that understands it.
[build-system]
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"
138 |
# Links published with the package metadata.
[project.urls]
Homepage = "https://sinapsis.tech"
Documentation = "https://docs.sinapsis.tech/docs"
Tutorials = "https://docs.sinapsis.tech/tutorials"
Repository = "https://github.com/Sinapsis-AI/sinapsis-langchain.git"
144 |
145 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/src/sinapsis_langchain_readers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # sinapsis_langchain_readers package
3 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/src/sinapsis_langchain_readers/configs/wikipedia_loader.yml:
--------------------------------------------------------------------------------
# Example agent: loads a single Wikipedia article about "GenAI" with the WikipediaLoaderWrapper template.
agent:
  name: my_test_agent
  description: "Wikipedia loader example"

templates:

- template_name: InputTemplate
  class_name: InputTemplate
  attributes: {}

- template_name: WikipediaLoaderWrapper
  class_name: WikipediaLoaderWrapper
  template_input: InputTemplate
  attributes:
    # false: the loaded documents are not converted into text packets.
    add_document_as_text_packet: false
    # Settings for the wrapped WikipediaLoader.
    wikipedialoader_init:
      query: GenAI
      lang: en
      load_max_docs: 1
      load_all_available_meta: false
      doc_content_chars_max: 4000
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/src/sinapsis_langchain_readers/templates/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import importlib
3 | from typing import Callable, cast
4 |
5 | from sinapsis.templates import _import_template_package
6 |
# Dotted path of this package; prefixes the template modules below and appears in error messages.
_root_lib_path = "sinapsis_langchain_readers.templates"

# Modules whose templates are exposed through this package.
_ADDITIONAL_TEMPLATE_MODULES = [
    f"{_root_lib_path}.langchain_data_readers",
]

# Maps a template name to the module path it is later imported from.
_template_lookup: dict = {}
for t_module in _ADDITIONAL_TEMPLATE_MODULES:
    _template_lookup.update(_import_template_package(t_module))
16 |
17 |
def __getattr__(name: str) -> Callable:
    """Resolve a template by name, importing its module only when it is requested.

    Args:
        name (str): Attribute requested on this package.

    Returns:
        Callable: The attribute called `name` from the module registered for it.

    Raises:
        AttributeError: If `name` is not a registered template.
    """
    if name not in _template_lookup:
        raise AttributeError(f"template `{name}` not found in {_root_lib_path}")
    owning_module = importlib.import_module(_template_lookup[name])
    return cast(Callable, getattr(owning_module, name))
23 |
24 |
# Advertise every registered template name as part of the public API.
__all__ = list(_template_lookup)
26 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_readers/src/sinapsis_langchain_readers/templates/langchain_data_readers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from typing import cast
4 |
5 | from langchain_community import document_loaders
6 | from langchain_community.document_loaders.base import BaseLoader as LangChainBaseLoader
7 | from langchain_community.document_loaders.blob_loaders import Blob
8 | from langchain_community.document_loaders.blob_loaders import (
9 | BlobLoader as LangChainBlobLoader, # avoid name collision
10 | )
11 | from langchain_core.documents.base import Document
12 | from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
13 | from sinapsis_core.template_base import Template
14 | from sinapsis_core.template_base.base_models import OutputTypes, TemplateAttributes, UIPropertiesMetadata
15 | from sinapsis_core.template_base.dynamic_template import (
16 | BaseDynamicWrapperTemplate,
17 | WrapperEntryConfig,
18 | )
19 | from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
20 | from sinapsis_core.template_base.multi_execute_template import (
21 | execute_template_n_times_wrapper,
22 | )
23 | from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
24 |
# Names from langchain_community.document_loaders that are NOT wrapped as templates:
# base classes, or loaders that need an object instance that is not a built-in type.
EXCLUDED_LOADER_MODULE_OBJECTS = [
    "Blob",
    "BlobLoader",
    "OracleTextSplitter",
    "OracleDocLoader",
    "TrelloLoaderExecute",
    "TwitterTweetLoader",
    "TrelloLoader",
    "GoogleApiYoutubeLoader",
    "GoogleApiClient",
    "DiscordChatLoader",
    "AssemblyAIAudioTranscriptLoader",
    "ArcGISLoader",
]
40 |
41 |
class LangChainDataReaderBase(BaseDynamicWrapperTemplate):
    """
    Dynamic Template to load documents from the Langchain document_loaders module.
    The loaded documents are stored either as Document objects in the generic_data field
    of the DataContainer, or as one TextPacket per document if add_document_as_text_packet
    is set to True.

    Usage example:

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: WikipediaLoaderWrapper
      class_name: WikipediaLoaderWrapper      ## note that because it is a dynamic template,
      template_input: InputTemplate      ## the class name depends on the actual class that is used.
      attributes:
        add_document_as_text_packet: false
        wikipedialoader_init:
          query: the query for wikipedia
          lang: en
          load_max_docs: 5000
          load_all_available_meta: False
          doc_content_chars_max: 4000

    """

    WrapperEntry = WrapperEntryConfig(
        wrapped_object=document_loaders, exclude_module_atts=EXCLUDED_LOADER_MODULE_OBJECTS
    )
    UIProperties = UIPropertiesMetadata(category="LangChain", output_type=OutputTypes.TEXT)

    class AttributesBaseModel(TemplateAttributes):
        """
        add_document_as_text_packet(bool): Whether to add document as text packet or not.
        """

        add_document_as_text_packet: bool = False

    @staticmethod
    def append_documents_as_text_packet(container: DataContainer, documents: list[Document | Blob]) -> None:
        """Append one TextPacket per loaded item to the container.

        The packet content is the "summary" metadata entry when the item has one; otherwise it
        is the item's own text. The "title" metadata entry, when present, is used as the packet
        source. Items whose metadata lacks these keys no longer raise KeyError.

        Args:
            container (DataContainer) : Container to store the TextPackets
            documents (list[Document | Blob]) : loaded documents or blobs to convert to TextPackets
        """
        for document in documents:
            metadata = document.metadata or {}

            if "summary" in metadata:
                content = metadata["summary"]
            elif isinstance(document, Blob):
                # Blobs have no page_content; as_string() is their text accessor.
                content = document.as_string()
            else:
                content = cast(Document, document).page_content

            # Only pass a source when the metadata actually provides a title.
            if "title" in metadata:
                text_packet = TextPacket(content=content, source=metadata["title"])
            else:
                text_packet = TextPacket(content=content)
            container.texts.append(text_packet)

    def execute(self, container: DataContainer) -> DataContainer:
        """Run the wrapped loader and store what it produced in the container.

        Blob loaders are drained through yield_blobs(); other LangChain loaders through load().
        Any other wrapped object only logs a warning and leaves the container untouched.

        Args:
            container (DataContainer): Container that receives the loaded documents.

        Returns:
            DataContainer: The same container, with text packets or generic data added
                when the loader returned at least one item.
        """
        documents: list[Document | Blob] = []
        if isinstance(self.wrapped_callable, LangChainBlobLoader):
            documents = list(self.wrapped_callable.yield_blobs())
        elif isinstance(self.wrapped_callable, LangChainBaseLoader):
            documents = self.wrapped_callable.load()
        else:
            self.logger.warning("Unsupported wrapped_callable type for langchain data loader.")

        if documents:
            if self.attributes.add_document_as_text_packet:
                self.append_documents_as_text_packet(container, documents)
            else:
                self._set_generic_data(container, documents)
        return container
115 |
116 |
# Variant of the loader templates that is passed through execute_template_n_times_wrapper.
# It wraps the same document_loaders module with the same exclusions, and sets
# template_name_suffix to "ExecuteNTimes".
# NOTE(review): presumably the decorator makes the template run several times per call —
# confirm against sinapsis_core.
@execute_template_n_times_wrapper
class ExecuteNTimesLangchainDataReaders(LangChainDataReaderBase):
    WrapperEntry = WrapperEntryConfig(
        wrapped_object=document_loaders,
        exclude_module_atts=EXCLUDED_LOADER_MODULE_OBJECTS,
        template_name_suffix="ExecuteNTimes",
    )
124 |
125 |
def __getattr__(name: str) -> Template:
    """
    Build a dynamic template lazily, the first time its name is looked up on this module.
    This avoids creating the base models of every template up front, and avoids import
    errors caused by packages that are not installed.

    Raises:
        AttributeError: If `name` is not a template name of either wrapper entry.
    """
    # Checked in this order: the plain readers first, then the ExecuteNTimes variants.
    for base_template in (LangChainDataReaderBase, ExecuteNTimesLangchainDataReaders):
        if name in base_template.WrapperEntry.module_att_names:
            return make_dynamic_template(name, base_template)
    raise AttributeError(f"template `{name}` not found in {__name__}")
136 |
137 |
# Public names of this module: the template names of both wrapper entries, i.e. every
# name that the module-level __getattr__ can resolve.
__all__ = (
    LangChainDataReaderBase.WrapperEntry.module_att_names
    + ExecuteNTimesLangchainDataReaders.WrapperEntry.module_att_names
)
142 |
143 |
# When building documentation, create every dynamic template eagerly and publish each one
# as a module attribute under its class name.
if SINAPSIS_BUILD_DOCS:
    dynamic_templates = [__getattr__(template_name) for template_name in __all__]
    # The comprehension variable stays local, so no loop variable is left in the module namespace.
    globals().update({built.__name__: built for built in dynamic_templates})
149 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/.python-version:
--------------------------------------------------------------------------------
1 | 3.10.12
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
7 |
8 | Sinapsis Langchain Splitters
9 |
10 |
11 |
12 | Templates for easy integration of LangChain text splitters with Sinapsis.
13 |
14 |
15 | 🐍 Installation •
16 | 🚀 Features •
17 | 📚 Usage example •
18 | 📙 Documentation •
19 | 🔍 License
20 |
21 |
22 | The `sinapsis-langchain-splitters` module adds support for all the text splitters supported by LangChain
23 |
24 | 🐍 Installation
25 | Install using your package manager of choice. We encourage the use of uv
26 |
27 | Example with uv
:
28 |
29 | ```bash
30 | uv pip install sinapsis-langchain-splitters --extra-index-url https://pypi.sinapsis.tech
31 | ```
32 | or with raw pip
:
33 | ```bash
34 | pip install sinapsis-langchain-splitters --extra-index-url https://pypi.sinapsis.tech
35 | ```
36 |
37 |
38 |
39 | > [!IMPORTANT]
40 | > The langchain readers templates may require extra dependencies. For development, we recommend installing the package with all the optional dependencies:
41 | >
42 | ```bash
43 | uv pip install sinapsis-langchain-readers[all] --extra-index-url https://pypi.sinapsis.tech
44 | ```
45 | > [!IMPORTANT]
46 | > Some langchain templates require additional system dependencies. Please refer to the official [LangChain Document Loaders documentation](https://python.langchain.com/docs/integrations/document_loaders/) for additional requirements.
47 | >
48 |
49 |
50 | 🚀 Features
51 |
52 | Templates Supported
53 |
54 | The **Sinapsis Langchain** module provides wrapper templates for **LangChain's text splitters**, making them seamlessly usable within Sinapsis.
55 | > [!NOTE]
56 | > Each loader template supports one attribute:
57 | > - **`add_document_as_text_packet`** (`bool`, default: `False`): Whether to add the loaded document as a text packet.
58 | > Other attributes can be dynamically assigned through the class initialization dictionary (`class init attributes`).
59 |
60 | > [!TIP]
61 | > Use CLI command ``` sinapsis info --all-template-names``` to show a list with all the available Template names installed with Sinapsis Langchain.
62 |
63 | > [!TIP]
64 | > Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
65 |
66 |
67 | For example, for ***RecursiveCharacterTextSplitterWrapper*** use ```sinapsis info --example-template-config RecursiveCharacterTextSplitterWrapper``` to produce the following example config:
68 |
69 | ```yaml
70 | agent:
71 | name: my_test_agent
72 | description: Agent to split text using the RecursiveCharacterTextSplitter class from LangChain
73 | templates:
74 | - template_name: InputTemplate
75 | class_name: InputTemplate
76 | attributes: {}
77 | - template_name: RecursiveCharacterTextSplitterWrapper
78 | class_name: RecursiveCharacterTextSplitterWrapper
79 | template_input: InputTemplate
80 | attributes:
81 | add_document_as_text_packet: false
82 | generic_key: WikipediaLoaderWrapper
83 | recursivecharactertextsplitter_init:
84 | separators: [" "]
85 | keep_separator: true
86 | is_separator_regex: false
87 |
88 | ```
89 |
90 | A complete list of available text splitter classes in LangChain can be found at:
91 | [LangChain Text Splitters](https://python.langchain.com/api_reference/text_splitters/index.html)
92 |
93 |
94 | 📚 Usage example
95 |
96 |
97 | The following example demonstrates how to use the **RecursiveCharacterTextSplitterWrapper** template to chunk the Documents from a WikipediaLoaderWrapper template.
98 |
99 | configuration
100 |
101 | ```yaml
102 | agent:
103 | name: my_test_agent
104 | description: "Wikipedia loader example"
105 |
106 | templates:
107 |
108 | - template_name: InputTemplate
109 | class_name: InputTemplate
110 | attributes: {}
111 |
112 | - template_name: WikipediaLoaderWrapper
113 | class_name: WikipediaLoaderWrapper
114 | template_input: InputTemplate
115 | attributes:
116 | add_document_as_text_packet: false
117 | wikipedialoader_init:
118 | query: GenAI
119 | lang: en
120 | load_max_docs: 1
121 | load_all_available_meta: false
122 | doc_content_chars_max: 4000
126 | - template_name: RecursiveCharacterTextSplitterWrapper
127 | class_name: RecursiveCharacterTextSplitterWrapper
128 | template_input: WikipediaLoaderWrapper
129 | attributes:
130 | add_document_as_text_packet: false
131 | generic_key: WikipediaLoaderWrapper
132 | recursivecharactertextsplitter_init:
133 | separators: null
134 | keep_separator: true
135 | is_separator_regex: false
136 |
137 | ```
138 | To run, simply use:
139 |
140 | ```bash
141 | sinapsis run name_of_the_config.yml
142 | ```
143 |
144 |
145 |
146 |
147 | 📙 Documentation
148 |
149 | Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs).
150 |
151 | Tutorials for different projects within sinapsis are available at the [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials).
152 |
153 | 🔍 License
154 |
155 | This project is licensed under the AGPLv3 license, which encourages open collaboration and sharing. For more details, please refer to the [LICENSE](LICENSE) file.
156 |
157 | For commercial use, please refer to our [official Sinapsis website](https://sinapsis.tech) for information on obtaining a commercial license.
158 |
159 |
160 |
161 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "sinapsis-langchain-splitters"
version = "0.1.3"
description = "Sinapsis templates that wrap the LangChain text splitters"
readme = "README.md"
requires-python = ">=3.10"
authors = [{ name = "SinapsisAI", email = "dev@sinapsis.tech" }]
license-files = ["LICENSE"]
dependencies = [
    # The templates import `langchain.text_splitter` directly, so `langchain` is declared
    # explicitly instead of relying on another package pulling it in.
    # NOTE(review): capped below 1.0 because the 1.x series appears to have moved the legacy
    # `text_splitter` module out of the `langchain` package - confirm before relaxing.
    "langchain>=0.3.20,<1.0",
    "langchain-community>=0.3.20",
    "langchain-text-splitters>=0.3.7",
    "sinapsis>=0.1.1",
]

[[tool.uv.index]]
url = "https://pypi.sinapsis.tech/"

[dependency-groups]
dev = [
    "ruff>=0.8.3",
    "pre-commit>=4.0.1",
]

[tool.ruff]
line-length = 120
show-fixes = true

[tool.ruff.lint]
select = [
    "ARG",
    "ANN",
    "BLE",
    "C4",
    "E",
    "F",
    "FIX",
    "FLY",
    "I",
    "PERF",
    "PIE",
    "RUF",
    "RSE",
    "SIM",
    "SLOT",
    "T10",
    "T20",
    "TD",
    "TID",
]
ignore = ["ANN401"]

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project.urls]
Homepage = "https://sinapsis.tech"
Documentation = "https://docs.sinapsis.tech/docs"
Tutorials = "https://docs.sinapsis.tech/tutorials"
Repository = "https://github.com/Sinapsis-AI/sinapsis-langchain.git"
61 |
62 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sinapsis-AI/sinapsis-langchain/cf77ba025d0e2d42dadfc8a0e321953054ca7c18/packages/sinapsis_langchain_splitters/src/__init__.py
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/src/sinapsis_langchain_splitters/templates/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import importlib
3 | from typing import Callable, cast
4 |
5 | from sinapsis.templates import _import_template_package
6 |
# Dotted path of this package; the template modules listed below live underneath it.
_root_lib_path = "sinapsis_langchain_splitters.templates"

# Modules that are scanned for templates when this package is imported.
_ADDITIONAL_TEMPLATE_MODULES = [f"{_root_lib_path}.langchain_text_splitters"]

# Accumulates whatever `_import_template_package` reports for each module above.
# `__getattr__` in this file uses it as a mapping of template name -> module path to import.
_template_lookup: dict = {}
for t_module in _ADDITIONAL_TEMPLATE_MODULES:
    _template_lookup.update(_import_template_package(t_module))
16 |
17 |
def __getattr__(name: str) -> Callable:
    """Resolve a template lazily when it is requested as an attribute of this package.

    The module registered for ``name`` in ``_template_lookup`` is imported on demand and the
    attribute called ``name`` is returned from it.

    Args:
        name (str): Name of the requested template.

    Returns:
        Callable: The attribute named ``name`` taken from the imported module.

    Raises:
        AttributeError: If ``name`` is not a key of ``_template_lookup``.
    """
    if name not in _template_lookup:
        raise AttributeError(f"template `{name}` not found in {_root_lib_path}")
    owner_module = importlib.import_module(_template_lookup[name])
    return cast(Callable, getattr(owner_module, name))
23 |
24 |
# Public names of the package: every template name collected in `_template_lookup`.
__all__ = list(_template_lookup)
26 |
--------------------------------------------------------------------------------
/packages/sinapsis_langchain_splitters/src/sinapsis_langchain_splitters/templates/langchain_text_splitters.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from langchain import text_splitter
3 | from langchain_core.documents.base import Document
4 | from sinapsis_core.data_containers.data_packet import DataContainer, TextPacket
5 | from sinapsis_core.template_base import Template
6 | from sinapsis_core.template_base.base_models import OutputTypes, TemplateAttributes, UIPropertiesMetadata
7 | from sinapsis_core.template_base.dynamic_template import (
8 | BaseDynamicWrapperTemplate,
9 | WrapperEntryConfig,
10 | )
11 | from sinapsis_core.template_base.dynamic_template_factory import make_dynamic_template
12 | from sinapsis_core.utils.env_var_keys import SINAPSIS_BUILD_DOCS
13 |
# Keys of the dictionaries produced for every split input.
SOURCE: str = "source"
CONTENT: str = "content"


class LangChainTextSplitterBase(BaseDynamicWrapperTemplate):
    """
    Dynamic templates for LangChain library to split texts using different classes from
    the text_splitter list
    The template takes a Document (or a list of Documents) from the generic_data field of
    DataContainers or the text in a TextPacket, and processes the chunks, creating dictionaries
    with source and content keys

    Usage example

    agent:
      name: my_test_agent
    templates:
    - template_name: InputTemplate
      class_name: InputTemplate
      attributes: {}
    - template_name: RecursiveCharacterTextSplitterWrapper
      class_name: RecursiveCharacterTextSplitterWrapper
      template_input: InputTemplate
      attributes:
        add_document_as_text_packet: false
        generic_key: null
        recursivecharactertextsplitter_init:
          separators: null
          keep_separator: true
          is_separator_regex: false


    """

    WrapperEntry = WrapperEntryConfig(wrapped_object=text_splitter, exclude_module_atts=["split_text_on_tokens"])
    UIProperties = UIPropertiesMetadata(category="LangChain", output_type=OutputTypes.TEXT)

    class AttributesBaseModel(TemplateAttributes):
        """
        add_document_as_text_packet (bool): Whether to add the document as a TextPacket or not.
            NOTE(review): this flag is not read by any method defined in this class.
        generic_key (str | list[str] | None): Optional generic key to retrieve the data from.
            When it is not set, the texts stored in the container are split instead.
        """

        add_document_as_text_packet: bool = False
        generic_key: str | list[str] | None = None

    def _build_chunk_entry(self, source: str | None, text: str) -> dict:
        """Split ``text`` with the wrapped splitter and pair the result with its ``source``."""
        return {SOURCE: source, CONTENT: self.wrapped_callable.split_text(text)}

    def split_document(
        self, text_to_split: Document | list[Document] | TextPacket, chunks: list[dict] | None = None
    ) -> list:
        """For the document entry split the text with the langchain text_splitter.

        Every input type goes through the splitter's ``split_text`` method, so a single Document,
        a list of Documents and a TextPacket all produce entries of the same shape.

        Args:
            text_to_split (Document | list[Document] | TextPacket): The text to split. Can be of type
                Langchain Document, a list of Langchain Documents or a TextPacket. Falsy values
                (e.g. ``None`` or an empty list) add nothing.
            chunks (list | None): The incoming list of chunks if any. New entries are appended to it.
        Returns:
            list : list of processed chunks. Each entry is a dictionary with the ``source`` of the
                input and, under ``content``, the result of splitting its text. For Documents the
                source is ``None`` when the metadata has no ``source`` entry.
        """
        if chunks is None:
            chunks = []

        # Treat a single Document as a one-element list so both cases share one code path.
        if isinstance(text_to_split, Document):
            text_to_split = [text_to_split]

        if isinstance(text_to_split, list):
            chunks.extend(
                self._build_chunk_entry(document.metadata.get(SOURCE), document.page_content)
                for document in text_to_split
            )
        elif text_to_split:
            # Anything else that is truthy is handled as a TextPacket.
            chunks.append(self._build_chunk_entry(text_to_split.source, text_to_split.content))
        return chunks

    def execute(self, container: DataContainer) -> DataContainer:
        """Execute method of the template.

        When ``generic_key`` is set, the data returned by ``_get_generic_data`` is split; otherwise
        every entry in ``container.texts`` is split. The resulting chunks are then handed to
        ``_set_generic_data`` to be stored in the container.

        Args:
            container (DataContainer): Container holding the documents or texts to split.
        Returns:
            DataContainer: The same container, after the chunks have been stored.
        """
        chunks: list[dict] = []
        if self.attributes.generic_key:
            document = self._get_generic_data(container)
            chunks = self.split_document(document, chunks)
        else:
            for text in container.texts:
                chunks = self.split_document(text, chunks)
        self._set_generic_data(container, chunks)
        return container
104 |
105 |
def __getattr__(name: str) -> Template:
    """Build a dynamic template only when it is requested by name.

    Creating templates on demand avoids building the base models for every template up front
    and avoids potential import errors caused by packages that are not available.

    Args:
        name (str): Name of the requested template.

    Returns:
        Template: The template produced by ``make_dynamic_template`` for ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the wrapper entry's ``module_att_names``.
    """
    if name not in LangChainTextSplitterBase.WrapperEntry.module_att_names:
        raise AttributeError(f"template `{name}` not found in {__name__}")
    return make_dynamic_template(name, LangChainTextSplitterBase)
114 |
115 |
# The public names of this module are the names exposed by the wrapper entry.
__all__ = LangChainTextSplitterBase.WrapperEntry.module_att_names


# When SINAPSIS_BUILD_DOCS is truthy, build every template eagerly and publish each one
# as a module-level name.
if SINAPSIS_BUILD_DOCS:
    dynamic_templates = [__getattr__(template_name) for template_name in __all__]
    globals().update({built.__name__: built for built in dynamic_templates})
124 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "sinapsis-langchain"
version = "0.2.4"
description = "Sinapsis module that adds support for the Langchain library"
readme = "README.md"
requires-python = ">=3.10"
license-files = ["LICENSE"]
authors = [{ name = "SinapsisAI", email = "dev@sinapsis.tech" }]
dependencies = ["sinapsis>=0.1.1"]

[project.optional-dependencies]
all = [
    "sinapsis-langchain-readers[all]",
    "sinapsis-langchain-splitters",
]

[project.urls]
Homepage = "https://sinapsis.tech"
Documentation = "https://docs.sinapsis.tech/docs"
Tutorials = "https://docs.sinapsis.tech/tutorials"
Repository = "https://github.com/Sinapsis-AI/sinapsis-langchain.git"

[dependency-groups]
dev = [
    "ruff>=0.8.3",
    "pre-commit>=4.0.1",
]

# Package discovery looks inside the `packages` directory.
[tool.setuptools.packages.find]
where = ["packages"]

# Every directory under `packages/` is a workspace member, except build metadata.
[tool.uv.workspace]
members = ["packages/*"]
exclude = ["packages/*.egg-info"]

# The sub-packages referenced by the `all` extra are resolved from this workspace.
[tool.uv.sources]
sinapsis-langchain-readers = { workspace = true }
sinapsis-langchain-splitters = { workspace = true }

[[tool.uv.index]]
url = "https://pypi.sinapsis.tech/"

[tool.ruff]
line-length = 120
show-fixes = true

[tool.ruff.lint]
select = [
    "ARG",
    "ANN",
    "BLE",
    "C4",
    "E",
    "F",
    "FIX",
    "FLY",
    "I",
    "PERF",
    "PIE",
    "RUF",
    "RSE",
    "SIM",
    "SLOT",
    "T10",
    "T20",
    "TD",
    "TID",
]
ignore = ["ANN401"]
77 |
78 |
--------------------------------------------------------------------------------