├── .difyignore
├── .env.example
├── .gitignore
├── GUIDE.md
├── LICENSE
├── PRIVACY.md
├── README.md
├── _assets
    └── icon.svg
├── img
    ├── actions.png
    ├── retrieve.png
    └── upload.png
├── knowledge-manager.difypkg
├── knowledge-upload.difypkg
├── main.py
├── manifest.yaml
├── plugin.yaml
├── provider
    ├── knowledge.py
    └── knowledge.yaml
├── requirements.txt
├── test_tool.py
├── test_tool.py.combined
├── test_tool.py.retrieve
├── test_tool.py.upload
└── tools
    ├── knowledge_retrieve.py
    ├── knowledge_retrieve.yaml
    ├── knowledge_upload.py
    └── knowledge_upload.yaml


/.difyignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # Distribution / packaging
  7 | .Python
  8 | build/
  9 | develop-eggs/
 10 | dist/
 11 | downloads/
 12 | eggs/
 13 | .eggs/
 14 | lib/
 15 | lib64/
 16 | parts/
 17 | sdist/
 18 | var/
 19 | wheels/
 20 | share/python-wheels/
 21 | *.egg-info/
 22 | .installed.cfg
 23 | *.egg
 24 | MANIFEST
 25 | 
 26 | # PyInstaller
 27 | #  Usually these files are written by a python script from a template
 28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 29 | *.manifest
 30 | *.spec
 31 | 
 32 | # Installer logs
 33 | pip-log.txt
 34 | pip-delete-this-directory.txt
 35 | 
 36 | # Unit test / coverage reports
 37 | htmlcov/
 38 | .tox/
 39 | .nox/
 40 | .coverage
 41 | .coverage.*
 42 | .cache
 43 | nosetests.xml
 44 | coverage.xml
 45 | *.cover
 46 | *.py,cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | cover/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | db.sqlite3
 59 | db.sqlite3-journal
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | .pybuilder/
 73 | target/
 74 | 
 75 | # Jupyter Notebook
 76 | .ipynb_checkpoints
 77 | 
 78 | # IPython
 79 | profile_default/
 80 | ipython_config.py
 81 | 
 82 | # pyenv
 83 | #   For a library or package, you might want to ignore these files since the code is
 84 | #   intended to run in multiple environments; otherwise, check them in:
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | Pipfile.lock
 93 | 
 94 | # UV
 95 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 96 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
 97 | #   commonly ignored for libraries.
 98 | uv.lock
 99 | 
100 | # poetry
101 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
103 | #   commonly ignored for libraries.
104 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | poetry.lock
106 | 
107 | # pdm
108 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | #   in version control.
112 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113 | .pdm.toml
114 | .pdm-python
115 | .pdm-build/
116 | 
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 | 
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 | 
124 | # SageMath parsed files
125 | *.sage.py
126 | 
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 | 
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 | 
140 | # Rope project settings
141 | .ropeproject
142 | 
143 | # mkdocs documentation
144 | /site
145 | 
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 | 
151 | # Pyre type checker
152 | .pyre/
153 | 
154 | # pytype static type analyzer
155 | .pytype/
156 | 
157 | # Cython debug symbols
158 | cython_debug/
159 | 
160 | # PyCharm
161 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
164 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | .idea/
166 | 
167 | # Vscode
168 | .vscode/
169 | 
170 | # Git
171 | .git/
172 | .gitignore
173 | 
174 | # Mac
175 | .DS_Store
176 | 
177 | # Windows
178 | Thumbs.db
179 | 
180 | # Built plugin packages
181 | #  Keep previously built archives out of newly packaged plugins.
182 | *.difypkg
183 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | INSTALL_METHOD=remote
2 | REMOTE_INSTALL_HOST=debug.dify.ai
3 | REMOTE_INSTALL_PORT=5003
4 | REMOTE_INSTALL_KEY=********-****-****-****-************
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | 
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 | 
143 | # Rope project settings
144 | .ropeproject
145 | 
146 | # mkdocs documentation
147 | /site
148 | 
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 | 
154 | # Pyre type checker
155 | .pyre/
156 | 
157 | # pytype static type analyzer
158 | .pytype/
159 | 
160 | # Cython debug symbols
161 | cython_debug/
162 | 
163 | # PyCharm
164 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
167 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | .idea/
169 | 
170 | # Vscode
171 | .vscode/
172 | 


--------------------------------------------------------------------------------
/GUIDE.md:
--------------------------------------------------------------------------------
  1 | # Dify知识库管理插件使用指南
  2 | 
  3 | ## 简介
  4 | 
  5 | Dify知识库管理插件是一个用于管理Dify知识库的综合工具。它支持创建知识库、上传文本内容以及从知识库检索信息，提供简单的界面来配置各种操作参数。
  6 | 
  7 | ## 安装前准备
  8 | 
  9 | 在安装和使用此插件前，您需要：
 10 | 
 11 | 1. 拥有Dify平台账号
 12 | 2. 获取知识库API Key
 13 | 
 14 | ## 安装步骤
 15 | 
 16 | 1. 在Dify平台中，点击右上角的"插件"按钮
 17 | 2. 选择"本地上传"选项
 18 | 3. 上传插件文件（.difypkg格式）
 19 | 4. 完成安装后，您需要配置API Key才能使用插件
 20 | 
 21 | ## 配置API Key
 22 | 
 23 | 1. 在插件列表中找到"Dify知识库管理"插件
 24 | 2. 点击"配置"按钮
 25 | 3. 在弹出的窗口中输入您的知识库API Key
 26 | 4. 点击"保存"按钮完成配置
 27 | 
 28 | ## 如何获取知识库API Key
 29 | 
 30 | 1. 登录Dify平台
 31 | 2. 导航到知识库页面
 32 | 3. 从左侧导航切换到**API访问**页面
 33 | 4. 在**API Keys**部分管理访问凭证
 34 | 5. 如果没有API Key，点击"创建"按钮生成一个新的API Key
 35 | 
 36 | ## 如何获取知识库ID
 37 | 
 38 | 知识库ID可以从知识库URL获取，例如：
 39 | `https://app.dify.ai/datasets/12345678-1234-1234-1234-123456789012`
 40 | 其中`12345678-1234-1234-1234-123456789012`就是知识库ID。
 41 | 
 42 | ## 使用方法
 43 | 
 44 | ### 上传功能
 45 | 
 46 | #### 在Agent应用中使用
 47 | 
 48 | 1. 创建或编辑一个Agent应用
 49 | 2. 在应用编排页面底部找到"工具"选项
 50 | 3. 选择"知识库上传"工具
 51 | 4. 配置工具参数：
 52 |    - 知识库名称：要创建的知识库名称
 53 |    - 描述：知识库的描述（可选）
 54 |    - 文档名称：要创建的文档名称
 55 |    - 文本内容：要上传的文本内容
 56 |    - 权限：选择知识库权限（仅自己或公开可读）
 57 |    - 索引技术：选择高质量(high_quality)或经济(economy)模式
 58 | 5. 保存配置
 59 | 
 60 | #### 在Chatflow/Workflow应用中使用
 61 | 
 62 | 1. 在工作流编排画布中，点击节点末尾的"+"号
 63 | 2. 选择"知识库上传"工具
 64 | 3. 将其连接到上游节点
 65 | 4. 配置工具参数
 66 | 5. 保存配置
 67 | 
 68 | ### 检索功能
 69 | 
 70 | #### 在Agent应用中使用
 71 | 
 72 | 1. 创建或编辑一个Agent应用
 73 | 2. 在应用编排页面底部找到"工具"选项
 74 | 3. 选择"知识库检索"工具
 75 | 4. 配置工具参数：
 76 |    - 知识库ID：要检索的知识库ID
 77 |    - 查询内容：在知识库中搜索的查询内容
 78 |    - 搜索方法：选择搜索方法（关键词检索、语义检索、全文检索或混合检索）
 79 |    - 启用重排序：是否启用搜索结果重排序
 80 |    - 结果数量：返回的结果数量
 81 |    - 启用分数阈值：是否启用分数阈值过滤
 82 |    - 分数阈值：结果的最小分数阈值
 83 | 5. 保存配置
 84 | 
 85 | #### 在Chatflow/Workflow应用中使用
 86 | 
 87 | 1. 在工作流编排画布中，点击节点末尾的"+"号
 88 | 2. 选择"知识库检索"工具
 89 | 3. 将其连接到上游节点
 90 | 4. 配置工具参数
 91 | 5. 保存配置
 92 | 
 93 | ## 上传功能参数说明
 94 | 
 95 | - **知识库名称**（必填）：要创建的知识库名称
 96 | - **描述**（可选）：知识库的描述
 97 | - **文档名称**（必填）：要创建的文档名称
 98 | - **文本内容**（必填）：要上传的文本内容
 99 | - **权限**（必填）：知识库的权限设置，可选值：
100 |   - `only_me`：仅自己可见，只有创建者可以访问
101 |   - `publicly_readable`：公开可读，所有人可以读取
102 | - **索引技术**（必填）：选择索引技术，可选值：
103 |   - `high_quality`：高质量模式，提供更准确的检索结果，但处理时间较长
104 |   - `economy`：经济模式，处理速度较快，但检索准确性可能略低
105 | 
106 | ## 检索功能参数说明
107 | 
108 | - **知识库ID**（必填）：要检索的知识库ID
109 | - **查询内容**（必填）：在知识库中搜索的查询内容
110 | - **搜索方法**（可选，默认为语义检索）：用于搜索知识库的方法，可选值：
111 |   - `keyword_search`：关键词检索，基于关键词匹配
112 |   - `semantic_search`：语义检索，基于语义理解
113 |   - `full_text_search`：全文检索，搜索整个文本内容
114 |   - `hybrid_search`：混合检索，结合关键词和语义检索
115 | - **启用重排序**（可选，默认为否）：是否启用搜索结果重排序
116 | - **结果数量**（可选，默认为3）：返回的结果数量
117 | - **启用分数阈值**（可选，默认为否）：是否启用分数阈值过滤
118 | - **分数阈值**（可选，默认为0.5）：结果的最小分数阈值，范围0-1
119 | 
120 | ## 上传工作流程
121 | 
122 | 1. 创建新的知识库：插件会根据提供的名称和描述创建一个新的知识库
123 | 2. 创建文档：在知识库中创建一个空文档
124 | 3. 上传文本内容：将文本内容上传到创建的文档中
125 | 4. 处理文档：Dify平台会自动处理上传的文本内容，包括文本分段和索引
126 | 5. 返回结果：插件会返回处理状态和结果
127 | 
128 | ## 检索工作流程
129 | 
130 | 1. 提供知识库ID和查询内容
131 | 2. 选择搜索方法和其他参数
132 | 3. 执行知识库检索
133 | 4. 返回检索结果和相关信息
134 | 
135 | ## 搜索方法说明
136 | 
137 | - **关键词检索**：基于关键词匹配，适合精确查找特定术语或短语
138 | - **语义检索**：基于语义理解，能够找到语义相关但可能不包含完全相同关键词的内容
139 | - **全文检索**：搜索整个文本内容，适合需要在大量文本中查找信息的场景
140 | - **混合检索**：结合关键词和语义检索，提供更全面的搜索结果
141 | 
142 | ## 常见问题
143 | 
144 | ### 上传或检索失败，提示"API Key validation failed"
145 | 
146 | 请检查您提供的API Key是否正确，以及是否有足够的权限访问知识库。
147 | 
148 | ### 上传成功后，文档在知识库中不可见
149 | 
150 | 文档上传后需要一段时间进行处理和索引。您可以在知识库页面查看文档的处理状态。
151 | 
152 | ### 检索失败，提示"知识库不存在或无权访问"
153 | 
154 | 请确保您提供的知识库ID是正确的，并且您的API Key有权限访问该知识库。
155 | 
156 | ### 检索结果不符合预期
157 | 
158 | 尝试调整搜索方法、启用重排序或调整分数阈值，以获得更符合预期的结果。
159 | 
160 | ## 技术支持
161 | 
162 | 如果您在使用过程中遇到任何问题，请联系插件作者或在Dify社区论坛中提问。


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Steven Lynn
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/PRIVACY.md:
--------------------------------------------------------------------------------
 1 | # 隐私政策
 2 | 
 3 | ## 数据收集和使用
 4 | 
 5 | 本插件需要访问以下信息：
 6 | 
 7 | 1. **Dify知识库API Key**：用于验证用户身份并授权访问Dify知识库。API Key存储在用户的Dify工作区中，不会被插件保存或传输到其他地方。
 8 | 
 9 | 2. **文本内容**：插件需要读取用户提供的文本内容，以便上传到Dify知识库。文本内容仅用于上传目的，不会被插件保存或用于其他目的。
10 | 
11 | 3. **查询内容**：插件需要读取用户提供的查询内容，以便在Dify知识库中进行检索。查询内容仅用于检索目的，不会被插件保存或用于其他目的。
12 | 
13 | 4. **知识库ID**：插件需要知道用户想要检索的知识库ID。知识库ID仅用于检索目的，不会被插件保存或用于其他目的。
14 | 
15 | ## 数据传输
16 | 
17 | 所有数据传输都通过HTTPS协议进行加密。插件直接与Dify API通信，不经过任何第三方服务器。
18 | 
19 | ## 数据存储
20 | 
21 | 插件本身不存储任何用户数据。所有上传的文本内容都存储在用户的Dify知识库中，受Dify平台的隐私政策保护。所有检索操作都是实时进行的，结果直接返回给用户，不会在插件中保存。
22 | 
23 | ## 第三方服务
24 | 
25 | 本插件使用Dify API服务。使用本插件即表示您同意Dify的服务条款和隐私政策。


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Dify Knowledge Base Manager Plugin
  2 | 
  3 | A plugin for managing Dify knowledge base - creating knowledge bases, uploading text content, and retrieving information.
  4 | ![](./img/actions.png)
  5 | 
  6 | ## Prerequisites
  7 | 
  8 | Before using this plugin, you need:
  9 | 
 10 | 1. A Dify platform account
 11 | 2. Knowledge base API Key
 12 | 
 13 | ## How to Get API Key
 14 | 
 15 | 1. Log in to [Dify.ai](https://cloud.dify.ai)
 16 | 2. Navigate to the knowledge base page
 17 | 3. Switch to the **API Access** page from the left navigation
 18 | 4. Manage access credentials in the **API Keys** section
 19 | <iframe
 20 |   src="https://www.motionshot.app/walkthrough/67d03b3a6058d05660c65e41/embed?hideSteps=1&hideAsSteps=1&hideDownload=1&hideCopy=1&fullscreen=1"
 21 |   width="100%"
 22 |   style='border: 1px solid rgba(0,0,0,0.16); border-radius: 20px; min-height: 740px'
 23 | ></iframe>
 24 | 
 25 | ## How to Get Knowledge Base ID
 26 | 
 27 | The knowledge base ID can be obtained from the knowledge base URL, for example:
 28 | `https://app.dify.ai/datasets/12345678-1234-1234-1234-123456789012`
 29 | where `12345678-1234-1234-1234-123456789012` is the knowledge base ID.
 30 | 
 31 | ## Upload Tool Parameters
 32 | 
 33 | ![](./img/upload.png)
 34 | - **Knowledge Base Name**: The name of the knowledge base to create (required)
 35 | - **Description**: Description of the knowledge base (optional)
 36 | - **Document Name**: The name of the document to create (required)
 37 | - **Text Content**: The text content to upload (required)
 38 | - **Permission**: Knowledge base permission settings (required)
 39 |   - only_me: Only the creator can access
 40 |   - publicly_readable: Everyone can read
 41 | - **Indexing Technique**: Choose high_quality or economy mode (required)
 42 | 
 43 | ## Retrieve Tool Parameters
 44 | 
 45 | ![](./img/retrieve.png)
 46 | - **Knowledge Base ID**: The ID of the knowledge base to retrieve from (required)
 47 | - **Query**: The query to search for in the knowledge base (required)
 48 | - **Search Method**: The method to use for searching the knowledge base (optional, default is semantic search)
 49 |   - keyword_search: Keyword search, based on keyword matching
 50 |   - semantic_search: Semantic search, based on semantic understanding
 51 |   - full_text_search: Full text search, searches the entire text content
 52 |   - hybrid_search: Hybrid search, combines keyword and semantic search
 53 | - **Enable Reranking**: Whether to enable reranking of search results (optional, default is false)
 54 | - **Number of Results**: The number of results to return (optional, default is 3)
 55 | - **Enable Score Threshold**: Whether to enable score threshold filtering (optional, default is false)
 56 | - **Score Threshold**: The minimum score threshold for results (0-1) (optional, default is 0.5)
 57 | 
 58 | ## Upload Output
 59 | 
 60 | The upload tool returns a JSON response with the following structure:
 61 | ```json
 62 | {
 63 |   "status": "success",
 64 |   "knowledge_base_id": "12345678-1234-1234-1234-123456789012",
 65 |   "knowledge_base": {
 66 |     "id": "12345678-1234-1234-1234-123456789012",
 67 |     "name": "Knowledge Base Name"
 68 |   },
 69 |   "document": {
 70 |     "id": "document-id",
 71 |     "name": "Document Name",
 72 |     "batch": "batch-id",
 73 |     "status": "completed"
 74 |   }
 75 | }
 76 | ```
 77 | 
 78 | ## Retrieve Output
 79 | 
 80 | The retrieve tool returns a JSON response with the following structure:
 81 | ```json
 82 | {
 83 |   "status": "success",
 84 |   "query": "Query content",
 85 |   "knowledge_base_id": "Knowledge base ID",
 86 |   "results": [
 87 |     {
 88 |       "segment": {
 89 |         "id": "Segment ID",
 90 |         "content": "Segment content",
 91 |         "document": {
 92 |           "id": "Document ID",
 93 |           "name": "Document name"
 94 |         }
 95 |       },
 96 |       "score": 0.95
 97 |     }
 98 |   ]
 99 | }
100 | ```
101 | 
102 | The `knowledge_base_id` field can be used for further operations with the knowledge base.
103 | 
104 | ## Notes
105 | 
106 | - Text content requires some time for processing and indexing after upload
107 | - Processing large amounts of text may take longer
108 | - If processing is not complete, the plugin will return the current status, and you can check the processing results later on the Dify platform
109 | - Text content will be automatically segmented for processing, using automatic mode by default
110 | 
111 | ## Supported File Formats
112 | 
113 | - Text files (.txt)
114 | - PDF files (.pdf)
115 | - Word documents (.doc, .docx)
116 | - Markdown files (.md, .markdown)
117 | - HTML files (.html, .htm)
118 | - Excel files (.xlsx)
119 | - CSV files (.csv)
120 | 
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/_assets/icon.svg:
--------------------------------------------------------------------------------
1 | <svg width="100" height="100" xmlns="http://www.w3.org/2000/svg">
2 |   <rect x="20" y="20" width="60" height="60" rx="5" ry="5" fill="#4A90E2" />
3 |   <rect x="30" y="30" width="40" height="30" rx="2" ry="2" fill="white" />
4 |   <path d="M35 40 H65 M35 45 H55 M35 50 H60" stroke="#4A90E2" stroke-width="2" />
5 |   <path d="M50 65 L50 80 M40 70 L50 65 L60 70" fill="none" stroke="white" stroke-width="3" />
6 | </svg>


--------------------------------------------------------------------------------
/img/actions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stvlynn/better-knowledge-base/d78e166819ff14e774f8e612ca7e4b1255b59563/img/actions.png


--------------------------------------------------------------------------------
/img/retrieve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stvlynn/better-knowledge-base/d78e166819ff14e774f8e612ca7e4b1255b59563/img/retrieve.png


--------------------------------------------------------------------------------
/img/upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stvlynn/better-knowledge-base/d78e166819ff14e774f8e612ca7e4b1255b59563/img/upload.png


--------------------------------------------------------------------------------
/knowledge-manager.difypkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stvlynn/better-knowledge-base/d78e166819ff14e774f8e612ca7e4b1255b59563/knowledge-manager.difypkg


--------------------------------------------------------------------------------
/knowledge-upload.difypkg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stvlynn/better-knowledge-base/d78e166819ff14e774f8e612ca7e4b1255b59563/knowledge-upload.difypkg


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from dify_plugin import Plugin, DifyPluginEnv
2 | 
3 | plugin = Plugin(DifyPluginEnv(MAX_REQUEST_TIMEOUT=120))  # module-level plugin instance; timeout unit presumably seconds — TODO confirm in dify_plugin
4 | 
5 | if __name__ == '__main__':  # start the plugin only when this file is executed directly, not on import
6 |     plugin.run()
7 | 


--------------------------------------------------------------------------------
/manifest.yaml:
--------------------------------------------------------------------------------
 1 | version: 0.0.1  # plugin version; meta.version below carries the same value
 2 | type: plugin
 3 | author: stvlynn
 4 | name: knowledge-manager
 5 | label:  # display name per locale; ja_JP and pt_BR reuse the English text
 6 |   en_US: Dify Knowledge Base Manager
 7 |   ja_JP: Dify Knowledge Base Manager
 8 |   zh_Hans: Dify知识库管理器
 9 |   pt_BR: Dify Knowledge Base Manager
10 | description:  # summary per locale; ja_JP and pt_BR reuse the English text
11 |   en_US: A tool to manage Dify Knowledge Base - create, upload text content, and retrieve information.
12 |   ja_JP: A tool to manage Dify Knowledge Base - create, upload text content, and retrieve information.
13 |   zh_Hans: 一个管理Dify知识库的工具 - 创建、上传文本内容和检索信息。
14 |   pt_BR: A tool to manage Dify Knowledge Base - create, upload text content, and retrieve information.
15 | icon: icon.svg  # file name only; the repository keeps the file at _assets/icon.svg
16 | resource:
17 |   memory: 268435456  # 256 * 1024 * 1024; presumably bytes (256 MiB) — confirm
18 |   permission: {}  # empty mapping: no permissions are declared
19 | plugins:
20 |   tools:  # provider definition files, as laid out in the repository
21 |     - provider/knowledge.yaml
22 | meta:
23 |   version: 0.0.1
24 |   arch:  # CPU architectures listed for this plugin
25 |     - amd64
26 |     - arm64
27 |   runner:
28 |     language: python
29 |     version: "3.12"  # quoted so YAML keeps it a string instead of a float
30 |     entrypoint: main  # presumably the main.py module in the repository root — confirm
31 | created_at: 2025-03-04T23:40:41.394653+08:00
32 | privacy: PRIVACY.md  # privacy policy file in the repository root
33 | verified: false
34 | 


--------------------------------------------------------------------------------
/plugin.yaml:
--------------------------------------------------------------------------------
 1 | name: knowledge-manager  # same name as declared in manifest.yaml
 2 | version: 0.0.1
 3 | description:  # summary per locale
 4 |   en_US: Manage Dify knowledge base - create, upload text content, and retrieve information
 5 |   zh_Hans: 管理Dify知识库 - 创建、上传文本内容和检索信息
 6 | author: stvlynn
 7 | avatar: ./_assets/icon.svg
 8 | category: knowledge
 9 | type: tool
10 | env:  # environment variables declared for the plugin
11 |   - name: DIFY_KNOWLEDGE_API_KEY  # provider/knowledge.py also writes the api_key credential to this variable
12 |     description:
13 |       en_US: Dify Knowledge API Key
14 |       zh_Hans: Dify知识库API密钥
15 |     type: secret
16 |     required: true
17 |     help:
18 |       en_US: You can get the API Key from the API Access page in the Dify knowledge base
19 |       zh_Hans: 您可以从Dify知识库的API访问页面获取API密钥


--------------------------------------------------------------------------------
/provider/knowledge.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | import requests
 3 | import os
 4 | 
 5 | from dify_plugin import ToolProvider
 6 | from dify_plugin.errors.tool import ToolProviderCredentialValidationError
 7 | 
 8 | 
 9 | class KnowledgeProvider(ToolProvider):
10 |     def _validate_credentials(self, credentials: dict[str, Any]) -> None:
11 |         try:
12 |             api_key = credentials.get('api_key')
13 |             if not api_key:
14 |                 raise ValueError("API Key is required")
15 |             
16 |             # 将API Key保存到环境变量中，以便工具可以访问
17 |             os.environ['DIFY_KNOWLEDGE_API_KEY'] = api_key
18 |             
19 |             # 尝试使用API Key获取知识库列表，验证API Key是否有效
20 |             headers = {
21 |                 'Authorization': f'Bearer {api_key}',
22 |                 'Content-Type': 'application/json'
23 |             }
24 |             
25 |             response = requests.get('https://api.dify.ai/v1/datasets?page=1&limit=1', headers=headers)
26 |             
27 |             if response.status_code != 200:
28 |                 error_data = response.json()
29 |                 raise ValueError(f"API Key validation failed: {error_data.get('message', 'Unknown error')}")
30 |                 
31 |         except Exception as e:
32 |             raise ToolProviderCredentialValidationError(str(e))
33 | 


--------------------------------------------------------------------------------
/provider/knowledge.yaml:
--------------------------------------------------------------------------------
 1 | identity:  # provider identity and localized display text
 2 |   author: stvlynn
 3 |   name: knowledge
 4 |   label:
 5 |     en_US: Dify Knowledge Base
 6 |     zh_Hans: Dify 知识库
 7 |   description:
 8 |     en_US: A tool to manage Dify Knowledge Base - upload content and retrieve information
 9 |     zh_Hans: 一个管理Dify知识库的工具 - 上传内容和检索信息
10 |   icon: icon.svg
11 |   tags:
12 |     - productivity
13 |     - education
14 | credentials_for_provider:  # credential fields collected for this provider
15 |   api_key:  # read in provider/knowledge.py via credentials.get('api_key')
16 |     type: secret-input
17 |     required: true
18 |     label:
19 |       en_US: Dify Knowledge Base API Key
20 |       zh_Hans: Dify 知识库 API Key
21 |     placeholder:
22 |       en_US: Please input your Dify Knowledge Base API Key
23 |       zh_Hans: 请输入您的Dify知识库API Key
24 |     help:
25 |       en_US: Get your API Key from Dify Knowledge Base API Access page
26 |       zh_Hans: 从Dify知识库API访问页面获取您的API Key
27 | tools:  # tool definition files listed for this provider
28 |   - tools/knowledge_upload.yaml
29 |   - tools/knowledge_retrieve.yaml
30 | extra:
31 |   python:
32 |     source: provider/knowledge.py  # Python file that defines KnowledgeProvider
33 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dify-plugin==0.0.1b70
2 | requests>=2.31.0
3 | 


--------------------------------------------------------------------------------
/test_tool.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import time
  4 | import requests
  5 | 
  6 | # Test parameters
  7 | knowledge_base_name = "Test KB"
  8 | description = ""
  9 | document_name = "Test Doc"
 10 | text_content = "This is a test content for the document."
 11 | permission = "only_me"
 12 | indexing_technique = "high_quality"
 13 | 
 14 | # Set API Key
 15 | api_key = "test_key"  # Replace with your actual API Key
 16 | 
 17 | # Set request headers
 18 | headers = {
 19 |     'Authorization': f'Bearer {api_key}',
 20 |     'Content-Type': 'application/json'
 21 | }
 22 | 
 23 | print("Starting API request test...")
 24 | 
 25 | # Step 1: Create knowledge base
 26 | print("Step 1: Creating knowledge base")
 27 | url = "https://api.dify.ai/v1/datasets"
 28 | payload = {
 29 |     "name": knowledge_base_name,
 30 |     "description": description,
 31 |     "permission": permission,
 32 |     "indexing_technique": indexing_technique,
 33 |     "provider": "vendor"
 34 | }
 35 | 
 36 | print(f"Request URL: {url}")
 37 | print(f"Request parameters: {payload}")
 38 | 
 39 | try:
 40 |     response = requests.post(url, headers=headers, json=payload)
 41 |     print(f"Response status code: {response.status_code}")
 42 |     print(f"Response content: {response.text}")
 43 |     
 44 |     if response.status_code == 200:
 45 |         result = response.json()
 46 |         dataset_id = result.get('id')
 47 |         print(f"Knowledge base created successfully, ID: {dataset_id}")
 48 |         
 49 |         # Step 2: Create document by text
 50 |         print("\nStep 2: Creating document")
 51 |         url = f"https://api.dify.ai/v1/datasets/{dataset_id}/document/create-by-text"
 52 |         
 53 |         # Prepare processing rules
 54 |         process_rule = {
 55 |             "mode": "automatic"
 56 |         }
 57 |         
 58 |         payload = {
 59 |             "name": document_name,
 60 |             "text": text_content,
 61 |             "indexing_technique": indexing_technique,
 62 |             "process_rule": process_rule
 63 |         }
 64 |         
 65 |         print(f"Request URL: {url}")
 66 |         print(f"Request parameters: {payload}")
 67 |         
 68 |         response = requests.post(url, headers=headers, json=payload)
 69 |         print(f"Response status code: {response.status_code}")
 70 |         print(f"Response content: {response.text}")
 71 |         
 72 |         if response.status_code == 200:
 73 |             result = response.json()
 74 |             document = result.get('document', {})
 75 |             document_id = document.get('id')
 76 |             batch = result.get('batch', '')
 77 |             print(f"Document created successfully, ID: {document_id}, Batch: {batch}")
 78 |             
 79 |             # Step 3: Check document processing status
 80 |             print("\nStep 3: Checking document processing status")
 81 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status"
 82 |             
 83 |             print(f"Request URL: {url}")
 84 |             
 85 |             response = requests.get(url, headers=headers)
 86 |             print(f"Response status code: {response.status_code}")
 87 |             print(f"Response content: {response.text}")
 88 |             
 89 |             if response.status_code == 200:
 90 |                 result = response.json()
 91 |                 documents = result.get('data', [])
 92 |                 if documents:
 93 |                     document = documents[0]
 94 |                     status = document.get('indexing_status', 'unknown')
 95 |                     print(f"Document status: {status}")
 96 |                     
 97 |                     # 测试输出JSON响应
 98 |                     print("\nStep 4: 输出最终JSON响应")
 99 |                     final_response = {
100 |                         "status": "success",
101 |                         "knowledge_base_id": dataset_id,  # 单独列出知识库ID作为顶级字段
102 |                         "knowledge_base": {
103 |                             "id": dataset_id,
104 |                             "name": knowledge_base_name
105 |                         },
106 |                         "document": {
107 |                             "id": document_id,
108 |                             "name": document_name,
109 |                             "batch": batch,
110 |                             "status": status
111 |                         }
112 |                     }
113 |                     print(f"最终JSON响应: {json.dumps(final_response, indent=2)}")
114 |                     print(f"知识库ID: {final_response['knowledge_base_id']}")
115 |                 else:
116 |                     print("No document status information found")
117 |             else:
118 |                 error_data = response.json()
119 |                 error_code = error_data.get('code', 'unknown_error')
120 |                 error_message = error_data.get('message', 'Unknown error')
121 |                 
122 |                 if error_code == "archived_document_immutable":
123 |                     print("Error: The archived document is not editable.")
124 |                 elif error_code == "document_already_finished":
125 |                     print("Error: The document has been processed. Please refresh the page or go to the document details.")
126 |                 elif error_code == "document_indexing":
127 |                     print("Error: The document is being processed and cannot be edited.")
128 |                 else:
129 |                     print(f"Error checking document status: {error_message}")
130 |         else:
131 |             error_data = response.json()
132 |             error_code = error_data.get('code', 'unknown_error')
133 |             error_message = error_data.get('message', 'Unknown error')
134 |             
135 |             if error_code == "no_file_uploaded":
136 |                 print("Error: Please upload your file.")
137 |             elif error_code == "too_many_files":
138 |                 print("Error: Only one file is allowed.")
139 |             elif error_code == "file_too_large":
140 |                 print("Error: File size exceeded.")
141 |             elif error_code == "unsupported_file_type":
142 |                 print("Error: File type not allowed.")
143 |             elif error_code == "high_quality_dataset_only":
144 |                 print("Error: Current operation only supports 'high-quality' datasets.")
145 |             elif error_code == "dataset_not_initialized":
146 |                 print("Error: The dataset is still being initialized or indexing. Please wait a moment.")
147 |             elif error_code == "invalid_metadata":
148 |                 print("Error: The metadata content is incorrect. Please check and verify.")
149 |             else:
150 |                 print(f"Error creating document: {error_message}")
151 |     else:
152 |         error_data = response.json()
153 |         error_code = error_data.get('code', 'unknown_error')
154 |         error_message = error_data.get('message', 'Unknown error')
155 |         
156 |         if error_code == "dataset_name_duplicate":
157 |             print("Error: The dataset name already exists. Please modify your dataset name.")
158 |         elif error_code == "invalid_action":
159 |             print("Error: Invalid action.")
160 |         else:
161 |             print(f"Error creating knowledge base: {error_message}")
162 | except Exception as e:
163 |     print(f"Error occurred during test: {str(e)}")
164 | 
165 | print("Test completed")
166 | 
167 | def test_knowledge_upload_tool():
168 |     """Test the KnowledgeUploadTool with the modified output"""
169 |     from tools.knowledge_upload import KnowledgeUploadTool
170 |     from dify_plugin.entities.tool import ToolInvokeMessage
171 |     from unittest.mock import MagicMock, patch
172 |     
173 |     print("\n=== Testing KnowledgeUploadTool with modified output ===")
174 |     
175 |     # Create mock objects for runtime and session
176 |     mock_runtime = MagicMock()
177 |     mock_session = MagicMock()
178 |     
179 |     # Create tool instance with mock objects
180 |     tool = KnowledgeUploadTool(mock_runtime, mock_session)
181 |     
182 |     # Set test parameters
183 |     tool_parameters = {
184 |         "knowledge_base_name": "Test KB",
185 |         "description": "Test description",
186 |         "document_name": "Test Doc",
187 |         "text": "This is a test content for the document.",
188 |         "permission": "only_me",
189 |         "indexing_technique": "high_quality"
190 |     }
191 |     
192 |     # Mock the API calls
193 |     tool._create_knowledge_base = lambda headers, name, desc, perm, tech: "test-dataset-id-123"
194 |     tool._create_document_by_text = lambda headers, dataset_id, doc_name, text, tech: {"id": "test-doc-id-456", "batch": "test-batch-789"}
195 |     tool._check_document_status = lambda headers, dataset_id, batch: "completed"
196 |     
197 |     # Mock the create_text_message and create_json_message methods
198 |     def mock_create_text_message(content):
199 |         mock_msg = MagicMock()
200 |         mock_msg.message_type = "text"
201 |         mock_msg.content = content
202 |         return mock_msg
203 |     
204 |     def mock_create_json_message(content):
205 |         mock_msg = MagicMock()
206 |         mock_msg.message_type = "json"
207 |         mock_msg.content = json.dumps(content)
208 |         return mock_msg
209 |     
210 |     tool.create_text_message = mock_create_text_message
211 |     tool.create_json_message = mock_create_json_message
212 |     
213 |     # Mock environment variable for API key
214 |     with patch.dict('os.environ', {'DIFY_KNOWLEDGE_API_KEY': 'test_api_key'}):
215 |         # Invoke the tool
216 |         messages = list(tool._invoke(tool_parameters))
217 |         
218 |         # Print all messages
219 |         for i, msg in enumerate(messages):
220 |             print(f"Message {i+1}: {msg.message_type}")
221 |             if msg.message_type == "json":
222 |                 json_content = json.loads(msg.content)
223 |                 print(f"JSON content: {json.dumps(json_content, indent=2)}")
224 |                 
225 |                 # Verify the output fields
226 |                 assert "id" in json_content, "Missing 'id' field in output"
227 |                 assert "status" in json_content, "Missing 'status' field in output"
228 |                 assert json_content["status"] == 200, "Status should be 200"
229 |                 assert json_content["id"] == "test-dataset-id-123", "ID should match the test dataset ID"
230 |                 
231 |                 print("✅ Output validation passed!")
232 |             else:
233 |                 print(f"Content: {msg.content}")
234 |     
235 |     print("=== Test completed ===\n")
236 | 
237 | if __name__ == "__main__":
238 |     # Run the new test for the modified output
239 |     test_knowledge_upload_tool()
240 |     
241 |     # Uncomment to run the original API test
242 |     # try:
243 |     #     # Step 1: Create knowledge base
244 |     #     # ... existing code ...
245 |     # except Exception as e:
246 |     #     print(f"Error occurred during test: {str(e)}")
247 |     # 
248 |     # print("Test completed") 


--------------------------------------------------------------------------------
/test_tool.py.combined:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 


--------------------------------------------------------------------------------
/test_tool.py.retrieve:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import requests
  4 | 
  5 | # 测试参数
  6 | dataset_id = "your_dataset_id"  # 替换为您的知识库ID
  7 | query = "测试查询"
  8 | search_method = "semantic_search"
  9 | reranking_enable = False
 10 | top_k = 3
 11 | score_threshold_enabled = False
 12 | score_threshold = 0.5
 13 | 
 14 | # 设置API Key
 15 | api_key = "your_api_key"  # 替换为您的API Key
 16 | 
 17 | # 设置请求头
 18 | headers = {
 19 |     'Authorization': f'Bearer {api_key}',
 20 |     'Content-Type': 'application/json'
 21 | }
 22 | 
 23 | print("开始API请求测试...")
 24 | 
 25 | # 构建检索模型参数
 26 | retrieval_model = {
 27 |     "search_method": search_method,
 28 |     "reranking_enable": reranking_enable,
 29 |     "reranking_mode": None,
 30 |     "reranking_model": {
 31 |         "reranking_provider_name": "",
 32 |         "reranking_model_name": ""
 33 |     },
 34 |     "weights": None,
 35 |     "top_k": top_k,
 36 |     "score_threshold_enabled": score_threshold_enabled,
 37 |     "score_threshold": score_threshold if score_threshold_enabled else None
 38 | }
 39 | 
 40 | # 执行知识库检索
 41 | print(f"从知识库 {dataset_id} 检索与 '{query}' 相关的信息...")
 42 | 
 43 | try:
 44 |     url = f"https://api.dify.ai/v1/datasets/{dataset_id}/retrieve"
 45 |     
 46 |     payload = {
 47 |         "query": query,
 48 |         "retrieval_model": retrieval_model
 49 |     }
 50 |     
 51 |     print(f"请求URL: {url}")
 52 |     print(f"请求参数: {payload}")
 53 |     
 54 |     response = requests.post(url, headers=headers, json=payload)
 55 |     print(f"响应状态码: {response.status_code}")
 56 |     print(f"响应内容: {response.text}")
 57 |     
 58 |     if response.status_code == 200:
 59 |         result = response.json()
 60 |         records = result.get('records', [])
 61 |         
 62 |         if not records:
 63 |             print(f"未找到与 '{query}' 相关的信息。")
 64 |         else:
 65 |             print(f"找到 {len(records)} 条相关信息:")
 66 |             
 67 |             for i, record in enumerate(records):
 68 |                 segment = record.get('segment', {})
 69 |                 content = segment.get('content', '')
 70 |                 document = segment.get('document', {})
 71 |                 document_name = document.get('name', '未知文档')
 72 |                 score = record.get('score', 0)
 73 |                 
 74 |                 print(f"结果 {i+1}:")
 75 |                 print(f"文档: {document_name}")
 76 |                 print(f"相关度: {score}")
 77 |                 print(f"内容: {content}")
 78 |                 print("-------------------")
 79 |             
 80 |             # 输出最终JSON响应
 81 |             print("\n最终JSON响应:")
 82 |             final_response = {
 83 |                 "status": "success",
 84 |                 "query": query,
 85 |                 "knowledge_base_id": dataset_id,
 86 |                 "results": records
 87 |             }
 88 |             print(json.dumps(final_response, indent=2, ensure_ascii=False))
 89 |     else:
 90 |         error_data = response.json()
 91 |         error_code = error_data.get('code', 'unknown_error')
 92 |         error_message = error_data.get('message', '未知错误')
 93 |         
 94 |         if error_code == "dataset_not_found":
 95 |             print("知识库不存在或无权访问")
 96 |         elif error_code == "invalid_api_key":
 97 |             print("API Key无效")
 98 |         else:
 99 |             print(f"检索知识库时出错: {error_message}")
100 | except Exception as e:
101 |     print(f"测试过程中发生错误: {str(e)}")
102 | 
103 | print("测试完成") 


--------------------------------------------------------------------------------
/test_tool.py.upload:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import time
  4 | import requests
  5 | 
  6 | # 测试选项
  7 | test_upload = True  # 设置为True测试上传功能
  8 | test_retrieve = True  # 设置为True测试检索功能
  9 | 
 10 | # 上传功能测试参数
 11 | knowledge_base_name = "Test KB"
 12 | description = ""
 13 | document_name = "Test Doc"
 14 | text_content = "This is a test content for the document."
 15 | permission = "only_me"
 16 | indexing_technique = "high_quality"
 17 | 
 18 | # 检索功能测试参数
 19 | dataset_id = "your_dataset_id"  # 替换为您的知识库ID，如果测试上传成功，会自动使用上传创建的知识库ID
 20 | query = "test"
 21 | search_method = "semantic_search"
 22 | reranking_enable = False
 23 | top_k = 3
 24 | score_threshold_enabled = False
 25 | score_threshold = 0.5
 26 | 
 27 | # 设置API Key
 28 | api_key = "test_key"  # 替换为您的实际API Key
 29 | 
 30 | # 设置请求头
 31 | headers = {
 32 |     'Authorization': f'Bearer {api_key}',
 33 |     'Content-Type': 'application/json'
 34 | }
 35 | 
 36 | print("开始API请求测试...")
 37 | 
 38 | # 上传功能测试
 39 | if test_upload:
 40 |     print("\n===== 测试上传功能 =====")
 41 |     # 步骤1: 创建知识库
 42 |     print("步骤1: 创建知识库")
 43 |     url = "https://api.dify.ai/v1/datasets"
 44 |     payload = {
 45 |         "name": knowledge_base_name,
 46 |         "description": description,
 47 |         "permission": permission,
 48 |         "indexing_technique": indexing_technique,
 49 |         "provider": "vendor"
 50 |     }
 51 | 
 52 |     print(f"请求URL: {url}")
 53 |     print(f"请求参数: {payload}")
 54 | 
 55 |     try:
 56 |         response = requests.post(url, headers=headers, json=payload)
 57 |         print(f"响应状态码: {response.status_code}")
 58 |         print(f"响应内容: {response.text}")
 59 |         
 60 |         if response.status_code == 200:
 61 |             result = response.json()
 62 |             dataset_id = result.get('id')
 63 |             print(f"知识库创建成功，ID: {dataset_id}")
 64 |             
 65 |             # 步骤2: 通过文本创建文档
 66 |             print("\n步骤2: 创建文档")
 67 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/document/create-by-text"
 68 |             
 69 |             # 准备处理规则
 70 |             process_rule = {
 71 |                 "mode": "automatic"
 72 |             }
 73 |             
 74 |             payload = {
 75 |                 "name": document_name,
 76 |                 "text": text_content,
 77 |                 "indexing_technique": indexing_technique,
 78 |                 "process_rule": process_rule
 79 |             }
 80 |             
 81 |             print(f"请求URL: {url}")
 82 |             print(f"请求参数: {payload}")
 83 |             
 84 |             response = requests.post(url, headers=headers, json=payload)
 85 |             print(f"响应状态码: {response.status_code}")
 86 |             print(f"响应内容: {response.text}")
 87 |             
 88 |             if response.status_code == 200:
 89 |                 result = response.json()
 90 |                 document = result.get('document', {})
 91 |                 document_id = document.get('id')
 92 |                 batch = result.get('batch', '')
 93 |                 print(f"文档创建成功，ID: {document_id}, 批次: {batch}")
 94 |                 
 95 |                 # 步骤3: 检查文档处理状态
 96 |                 print("\n步骤3: 检查文档处理状态")
 97 |                 url = f"https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status"
 98 |                 
 99 |                 print(f"请求URL: {url}")
100 |                 
101 |                 response = requests.get(url, headers=headers)
102 |                 print(f"响应状态码: {response.status_code}")
103 |                 print(f"响应内容: {response.text}")
104 |                 
105 |                 if response.status_code == 200:
106 |                     result = response.json()
107 |                     documents = result.get('data', [])
108 |                     if documents:
109 |                         document = documents[0]
110 | # Step 1: Create knowledge base
111 | print("Step 1: Creating knowledge base")
112 | url = "https://api.dify.ai/v1/datasets"
113 | payload = {
114 |     "name": knowledge_base_name,
115 |     "description": description,
116 |     "permission": permission,
117 |     "indexing_technique": indexing_technique,
118 |     "provider": "vendor"
119 | }
120 | 
121 | print(f"Request URL: {url}")
122 | print(f"Request parameters: {payload}")
123 | 
124 | try:
125 |     response = requests.post(url, headers=headers, json=payload)
126 |     print(f"Response status code: {response.status_code}")
127 |     print(f"Response content: {response.text}")
128 |     
129 |     if response.status_code == 200:
130 |         result = response.json()
131 |         dataset_id = result.get('id')
132 |         print(f"Knowledge base created successfully, ID: {dataset_id}")
133 |         
134 |         # Step 2: Create document by text
135 |         print("\nStep 2: Creating document")
136 |         url = f"https://api.dify.ai/v1/datasets/{dataset_id}/document/create-by-text"
137 |         
138 |         # Prepare processing rules
139 |         process_rule = {
140 |             "mode": "automatic"
141 |         }
142 |         
143 |         payload = {
144 |             "name": document_name,
145 |             "text": text_content,
146 |             "indexing_technique": indexing_technique,
147 |             "process_rule": process_rule
148 |         }
149 |         
150 |         print(f"Request URL: {url}")
151 |         print(f"Request parameters: {payload}")
152 |         
153 |         response = requests.post(url, headers=headers, json=payload)
154 |         print(f"Response status code: {response.status_code}")
155 |         print(f"Response content: {response.text}")
156 |         
157 |         if response.status_code == 200:
158 |             result = response.json()
159 |             document = result.get('document', {})
160 |             document_id = document.get('id')
161 |             batch = result.get('batch', '')
162 |             print(f"Document created successfully, ID: {document_id}, Batch: {batch}")
163 |             
164 |             # Step 3: Check document processing status
165 |             print("\nStep 3: Checking document processing status")
166 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status"
167 |             
168 |             print(f"Request URL: {url}")
169 |             
170 |             response = requests.get(url, headers=headers)
171 |             print(f"Response status code: {response.status_code}")
172 |             print(f"Response content: {response.text}")
173 |             
174 |             if response.status_code == 200:
175 |                 result = response.json()
176 |                 documents = result.get('data', [])
177 |                 if documents:
178 |                     document = documents[0]
179 |                     status = document.get('indexing_status', 'unknown')
180 |                     print(f"Document status: {status}")
181 |                     
182 |                     # 测试输出JSON响应
183 |                     print("\nStep 4: 输出最终JSON响应")
184 |                     final_response = {
185 |                         "status": "success",
186 |                         "knowledge_base_id": dataset_id,  # 单独列出知识库ID作为顶级字段
187 |                         "knowledge_base": {
188 |                             "id": dataset_id,
189 |                             "name": knowledge_base_name
190 |                         },
191 |                         "document": {
192 |                             "id": document_id,
193 |                             "name": document_name,
194 |                             "batch": batch,
195 |                             "status": status
196 |                         }
197 |                     }
198 |                     print(f"最终JSON响应: {json.dumps(final_response, indent=2)}")
199 |                     print(f"知识库ID: {final_response['knowledge_base_id']}")
200 |                 else:
201 |                     print("No document status information found")
202 |             else:
203 |                 error_data = response.json()
204 |                 error_code = error_data.get('code', 'unknown_error')
205 |                 error_message = error_data.get('message', 'Unknown error')
206 |                 
207 |                 if error_code == "archived_document_immutable":
208 |                     print("Error: The archived document is not editable.")
209 |                 elif error_code == "document_already_finished":
210 |                     print("Error: The document has been processed. Please refresh the page or go to the document details.")
211 |                 elif error_code == "document_indexing":
212 |                     print("Error: The document is being processed and cannot be edited.")
213 |                 else:
214 |                     print(f"Error checking document status: {error_message}")
215 |         else:
216 |             error_data = response.json()
217 |             error_code = error_data.get('code', 'unknown_error')
218 |             error_message = error_data.get('message', 'Unknown error')
219 |             
220 |             if error_code == "no_file_uploaded":
221 |                 print("Error: Please upload your file.")
222 |             elif error_code == "too_many_files":
223 |                 print("Error: Only one file is allowed.")
224 |             elif error_code == "file_too_large":
225 |                 print("Error: File size exceeded.")
226 |             elif error_code == "unsupported_file_type":
227 |                 print("Error: File type not allowed.")
228 |             elif error_code == "high_quality_dataset_only":
229 |                 print("Error: Current operation only supports 'high-quality' datasets.")
230 |             elif error_code == "dataset_not_initialized":
231 |                 print("Error: The dataset is still being initialized or indexing. Please wait a moment.")
232 |             elif error_code == "invalid_metadata":
233 |                 print("Error: The metadata content is incorrect. Please check and verify.")
234 |             else:
235 |                 print(f"Error creating document: {error_message}")
236 |     else:
237 |         error_data = response.json()
238 |         error_code = error_data.get('code', 'unknown_error')
239 |         error_message = error_data.get('message', 'Unknown error')
240 |         
241 |         if error_code == "dataset_name_duplicate":
242 |             print("Error: The dataset name already exists. Please modify your dataset name.")
243 |         elif error_code == "invalid_action":
244 |             print("Error: Invalid action.")
245 |         else:
246 |             print(f"Error creating knowledge base: {error_message}")
247 | except Exception as e:
248 |     print(f"Error occurred during test: {str(e)}")
249 | 
250 | print("Test completed") 


--------------------------------------------------------------------------------
/tools/knowledge_retrieve.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from collections.abc import Generator
  4 | from typing import Any, Dict, List, Optional, Union
  5 | 
  6 | import requests
  7 | from dify_plugin import Tool
  8 | from dify_plugin.entities.tool import ToolInvokeMessage
  9 | 
 10 | class KnowledgeRetrieveTool(Tool):
 11 |     def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage, None, None]:
 12 |         # Get parameters
 13 |         dataset_id = tool_parameters.get('dataset_id')
 14 |         query = tool_parameters.get('query')
 15 |         search_method = tool_parameters.get('search_method', 'semantic_search')
 16 |         reranking_enable = tool_parameters.get('reranking_enable', False)
 17 |         top_k = tool_parameters.get('top_k', 3)
 18 |         score_threshold_enabled = tool_parameters.get('score_threshold_enabled', False)
 19 |         score_threshold = tool_parameters.get('score_threshold', 0.5)
 20 |         
 21 |         # Debug information
 22 |         print(f"Received parameters: {tool_parameters}")
 23 |         
 24 |         # Check required parameters
 25 |         if not dataset_id:
 26 |             yield self.create_text_message("Knowledge base ID is required.")
 27 |             return
 28 |         
 29 |         if not query:
 30 |             yield self.create_text_message("Query content is required.")
 31 |             return
 32 |         
 33 |         # Get API Key from environment variables
 34 |         api_key = os.environ.get('DIFY_KNOWLEDGE_API_KEY')
 35 |         if not api_key:
 36 |             yield self.create_text_message("API Key not found. Please make sure it's set in the plugin configuration.")
 37 |             return
 38 |         
 39 |         # Set request headers
 40 |         headers = {
 41 |             'Authorization': f'Bearer {api_key}',
 42 |             'Content-Type': 'application/json'
 43 |         }
 44 |         
 45 |         # Build retrieval model parameters
 46 |         retrieval_model = {
 47 |             "search_method": search_method,
 48 |             "reranking_enable": reranking_enable,
 49 |             "reranking_mode": None,
 50 |             "reranking_model": {
 51 |                 "reranking_provider_name": "",
 52 |                 "reranking_model_name": ""
 53 |             },
 54 |             "weights": None,
 55 |             "top_k": top_k,
 56 |             "score_threshold_enabled": score_threshold_enabled,
 57 |             "score_threshold": score_threshold if score_threshold_enabled else None
 58 |         }
 59 |         
 60 |         # Perform knowledge base retrieval
 61 |         yield self.create_text_message(f"Retrieving information from knowledge base {dataset_id} related to '{query}'...")
 62 |         
 63 |         result = self._retrieve_from_knowledge_base(headers, dataset_id, query, retrieval_model)
 64 |         if not result:
 65 |             yield self.create_text_message("Retrieval failed. Please check your API Key and parameters.")
 66 |             return
 67 |         
 68 |         if isinstance(result, str):
 69 |             # This is an error message
 70 |             yield self.create_text_message(f"Error: {result}")
 71 |             return
 72 |             
 73 |         records = result.get('records', [])
 74 |         
 75 |         if not records:
 76 |             yield self.create_text_message(f"No information found related to '{query}'.")
 77 |             return
 78 |         
 79 |         # Return retrieval results
 80 |         yield self.create_text_message(f"Found {len(records)} related results:")
 81 |         
 82 |         for i, record in enumerate(records):
 83 |             segment = record.get('segment', {})
 84 |             content = segment.get('content', '')
 85 |             document = segment.get('document', {})
 86 |             document_name = document.get('name', 'Unknown document')
 87 |             score = record.get('score', 0)
 88 |             
 89 |             result_text = f"Result {i+1}:\n"
 90 |             result_text += f"Document: {document_name}\n"
 91 |             result_text += f"Relevance: {score}\n"
 92 |             result_text += f"Content: {content}\n"
 93 |             result_text += "-------------------"
 94 |             
 95 |             yield self.create_text_message(result_text)
 96 |         
 97 |         # Return detailed information
 98 |         yield self.create_json_message({
 99 |             "status": "success",
100 |             "query": query,
101 |             "knowledge_base_id": dataset_id,
102 |             "results": records
103 |         })
104 |     
105 |     def _retrieve_from_knowledge_base(self, headers: Dict, dataset_id: str, query: str, retrieval_model: Dict) -> Optional[Dict]:
106 |         """Retrieve information from knowledge base"""
107 |         try:
108 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/retrieve"
109 |             
110 |             payload = {
111 |                 "query": query,
112 |                 "retrieval_model": retrieval_model
113 |             }
114 |             
115 |             print(f"Knowledge base retrieval request URL: {url}")
116 |             print(f"Knowledge base retrieval request parameters: {payload}")
117 |             
118 |             response = requests.post(url, headers=headers, json=payload)
119 |             
120 |             print(f"Knowledge base retrieval response status code: {response.status_code}")
121 |             print(f"Knowledge base retrieval response content: {response.text}")
122 |             
123 |             if response.status_code == 200:
124 |                 return response.json()
125 |             else:
126 |                 error_data = response.json()
127 |                 error_code = error_data.get('code', 'unknown_error')
128 |                 error_message = error_data.get('message', 'Unknown error')
129 |                 
130 |                 if error_code == "dataset_not_found":
131 |                     return "Knowledge base does not exist or you don't have access"
132 |                 elif error_code == "invalid_api_key":
133 |                     return "Invalid API Key"
134 |                 else:
135 |                     print(f"Error retrieving from knowledge base: {error_message}")
136 |                     print(f"Status code: {response.status_code}")
137 |                     print(f"Response content: {response.text}")
138 |                     return f"Retrieval failed: {error_message}"
139 |         except Exception as e:
140 |             print(f"Error occurred while retrieving from knowledge base: {str(e)}")
141 |             return f"Exception occurred: {str(e)}" 


--------------------------------------------------------------------------------
/tools/knowledge_retrieve.yaml:
--------------------------------------------------------------------------------
  1 | # Tool manifest for `knowledge_retrieve`; the handler lives in the file
  2 | # named under `extra.python.source`.
  3 | identity:
  4 |   author: stvlynn
  5 |   name: knowledge_retrieve
  6 |   label:
  7 |     en_US: Retrieve from Knowledge Base
  8 |     zh_Hans: 从知识库检索
  9 | description:
 10 |   llm: A tool to retrieve information from Dify Knowledge Base.
 11 |   human:
 12 |     en_US: A tool to retrieve information from Dify Knowledge Base.
 13 |     zh_Hans: 一个从Dify知识库检索信息的工具。
 14 | parameters:
 15 |   # Knowledge base to search.
 16 |   - name: dataset_id
 17 |     type: string
 18 |     form: form
 19 |     required: true
 20 |     label:
 21 |       en_US: Knowledge Base ID
 22 |       zh_Hans: 知识库ID
 23 |     llm_description: The ID of the knowledge base to retrieve from
 24 |     human_description:
 25 |       en_US: The ID of the knowledge base to retrieve from
 26 |       zh_Hans: 要检索的知识库ID
 27 |   # Search text; the only parameter declared with `form: llm`.
 28 |   - name: query
 29 |     type: string
 30 |     form: llm
 31 |     required: true
 32 |     label:
 33 |       en_US: Query
 34 |       zh_Hans: 查询内容
 35 |     llm_description: The query to search for in the knowledge base
 36 |     human_description:
 37 |       en_US: The query to search for in the knowledge base
 38 |       zh_Hans: 在知识库中搜索的查询内容
 39 |   # Retrieval strategy; one of the four option values listed below.
 40 |   - name: search_method
 41 |     type: select
 42 |     form: form
 43 |     required: false
 44 |     default: semantic_search
 45 |     label:
 46 |       en_US: Search Method
 47 |       zh_Hans: 搜索方法
 48 |     llm_description: The method to use for searching the knowledge base
 49 |     human_description:
 50 |       en_US: The method to use for searching the knowledge base
 51 |       zh_Hans: 用于搜索知识库的方法
 52 |     options:
 53 |       - value: keyword_search
 54 |         label:
 55 |           en_US: Keyword Search
 56 |           zh_Hans: 关键词检索
 57 |       - value: semantic_search
 58 |         label:
 59 |           en_US: Semantic Search
 60 |           zh_Hans: 语义检索
 61 |       - value: full_text_search
 62 |         label:
 63 |           en_US: Full Text Search
 64 |           zh_Hans: 全文检索
 65 |       - value: hybrid_search
 66 |         label:
 67 |           en_US: Hybrid Search
 68 |           zh_Hans: 混合检索
 69 |   # Reranking toggle; off by default.
 70 |   - name: reranking_enable
 71 |     type: boolean
 72 |     form: form
 73 |     required: false
 74 |     default: false
 75 |     label:
 76 |       en_US: Enable Reranking
 77 |       zh_Hans: 启用重排序
 78 |     llm_description: Whether to enable reranking of search results
 79 |     human_description:
 80 |       en_US: Whether to enable reranking of search results
 81 |       zh_Hans: 是否启用搜索结果重排序
 82 |   # Number of results to return; defaults to 3.
 83 |   - name: top_k
 84 |     type: number
 85 |     form: form
 86 |     required: false
 87 |     default: 3
 88 |     label:
 89 |       en_US: Number of Results
 90 |       zh_Hans: 结果数量
 91 |     llm_description: The number of results to return
 92 |     human_description:
 93 |       en_US: The number of results to return
 94 |       zh_Hans: 返回的结果数量
 95 |   # Score-threshold toggle; off by default.
 96 |   - name: score_threshold_enabled
 97 |     type: boolean
 98 |     form: form
 99 |     required: false
100 |     default: false
101 |     label:
102 |       en_US: Enable Score Threshold
103 |       zh_Hans: 启用分数阈值
104 |     llm_description: Whether to enable score threshold filtering
105 |     human_description:
106 |       en_US: Whether to enable score threshold filtering
107 |       zh_Hans: 是否启用分数阈值过滤
108 |   # Minimum score, described as 0-1; no range is declared in this manifest.
109 |   - name: score_threshold
110 |     type: number
111 |     form: form
112 |     required: false
113 |     default: 0.5
114 |     label:
115 |       en_US: Score Threshold
116 |       zh_Hans: 分数阈值
117 |     llm_description: The minimum score threshold for results (0-1)
118 |     human_description:
119 |       en_US: The minimum score threshold for results (0-1)
120 |       zh_Hans: 结果的最小分数阈值（0-1）
121 | extra:
122 |   python:
123 |     source: tools/knowledge_retrieve.py


--------------------------------------------------------------------------------
/tools/knowledge_upload.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import time
  4 | from collections.abc import Generator
  5 | from typing import Any, Dict, List, Optional, Union
  6 | 
  7 | import requests
  8 | from dify_plugin import Tool
  9 | from dify_plugin.entities.tool import ToolInvokeMessage
 10 | 
 11 | class KnowledgeUploadTool(Tool):
 12 |     def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage, None, None]:
 13 |         # Get parameters
 14 |         knowledge_base_name = tool_parameters.get('knowledge_base_name')
 15 |         description = tool_parameters.get('description', '')
 16 |         document_name = tool_parameters.get('document_name')
 17 |         text_content = tool_parameters.get('text')
 18 |         permission = tool_parameters.get('permission', 'only_me')
 19 |         indexing_technique = tool_parameters.get('indexing_technique', 'high_quality')
 20 |         
 21 |         # Debug information
 22 |         print(f"Received parameters: {tool_parameters}")
 23 |         print(f"Text content: {text_content}")
 24 |         
 25 |         # Check required parameters
 26 |         if not knowledge_base_name:
 27 |             yield self.create_text_message("Knowledge base name is required.")
 28 |             return
 29 |         
 30 |         if not document_name:
 31 |             yield self.create_text_message("Document name is required.")
 32 |             return
 33 |         
 34 |         if not text_content:
 35 |             yield self.create_text_message("Text content is required.")
 36 |             return
 37 |         
 38 |         # Get API Key from environment variables
 39 |         api_key = os.environ.get('DIFY_KNOWLEDGE_API_KEY')
 40 |         if not api_key:
 41 |             yield self.create_text_message("API Key not found. Please make sure it's set in the plugin configuration.")
 42 |             return
 43 |         
 44 |         # Set request headers
 45 |         headers = {
 46 |             'Authorization': f'Bearer {api_key}',
 47 |             'Content-Type': 'application/json'
 48 |         }
 49 |         
 50 |         # Step 1: Create knowledge base
 51 |         yield self.create_text_message(f"Creating knowledge base: {knowledge_base_name}...")
 52 |         
 53 |         dataset_id = self._create_knowledge_base(headers, knowledge_base_name, description, permission, indexing_technique)
 54 |         if not dataset_id:
 55 |             yield self.create_text_message("Failed to create knowledge base. Please check your API Key and parameters.")
 56 |             return
 57 |         
 58 |         yield self.create_text_message(f"Knowledge base created successfully, ID: {dataset_id}")
 59 |         
 60 |         # Step 2: Create document with text
 61 |         yield self.create_text_message(f"Creating document: {document_name}...")
 62 |         
 63 |         document_result = self._create_document_by_text(headers, dataset_id, document_name, text_content, indexing_technique)
 64 |         if not document_result:
 65 |             yield self.create_text_message("Failed to create document. Please check your parameters.")
 66 |             return
 67 |         
 68 |         if isinstance(document_result, str):
 69 |             # This is an error message
 70 |             yield self.create_text_message(f"Error: {document_result}")
 71 |             return
 72 |             
 73 |         document_id = document_result.get('id')
 74 |         batch = document_result.get('batch')
 75 |         
 76 |         yield self.create_text_message(f"Document created successfully, ID: {document_id}, Batch: {batch}")
 77 |         
 78 |         # Step 3: Check document processing status
 79 |         yield self.create_text_message(f"Processing document, please wait...")
 80 |         
 81 |         status_result = self._check_document_status(headers, dataset_id, batch)
 82 |         
 83 |         if isinstance(status_result, str) and status_result.startswith("Error:"):
 84 |             yield self.create_text_message(status_result)
 85 |             return
 86 |             
 87 |         status = status_result
 88 |         
 89 |         if status == "completed":
 90 |             yield self.create_text_message(f"Document processing completed!")
 91 |         elif status == "error" or status == "failed":
 92 |             yield self.create_text_message(f"Document processing failed. Please check your text content.")
 93 |         else:
 94 |             yield self.create_text_message(f"Document is being processed, current status: {status}")
 95 |             yield self.create_text_message(f"You can check the result later on the Dify platform.")
 96 |         
 97 |         # Return detailed information
 98 |         yield self.create_json_message({
 99 |             "status": 200,
100 |             "id": dataset_id,
101 |             "knowledge_base": {
102 |                 "id": dataset_id,
103 |                 "name": knowledge_base_name
104 |             },
105 |             "document": {
106 |                 "id": document_id,
107 |                 "name": document_name,
108 |                 "batch": batch,
109 |                 "status": status
110 |             }
111 |         })
112 |     
113 |     def _create_knowledge_base(self, headers: Dict, name: str, description: str, permission: str, indexing_technique: str) -> Optional[str]:
114 |         """Create an empty knowledge base"""
115 |         try:
116 |             url = "https://api.dify.ai/v1/datasets"
117 |             payload = {
118 |                 "name": name,
119 |                 "description": description,
120 |                 "permission": permission,
121 |                 "indexing_technique": indexing_technique,
122 |                 "provider": "vendor"
123 |             }
124 |             
125 |             print(f"Knowledge base creation request URL: {url}")
126 |             print(f"Knowledge base creation request parameters: {payload}")
127 |             
128 |             response = requests.post(url, headers=headers, json=payload)
129 |             
130 |             print(f"Knowledge base creation response status code: {response.status_code}")
131 |             print(f"Knowledge base creation response content: {response.text}")
132 |             
133 |             if response.status_code == 200:
134 |                 result = response.json()
135 |                 return result.get('id')
136 |             else:
137 |                 error_data = response.json()
138 |                 error_code = error_data.get('code', 'unknown_error')
139 |                 error_message = error_data.get('message', 'Unknown error')
140 |                 
141 |                 if error_code == "dataset_name_duplicate":
142 |                     print(f"Error: The dataset name already exists. Please modify your dataset name.")
143 |                 elif error_code == "invalid_action":
144 |                     print(f"Error: Invalid action.")
145 |                 else:
146 |                     print(f"Error creating knowledge base: {error_message}")
147 |                     
148 |                 print(f"Status code: {response.status_code}")
149 |                 print(f"Response content: {response.text}")
150 |                 return None
151 |         except Exception as e:
152 |             print(f"Error occurred while creating knowledge base: {str(e)}")
153 |             return None
154 |     
155 |     def _create_document_by_text(self, headers: Dict, dataset_id: str, document_name: str, text_content: str, indexing_technique: str) -> Optional[Dict]:
156 |         """Create document by text"""
157 |         try:
158 |             # Correct API endpoint
159 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/document/create-by-text"
160 |             
161 |             # Prepare processing rules
162 |             process_rule = {
163 |                 "mode": "automatic"
164 |             }
165 |             
166 |             # Ensure text content is a string
167 |             if not isinstance(text_content, str):
168 |                 text_content = str(text_content)
169 |             
170 |             payload = {
171 |                 "name": document_name,
172 |                 "text": text_content,
173 |                 "indexing_technique": indexing_technique,
174 |                 "process_rule": process_rule
175 |             }
176 |             
177 |             print(f"Document creation request URL: {url}")
178 |             print(f"Document creation request parameters: {payload}")
179 |             
180 |             response = requests.post(url, headers=headers, json=payload)
181 |             
182 |             print(f"Document creation response status code: {response.status_code}")
183 |             print(f"Document creation response content: {response.text}")
184 |             
185 |             if response.status_code == 200:
186 |                 result = response.json()
187 |                 document = result.get('document', {})
188 |                 batch = result.get('batch', '')
189 |                 return {
190 |                     'id': document.get('id'),
191 |                     'batch': batch
192 |                 }
193 |             else:
194 |                 error_data = response.json()
195 |                 error_code = error_data.get('code', 'unknown_error')
196 |                 error_message = error_data.get('message', 'Unknown error')
197 |                 
198 |                 if error_code == "no_file_uploaded":
199 |                     return "Error: Please upload your file."
200 |                 elif error_code == "too_many_files":
201 |                     return "Error: Only one file is allowed."
202 |                 elif error_code == "file_too_large":
203 |                     return "Error: File size exceeded."
204 |                 elif error_code == "unsupported_file_type":
205 |                     return "Error: File type not allowed."
206 |                 elif error_code == "high_quality_dataset_only":
207 |                     return "Error: Current operation only supports 'high-quality' datasets."
208 |                 elif error_code == "dataset_not_initialized":
209 |                     return "Error: The dataset is still being initialized or indexing. Please wait a moment."
210 |                 elif error_code == "invalid_metadata":
211 |                     return "Error: The metadata content is incorrect. Please check and verify."
212 |                 else:
213 |                     print(f"Error creating document: {error_message}")
214 |                     print(f"Status code: {response.status_code}")
215 |                     print(f"Response content: {response.text}")
216 |                     return None
217 |         except Exception as e:
218 |             print(f"Error occurred while creating document: {str(e)}")
219 |             print(f"Exception details: {repr(e)}")
220 |             return None
221 |     
222 |     def _check_document_status(self, headers: Dict, dataset_id: str, batch: str) -> str:
223 |         """Check document processing status"""
224 |         try:
225 |             # Use the correct API endpoint
226 |             url = f"https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status"
227 |             
228 |             print(f"Document status check request URL: {url}")
229 |             
230 |             # Try to check status, maximum 5 attempts
231 |             for i in range(5):
232 |                 print(f"Document status check attempt {i+1}")
233 |                 response = requests.get(url, headers=headers)
234 |                 
235 |                 print(f"Document status check response status code: {response.status_code}")
236 |                 print(f"Document status check response content: {response.text}")
237 |                 
238 |                 if response.status_code == 200:
239 |                     result = response.json()
240 |                     documents = result.get('data', [])
241 |                     
242 |                     if documents:
243 |                         document = documents[0]
244 |                         status = document.get('indexing_status', 'unknown')
245 |                         print(f"Document status: {status}")
246 |                         
247 |                         if status in ['completed', 'error', 'failed']:
248 |                             return status
249 |                     
250 |                     # If still processing, wait 3 seconds before checking again
251 |                     print("Document is still being processed, waiting 3 seconds before checking again")
252 |                     time.sleep(3)
253 |                 else:
254 |                     error_data = response.json()
255 |                     error_code = error_data.get('code', 'unknown_error')
256 |                     error_message = error_data.get('message', 'Unknown error')
257 |                     
258 |                     if error_code == "archived_document_immutable":
259 |                         return "Error: The archived document is not editable."
260 |                     elif error_code == "document_already_finished":
261 |                         return "Error: The document has been processed. Please refresh the page or go to the document details."
262 |                     elif error_code == "document_indexing":
263 |                         return "Error: The document is being processed and cannot be edited."
264 |                     else:
265 |                         print(f"Error checking document status: {error_message}")
266 |                         print(f"Response content: {response.text}")
267 |                         return "Error: Failed to check document status."
268 |             
269 |             # If still not completed after 5 attempts, return processing status
270 |             return "processing"
271 |         except Exception as e:
272 |             print(f"Error occurred while checking document status: {str(e)}")
273 |             return "Error: An unexpected error occurred while checking document status." 


--------------------------------------------------------------------------------
/tools/knowledge_upload.yaml:
--------------------------------------------------------------------------------
  1 | # Tool manifest for `knowledge_upload`; the handler lives in the file
  2 | # named under `extra.python.source`.
  3 | identity:
  4 |   author: stvlynn
  5 |   name: knowledge_upload
  6 |   label:
  7 |     en_US: Upload to Knowledge Base
  8 |     zh_Hans: 上传到知识库
  9 | description:
 10 |   llm: A tool to upload text content to Dify Knowledge Base.
 11 |   human:
 12 |     en_US: A tool to upload text content to Dify Knowledge Base.
 13 |     zh_Hans: 一个上传文本内容到Dify知识库的工具。
 14 | parameters:
 15 |   # Name given to the knowledge base that the tool creates.
 16 |   - name: knowledge_base_name
 17 |     type: string
 18 |     form: form
 19 |     required: true
 20 |     label:
 21 |       en_US: Knowledge Base Name
 22 |       zh_Hans: 知识库名称
 23 |     llm_description: The name of the knowledge base to create
 24 |     human_description:
 25 |       en_US: The name of the knowledge base to create
 26 |       zh_Hans: 要创建的知识库名称
 27 |   # Optional free-text description of the knowledge base.
 28 |   - name: description
 29 |     type: string
 30 |     form: form
 31 |     required: false
 32 |     label:
 33 |       en_US: Description
 34 |       zh_Hans: 描述
 35 |     llm_description: The description of the knowledge base
 36 |     human_description:
 37 |       en_US: The description of the knowledge base
 38 |       zh_Hans: 知识库的描述
 39 |   # Name of the document created inside the knowledge base.
 40 |   - name: document_name
 41 |     type: string
 42 |     form: form
 43 |     required: true
 44 |     label:
 45 |       en_US: Document Name
 46 |       zh_Hans: 文档名称
 47 |     llm_description: The name of the document to create
 48 |     human_description:
 49 |       en_US: The name of the document to create
 50 |       zh_Hans: 要创建的文档名称
 51 |   # Document body; the only parameter declared with `form: llm`.
 52 |   - name: text
 53 |     type: string
 54 |     form: llm
 55 |     required: true
 56 |     label:
 57 |       en_US: Text Content
 58 |       zh_Hans: 文本内容
 59 |     llm_description: The text content to upload to the knowledge base
 60 |     human_description:
 61 |       en_US: The text content to upload to the knowledge base
 62 |       zh_Hans: 要上传到知识库的文本内容
 63 |   # Visibility of the knowledge base; defaults to `only_me`.
 64 |   - name: permission
 65 |     type: select
 66 |     form: form
 67 |     required: true
 68 |     default: only_me
 69 |     label:
 70 |       en_US: Permission
 71 |       zh_Hans: 权限
 72 |     llm_description: The permission of the knowledge base (only_me or publicly_readable)
 73 |     human_description:
 74 |       en_US: The permission of the knowledge base (only_me or publicly_readable)
 75 |       zh_Hans: 知识库的权限（仅自己或公开可读）
 76 |     options:
 77 |       - value: only_me
 78 |         label:
 79 |           en_US: Only Me
 80 |           zh_Hans: 仅自己
 81 |       - value: publicly_readable
 82 |         label:
 83 |           en_US: Publicly Readable
 84 |           zh_Hans: 公开可读
 85 |   # Indexing mode; defaults to `high_quality`.
 86 |   - name: indexing_technique
 87 |     type: select
 88 |     form: form
 89 |     required: true
 90 |     default: high_quality
 91 |     label:
 92 |       en_US: Indexing Technique
 93 |       zh_Hans: 索引技术
 94 |     llm_description: The indexing technique to use (high_quality or economy)
 95 |     human_description:
 96 |       en_US: The indexing technique to use (high_quality or economy)
 97 |       zh_Hans: 要使用的索引技术（高质量或经济）
 98 |     options:
 99 |       - value: high_quality
100 |         label:
101 |           en_US: High Quality
102 |           zh_Hans: 高质量
103 |       - value: economy
104 |         label:
105 |           en_US: Economy
106 |           zh_Hans: 经济
107 | extra:
108 |   python:
109 |     source: tools/knowledge_upload.py


--------------------------------------------------------------------------------