├── docs
├── .nojekyll
├── Elixir-Presentation.pdf
├── example-data
│ └── nuclei.tif
├── screenshots
│ ├── bia-tutor.png
│ ├── channels.png
│ ├── chatbot-hi.png
│ ├── extensions.png
│ ├── gpts_json.png
│ ├── web-search.png
│ ├── bia-executor.png
│ ├── more-details.png
│ ├── role_create.png
│ ├── search-biii.png
│ ├── user-profile.png
│ ├── SimilarityScore.png
│ ├── chat-interface.png
│ ├── configure_gpt.png
│ ├── direct-response.png
│ ├── respond_to_user.png
│ ├── retrieval-text.png
│ ├── screenshot-hi.png
│ ├── chatbot-animation.gif
│ ├── chatbot-biologist.png
│ ├── chatbot-developer.png
│ ├── gpts_landing_page.png
│ ├── search-model-zoo.png
│ ├── select-extension.png
│ ├── chatbot-model-upload.png
│ ├── chatbot-wsi-pipeline.png
│ ├── search-bioimage-docs.png
│ ├── search-image-forum.png
│ ├── customization1_student.png
│ ├── customization_biotool.png
│ ├── chatbot-community-partner.png
│ ├── customization2_developer.png
│ ├── script-gen-exe-retrieval.png
│ ├── search-bioimage-archive.png
│ ├── similarity-score-results.png
│ ├── chatbot-channel-bioimageio.png
│ ├── chatbot-technical-question.png
│ ├── chatbot-channel-scikit-image.png
│ └── chatgpt-vs-bioimageiochatbot.png
├── _sidebar.md
├── beta-testing-guidelines.md
├── creating_GPTs.md
├── CONTRIBUTING.md
├── DISCLAIMER.md
├── index.html
├── technical-overview.md
├── figure-2-use-cases.md
├── installation.md
├── usage-example.md
├── README.md
└── development.md
├── bioimageio_chatbot
├── __init__.py
├── tools.py
├── gpts_action.py
├── static
│ ├── imagej-js-extension.imjoy.html
│ ├── bioimage-model-zoo-extension.imjoy.html
│ ├── worker-manager.js
│ └── pyodide-worker.js
├── evaluation.py
├── chatbot_extensions
│ ├── web_search_extension
│ │ ├── __init__.py
│ │ └── langchain_websearch.py
│ ├── __init__.py
│ ├── vision_extension.py
│ ├── bia_extension.py
│ ├── hpa_extension.py
│ ├── biii_extension.py
│ ├── image_sc_extension.py
│ └── docs_extension.py
├── __main__.py
├── utils.py
├── quota.py
├── jsonschema_pydantic.py
└── knowledge_base.py
├── MANIFEST.in
├── requirements_test.txt
├── Dockerfile
├── tests
├── __pycache__
│ ├── test_chatbot.cpython-39-pytest-7.2.1.pyc
│ └── test_chatbot.cpython-310-pytest-7.4.2.pyc
├── test_knowledge_base.py
├── test_chatbot_answer.py
└── test_chatbot.py
├── scripts
└── publish.sh
├── requirements.txt
├── pyproject.toml
├── .github
└── workflows
│ ├── publish.yml
│ └── build.yml
├── LICENSE
└── .gitignore
/docs/.nojekyll:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include bioimageio_chatbot/static/*
--------------------------------------------------------------------------------
/requirements_test.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | pytest
3 | pytest-asyncio
4 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3:latest
2 | RUN pip install bioimageio-chatbot
3 | 
4 | EXPOSE 9000
5 | # Without a default command the container starts and exits immediately;
6 | # launch the chatbot server on the exposed port (same invocation as CI).
7 | CMD ["python", "-m", "bioimageio_chatbot", "start-server", "--host=0.0.0.0", "--port=9000"]
--------------------------------------------------------------------------------
/docs/Elixir-Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/Elixir-Presentation.pdf
--------------------------------------------------------------------------------
/docs/example-data/nuclei.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/example-data/nuclei.tif
--------------------------------------------------------------------------------
/docs/screenshots/bia-tutor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-tutor.png
--------------------------------------------------------------------------------
/docs/screenshots/channels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/channels.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-hi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-hi.png
--------------------------------------------------------------------------------
/docs/screenshots/extensions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/extensions.png
--------------------------------------------------------------------------------
/docs/screenshots/gpts_json.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_json.png
--------------------------------------------------------------------------------
/docs/screenshots/web-search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/web-search.png
--------------------------------------------------------------------------------
/docs/screenshots/bia-executor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-executor.png
--------------------------------------------------------------------------------
/docs/screenshots/more-details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/more-details.png
--------------------------------------------------------------------------------
/docs/screenshots/role_create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/role_create.png
--------------------------------------------------------------------------------
/docs/screenshots/search-biii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-biii.png
--------------------------------------------------------------------------------
/docs/screenshots/user-profile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/user-profile.png
--------------------------------------------------------------------------------
/docs/screenshots/SimilarityScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/SimilarityScore.png
--------------------------------------------------------------------------------
/docs/screenshots/chat-interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chat-interface.png
--------------------------------------------------------------------------------
/docs/screenshots/configure_gpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/configure_gpt.png
--------------------------------------------------------------------------------
/docs/screenshots/direct-response.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/direct-response.png
--------------------------------------------------------------------------------
/docs/screenshots/respond_to_user.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/respond_to_user.png
--------------------------------------------------------------------------------
/docs/screenshots/retrieval-text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/retrieval-text.png
--------------------------------------------------------------------------------
/docs/screenshots/screenshot-hi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/screenshot-hi.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-animation.gif
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-biologist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-biologist.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-developer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-developer.png
--------------------------------------------------------------------------------
/docs/screenshots/gpts_landing_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_landing_page.png
--------------------------------------------------------------------------------
/docs/screenshots/search-model-zoo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-model-zoo.png
--------------------------------------------------------------------------------
/docs/screenshots/select-extension.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/select-extension.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-model-upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-model-upload.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-wsi-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-wsi-pipeline.png
--------------------------------------------------------------------------------
/docs/screenshots/search-bioimage-docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-docs.png
--------------------------------------------------------------------------------
/docs/screenshots/search-image-forum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-image-forum.png
--------------------------------------------------------------------------------
/docs/screenshots/customization1_student.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization1_student.png
--------------------------------------------------------------------------------
/docs/screenshots/customization_biotool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization_biotool.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-community-partner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-community-partner.png
--------------------------------------------------------------------------------
/docs/screenshots/customization2_developer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization2_developer.png
--------------------------------------------------------------------------------
/docs/screenshots/script-gen-exe-retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/script-gen-exe-retrieval.png
--------------------------------------------------------------------------------
/docs/screenshots/search-bioimage-archive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-archive.png
--------------------------------------------------------------------------------
/docs/screenshots/similarity-score-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/similarity-score-results.png
--------------------------------------------------------------------------------
/bioimageio_chatbot/tools.py:
--------------------------------------------------------------------------------
1 |
2 | # example function
3 | def example_function(address):
4 | pass
5 |
6 |
7 | TOOL_MAP = {"example_function": example_function}
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-channel-bioimageio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-bioimageio.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-technical-question.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-technical-question.png
--------------------------------------------------------------------------------
/docs/screenshots/chatbot-channel-scikit-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-scikit-image.png
--------------------------------------------------------------------------------
/docs/screenshots/chatgpt-vs-bioimageiochatbot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatgpt-vs-bioimageiochatbot.png
--------------------------------------------------------------------------------
/tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc
--------------------------------------------------------------------------------
/tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc
--------------------------------------------------------------------------------
/scripts/publish.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the distribution and upload it to PyPI.
3 | set -euo pipefail
4 | pip install -U twine
5 | pip install -U wheel
6 | python3 -m pip install --upgrade build
7 | rm -rf ./build
8 | # -rf so an empty or missing ./dist does not abort the script (plain
9 | # `rm ./dist/*` fails on the first run; matches publish.yml behavior)
10 | rm -rf ./dist/*
11 | python3 -m build
12 | twine upload dist/*
13 | rm -rf ./build
--------------------------------------------------------------------------------
/docs/_sidebar.md:
--------------------------------------------------------------------------------
1 |
2 | * [Overview](/README)
3 | * [Installation](/installation)
4 | * [Usage guide](/usage-example)
5 | * [Technical Overview](/technical-overview)
6 | * [Extension Development](/development)
7 | * [Contribution Guidelines](/CONTRIBUTING)
8 | * [Use Cases](/figure-2-use-cases)
9 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | schema-agents>=0.1.59
2 | hypha-rpc==0.20.38
3 | requests
4 | pypdf
5 | pillow
6 | matplotlib
7 | hypha==0.20.38.post19
8 | tqdm
9 | aiofiles
10 | serpapi
11 | langchain>=0.1.11
12 | langchain-community==0.0.27
13 | langchain-core==0.1.31
14 | beautifulsoup4
15 | pandas
16 | duckduckgo-search==6.1.5
17 | langchain-openai==0.0.8
18 | rank-bm25==0.2.2
19 | html2text==2020.1.16
20 | setuptools
21 |
--------------------------------------------------------------------------------
/tests/test_knowledge_base.py:
--------------------------------------------------------------------------------
1 | from langchain_community.vectorstores import FAISS
2 | from langchain_openai import OpenAIEmbeddings
3 |
4 | def test_knowledge_base():
5 | """Test the knowledge base"""
6 | vectordb = FAISS.load_local(folder_path="./bioimageio-knowledge-base", index_name="bioimage.io", embeddings=OpenAIEmbeddings(), allow_dangerous_deserialization=True)
7 | retriever = vectordb.as_retriever(score_threshold=0.4)
8 | items = retriever.get_relevant_documents("community partner", verbose=True)
9 | assert len(items) > 0
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | # Explicit backend; without it pip falls back to the legacy setuptools shim.
4 | build-backend = "setuptools.build_meta"
5 | 
6 | [project]
7 | name = "bioimageio-chatbot"
8 | version = "0.2.13"
9 | readme = "README.md"
10 | description = "Your Personal Assistant in Computational BioImaging."
11 | dependencies = [
12 |     "schema-agents>=0.1.59",
13 |     "hypha-rpc>=0.20.38",
14 |     "requests",
15 |     "pypdf",
16 |     "pillow",
17 |     "matplotlib",
18 |     "hypha>=0.20.38.post19",
19 |     "tqdm",
20 |     "aiofiles",
21 |     "langchain>=0.1.6",
22 |     "beautifulsoup4",
23 |     "pandas",
24 |     "duckduckgo-search>=6.1.5",
25 |     "rank-bm25",
26 |     "langchain-openai",
27 |     "langchain-core>=0.1.31",
28 |     "langchain-community>=0.0.27",
29 |     "html2text",
30 | ]
31 | 
32 | # `[options.entry_points]` is setup.cfg syntax and is silently ignored in
33 | # pyproject.toml builds; PEP 621 puts console scripts in [project.scripts],
34 | # otherwise the `bioimageio-chatbot` command is never installed.
35 | [project.scripts]
36 | bioimageio-chatbot = "bioimageio_chatbot.__main__:main"
37 | 
38 | [tool.setuptools]
39 | include-package-data = true
40 | 
41 | [tool.setuptools.packages.find]
42 | include = ["bioimageio_chatbot*"]
43 | exclude = ["tests*", "scripts*"]
44 | 
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPI
2 | 
3 | on:
4 |   workflow_dispatch:
5 |     # Optional inputs, you can add more according to your needs
6 |     # (a bare `inputs:` key with no children is invalid, so it is commented
7 |     # out until inputs are actually needed)
8 |     # inputs:
9 |     #   version:
10 |     #     description: 'Version of the package to release'
11 |     #     required: true
12 |     #     default: '1.0.0'
13 | 
14 | jobs:
15 |   publish:
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - name: Check out code
19 |         # checkout@v2 runs on the retired node12 runtime; v4 is supported
20 |         uses: actions/checkout@v4
21 | 
22 |       # Add steps for any necessary setup, like installing dependencies
23 |       - name: Build
24 |         run: |
25 |           python -m pip install --upgrade pip
26 |           python -m pip install -U twine
27 |           python -m pip install -U wheel
28 |           python3 -m pip install build==1.0.3  # pin build
29 |           rm -rf ./build
30 |           rm -rf ./dist/*
31 |           python3 -m build
32 | 
33 |       - name: Publish to PyPI
34 |         # pin to the maintained release branch instead of the moving `master` ref
35 |         uses: pypa/gh-action-pypi-publish@release/v1
36 |         with:
37 |           user: __token__
38 |           password: ${{ secrets.PYPI_API_TOKEN }}
39 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 BioImage.IO
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | on:
2 |   push:
3 |     branches:
4 |       - main
5 | 
6 | name: Build
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   build:
13 |     permissions:
14 |       contents: write  # for Git to git push
15 |     name: Build
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - name: Checkout code
19 |         # checkout@v3 runs on a deprecated node runtime; v4 is supported
20 |         uses: actions/checkout@v4
21 |       - name: Setup conda
22 |         uses: s-weigand/setup-conda@v1
23 |         with:
24 |           update-conda: true
25 |           python-version: 3.9
26 |           conda-channels: anaconda, conda-forge
27 |       - name: Install dependencies
28 |         run: |
29 |           python -m pip install --upgrade pip
30 |           python -m pip install -r requirements.txt
31 |       - name: Trigger workflow at bioimage.io
32 |         uses: benc-uk/workflow-dispatch@v1
33 |         with:
34 |           workflow: build-site.yml
35 |           repo: bioimage-io/bioimage.io
36 |           inputs: '{}'
37 |           # Required when using the `repo` option. Either a PAT or a token generated from the GitHub app or CLI
38 |           token: "${{ secrets.PAT_TOKEN_WORKFLOW }}"
39 |       - name: Run Init
40 |         run: python -m bioimageio_chatbot init
41 |       - name: Test server
42 |         env:
43 |           OPENAI_API_KEY: sk-xxxxxxxx
44 |         # Retry until the backgrounded server starts listening; a plain wget
45 |         # raced the server startup and failed intermittently.
46 |         run: python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 & wget --retry-connrefused --waitretry=2 --tries=15 http://127.0.0.1:9000/
47 | 
48 |       # - name: Build vector database
49 |       #   if: github.event_name == 'push' && github.ref == 'refs/heads/main'
50 |       #   run: |
51 |       #     python -m bioimageio_chatbot create-knowledge-base --output-dir=./dist/bioimageio-knowledge-base
52 |       #   env:
53 |       #     OPENAI_API_KEY: ${{ secrets.OPENAI_SECRET_API_KEY }}
54 |       # - name: Deploy
55 |       #   if: github.event_name == 'push' && github.ref == 'refs/heads/main'
56 |       #   uses: peaceiris/actions-gh-pages@v3
57 |       #   with:
58 |       #     github_token: ${{ secrets.GITHUB_TOKEN }}
59 |       #     publish_dir: ./dist
60 | 
--------------------------------------------------------------------------------
/bioimageio_chatbot/gpts_action.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from openai import AsyncOpenAI
3 | from bioimageio_chatbot.chatbot_extensions import extension_to_tools
4 | from schema_agents.utils.schema_conversion import get_service_openapi_schema
5 | from hypha_rpc import login, connect_to_server
6 |
7 | client = AsyncOpenAI()
8 |
9 | async def convert_extensions(builtin_extensions):
10 | extension_services = {}
11 | for extension in builtin_extensions:
12 | tools = await extension_to_tools(extension)
13 | for tool in tools:
14 | extension_services[tool.__name__] = tool
15 | return extension_services
16 |
17 | async def serve_actions(server, server_url, builtin_extensions):
18 | extension_services = await convert_extensions(builtin_extensions)
19 | svc = {
20 | "id": "bioimageio-chatbot-extensions-api",
21 | "name": "BioImage.io Chatbot Extensions",
22 | "description": "A collection of chatbot extensions for facilitate user interactions with external documentation, services and tools.",
23 | "config": {
24 | "visibility": "public",
25 | "require_context": False
26 | },
27 | }
28 | svc.update(extension_services)
29 | workspace = server.config['workspace']
30 | service_id = "bioimageio-chatbot-extensions-api"
31 | openapi_schema = get_service_openapi_schema(svc, f"{server_url}/{workspace}/services/{service_id}")
32 | svc["get_openapi_schema"] = lambda : openapi_schema
33 |
34 | service_info = await server.register_service(svc)
35 | print(f"Service registered, openapi schema: {server_url}/services/call?service_id={service_info['id']}&function_key=get_openapi_schema")
36 |
37 | async def start_server(server_url):
38 | token = await login({"server_url": server_url})
39 | server = await connect_to_server({"server_url": server_url, "token": token, "method_timeout": 100})
40 | print(f"Connected to server: {server_url}")
41 | await serve_actions(server, server_url)
42 |
43 |
44 | if __name__ == "__main__":
45 | server_url = "https://staging.chat.bioimage.io/"
46 | loop = asyncio.get_event_loop()
47 | loop.create_task(start_server(server_url))
48 | loop.run_forever()
--------------------------------------------------------------------------------
/docs/beta-testing-guidelines.md:
--------------------------------------------------------------------------------
1 | # BioImage.IO Chatbot Beta Testing Guidelines
2 |
3 | Thank you for participating in the beta testing phase of the BioImage.IO Chatbot! Your feedback is invaluable in helping us improve and refine the chatbot. Please follow these guidelines to ensure a smooth testing experience:
4 |
5 | ## Prerequisites
6 |
7 | 1. Please Read the [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md).
8 | 2. **Login with Your Email Account:** First, please create an account [here](https://ai.imjoy.io/public/apps/hypha-login/). Then use the chatbot via [https://bioimage.io/chat](https://bioimage.io/chat). If you have any issues with the login, please contact us.
9 |
10 | ## Beta Testing Steps
11 |
12 | 1. **Access the Chatbot:**
13 | - Explore the example usage guide: [Usage Example Guide](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/usage-example.md)
14 | - Access the BioImage.IO Chatbot through the link we provided via email.
15 |
16 | 2. **Testing Scenarios:**
17 | - Try editing your profile so that it best describes you.
18 | - Test the chatbot with various scenarios relevant to your field (biologist, developer, etc.).
19 | - Try different knowledge base channels and observe the responses.
20 |
21 | 3. **Provide Feedback:**
22 | - For a specific chatbot response, you can also like/dislike a response from the BioImage.IO Chatbot by clicking the thumbs up/down button under each message. Then, a pop up will appear for you to provide feedback on the response.
23 | - For general feedback, please use the "Feedback" button at the bottom of the chatbot interface. Feel free to comment on the current chat session, or provide general feedback or ideas. For example, we would be happy to hear about your experience and ideas on how to improve the chatbot.
24 | - If you encounter any bugs, issues, or unexpected behavior, please use the [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues) section to create a new issue.
25 |
26 | 4. **Community Contributions:**
27 | - If you have data sources or documentation relevant to the chatbot, feel free to propose community contributions.
28 |
29 | ## Conclusion
30 |
31 | Thank you for your valuable contribution to the BioImage.IO Chatbot beta testing! Your input helps us enhance the functionality and user experience for the entire community.
32 |
--------------------------------------------------------------------------------
/docs/creating_GPTs.md:
--------------------------------------------------------------------------------
1 | # Creating OpenAI GPTs with Chatbot Extensions
2 |
3 | ## Introduction
4 | In addition to standalone usage, the BioImage.IO Chatbot supports porting extensions to OpenAI custom [GPTs](https://chat.openai.com/gpts) for users with OpenAI accounts. Chatbot extensions following the development model specified in the [development guidelines](./development.md) and [notebook tutorial](./bioimage-chatbot-extension-tutorial.ipynb) are automatically converted to `openapi` schema which can be used to create OpenAI GPTs using the online GPT creator.
5 |
6 | `openapi` schemas for extensions are generated on Chatbot server startup via the `register_service` function in [gpts_action.py](../bioimageio_chatbot/gpts_action.py). These schemas are then made available for OpenAI GPT creator import directly via url. This process for creating a custom GPT from the public BioImage.IO Chatbot instance extensions is shown below. Users are encouraged to submit their extensions to the BioImage.IO team for incorporation into the public Chatbot instance.
7 |
8 | Note that GPT actions are run through the hosted server instance (chat.bioimage.io in the case of the public Chatbot instance). Also note that the creation of custom OpenAI GPTs requires a paid OpenAI account.
9 |
10 | ## Creating a Custom GPT from the public Chatbot Instance
11 | The public Chatbot instance's `openapi` extension schema are available at the following link: `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema`
12 |
13 | After logging in to their OpenAI accounts, users can navigate to the GPTs [page](https://chat.openai.com/gpts) and click `Create` as shown below:
14 |
15 | 
16 |
17 | To add GPT actions from Chatbot extensions, navigate to the `Configure` tab and select `Create new action`:
18 |
19 | 
20 |
21 | The Chatbot-generated `openapi` schema can then be imported directly by selecting `Import from URL` and inputting the public Chatbot's extension schema `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema`
22 |
23 | Users can edit the JSON content to select individual actions from the Chatbot extensions if desired:
24 |
25 | 
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # BioImage.IO Chatbot Contribution Guidelines
2 |
3 | Thank you for your interest in contributing to the BioImage.IO Chatbot. Your contributions help us enhance the chatbot's knowledge base and provide more accurate and detailed responses. This document outlines how you can contribute new databases or information for retrieval by the chatbot.
4 |
5 | ## Introduction
6 |
7 | The BioImage.IO Chatbot relies on a comprehensive knowledge base to provide accurate responses to user queries. We encourage contributions to expand this knowledge base by adding new databases, information, or resources. Whether you're a researcher, developer, or domain expert, your contributions can help improve the chatbot's functionality.
8 |
9 | ## Contribution Process
10 | ### Knowledge Base
11 |
12 | You can contribute to the chatbot's knowledge base by adding new databases or information.
13 |
14 | We use the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to keep track of the databases and their details.
15 |
16 | Follow these steps to contribute to the BioImage.IO Chatbot:
17 |
18 | 1. Take a look at the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to see the databases that are currently integrated with the chatbot. The existing data sources are markdown files hosted on github, json files etc.
19 | 2. Prepare your database by organising your information to ensure it is accurate, relevant, and structured in a manner that can be easily retrieved. You can find some URLs for the existing data sources; please use those as examples.
20 | 3. Fork this repository and edit the manifest to include the details of your database, including the name, URL and description.
21 | 4. You can submit your contribution with a Pull Request (PR) with the updated manifest. Our team will review and integrate the changes.
22 | 5. Once your contribution is accepted and the chatbot's knowledge is updated, test that the chatbot is accurate in its responses when retrieving information from your database.
23 |
24 | Remember that at any step of the process you can contact us for feedback or assistance. We deeply appreciate your contribution!
25 |
26 | ### Develop Custom Extension
27 |
28 | The BioImage.IO Chatbot offers a framework designed for easy extensibility, allowing developers to enrich its capabilities with custom extensions. Please check details on how to contribute to the chatbot by developing custom extension [`Developing Chatbot Extensions`](./development.md).
29 |
30 |
31 | ## Contact Us
32 |
33 | If you have any questions, need assistance, or want to contribute to the chatbot's knowledge base, please don't hesitate to contact us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). Our team is here to help you get started and make valuable contributions.
34 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | quota_manager.db
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 | tests/*.pyc
7 | *.csv
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | data/
16 | dist/
17 | chat_sessions/
18 |
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | cover/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | .pybuilder/
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | # For a library or package, you might want to ignore these files since the code is
92 | # intended to run in multiple environments; otherwise, check them in:
93 | # .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | #Pipfile.lock
101 |
102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
103 | __pypackages__/
104 |
105 | # Celery stuff
106 | celerybeat-schedule
107 | celerybeat.pid
108 |
109 | # SageMath parsed files
110 | *.sage.py
111 |
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 |
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 |
125 | # Rope project settings
126 | .ropeproject
127 |
128 | # mkdocs documentation
129 | /site
130 |
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 |
136 | # Pyre type checker
137 | .pyre/
138 |
139 | # pytype static type analyzer
140 | .pytype/
141 |
142 | # Cython debug symbols
143 | cython_debug/
144 |
145 | # Visual Studio Code
146 | .vscode/
147 |
148 | # others
149 | *.bin
150 | logs/
151 | bioimageio-knowledge-base/
152 | .DS_Store
153 | chat_logs
154 | authorized_users.json
155 | .pypirc
156 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/static/imagej-js-extension.imjoy.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | [TODO: write documentation for this plugin.]
4 |
5 |
6 |
7 | {
8 | "name": "ImageJ.JS Chatbot Extension",
9 | "type": "web-worker",
10 | "tags": [],
11 | "ui": "",
12 | "version": "0.1.0",
13 | "cover": "",
14 | "description": "Run ImageJ.JS macro in the chatbot",
15 | "icon": "extension",
16 | "inputs": null,
17 | "outputs": null,
18 | "api_version": "0.1.8",
19 | "env": "",
20 | "permissions": [],
21 | "requirements": [],
22 | "dependencies": []
23 | }
24 |
25 |
26 |
99 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/evaluation.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 | from schema_agents.schema import Message
3 | from typing import Any, Dict, List, Optional, Union
4 | from schema_agents.role import Role
5 |
class EvaluationCriteria(BaseModel):
    """Rubric for scoring a chatbot answer against a reference answer.

    The default strings travel with the ``EvalScores`` schema passed to
    ``role.aask`` and serve as scoring instructions for the evaluator LLM.
    """
    relevance_and_accuracy: str = Field(
        default="Score 0-100: 0 means completely irrelevant, providing no useful information. "
                "100 means the answer is comprehensive, accurate, and closely matches the reference answer.",
        description="Assess how relevant and accurate the chatbot's answer is compared to the reference answer."
    )
    coverage_of_key_points: str = Field(
        default="Baseline >60: Answers covering the main points from the reference should score above 60, "
                "indicating they address the primary aspects of the question.",
        description="Evaluate whether the chatbot's answer includes the main points mentioned in the reference answer."
    )
    additional_information: str = Field(
        default="Variable Impact: Additional helpful information can increase the score. "
                "Irrelevant or unhelpful information should lead to a reduced score.",
        description="Assess the impact of additional information not present in the reference answer."
    )
    evaluation_guidelines: str = Field(
        default="Apply scoring criteria consistently and impartially. "
                "Provide justification for scores, especially for significant deviations from the baseline.",
        description="Guidelines for objective and transparent evaluation."
    )
27 |
class EvalInput(BaseModel):
    """Input for evaluating scores of LLM-based system."""
    # One question/answer triple: the question, the expected (reference) answer,
    # and the answer produced by the system under evaluation.
    question: str = Field(description="The question that was asked.")
    reference_answer: str = Field(description="The answer that was expected.")
    llm_answer: str = Field(description="The answer that was generated by the LLM-based system.")
33 |
class EvalScores(BaseModel):
    """Scores of evaluating llm answer."""
    criteria: EvaluationCriteria = Field(description="Criteria for evaluating the performance of the LLM-based system.")
    # FIX: corrected typo "access" -> "assess" in the description; this text is
    # part of the schema the evaluator LLM sees, so the wording matters.
    similarity_score: float = Field(description="Following the criteria, assess the llm_answer. Float between 0 and 100 representing the similarity score. ")
38 |
def create_eval_agent():
    """Build the evaluator agent ("Thomas") that scores LLM answers against references."""
    async def bot_answer_evaluate(req: EvalInput, role: Role) -> EvalScores:
        """Return the answer to the question."""
        return await role.aask(req, EvalScores)

    # The Role wraps the scoring action and pins the evaluation model.
    return Role(
        name="Thomas",
        profile="Evaluator",
        goal="Evaluate the performance of the LLM-based system.",
        constraints=None,
        actions=[bot_answer_evaluate],
        model="gpt-4-1106-preview",
    )
54 |
async def evaluate(question, reference_answer, llm_answer):
    """Score `llm_answer` against `reference_answer` and return a 0-100 similarity score."""
    payload = EvalInput(question=question, reference_answer=reference_answer, llm_answer=llm_answer)
    agent = create_eval_agent()
    responses = await agent.handle(Message(content=payload.model_dump_json(), data=payload, role="User"))
    # The first response carries the EvalScores data object.
    return responses[0].data.similarity_score
61 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/web_search_extension/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from schema_agents import schema_tool
3 | from bioimageio_chatbot.utils import ChatbotExtension
4 | from pydantic import Field
5 | from typing import Optional
6 |
7 | import httpx
8 | from bs4 import BeautifulSoup
9 |
10 | from .langchain_websearch import LangchainCompressor
11 |
12 | default_langchain_compressor = None
13 |
@schema_tool
async def search_web(query: str=Field(description="space separated keywords for the duckduckgo search engine"), max_results: int = Field(description="maximum number of results to return")):
    """Search the web for information using duckduckgo."""
    from duckduckgo_search import AsyncDDGS
    # Strip surrounding quotes the model sometimes adds around keywords.
    cleaned = query.strip("\"'")
    hits = await AsyncDDGS(proxy=None).atext(
        cleaned, region='wt-wt', safesearch='moderate', timelimit=None, max_results=max_results
    )
    if not hits:
        return "No relevant information found."
    # Normalise each hit into a plain, JSON-serialisable dict.
    return [{"title": hit['title'], "body": hit['body'], "url": hit['href']} for hit in hits]
27 |
@schema_tool
async def browse_web_pages(query: str=Field(description="keywords or a sentence describing the information to be retrieved"), urls: list[str]=Field(description="list of web page urls to analyse"), num_results_to_process: Optional[int]=Field(5, description="number of results to process")):
    """Read web pages and return compressed documents with most relevant information."""
    global default_langchain_compressor
    # Lazily create the (expensive) compressor once and reuse it across calls.
    default_langchain_compressor = default_langchain_compressor or LangchainCompressor(device="cpu")

    documents = await default_langchain_compressor.faiss_embedding_query_urls(query, urls,
                                                                              num_results=num_results_to_process)

    if not documents:  # Fall back to old simple search rather than returning nothing
        print("LLM_Web_search | Could not find any page content "
              "similar enough to be extracted, using basic search fallback...")
        return "No relevant information found."
    # FIX: metadata.get('source') returns None when the key is missing, which
    # previously raised a TypeError on string concatenation; default to ''.
    # Return JSON-serializable strings.
    return [doc.page_content + '\nsource: ' + doc.metadata.get('source', '') for doc in documents]
43 |
@schema_tool
async def read_webpage(url: str=Field(description="the web url to read")) -> str:
    """Read the full content of a web page converted to plain text."""
    # Present a realistic browser fingerprint to avoid trivial bot blocking.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }
    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=request_headers)

    # Drop non-content elements before extracting visible text.
    soup = BeautifulSoup(response.content, features="lxml")
    for element in soup(["script", "style"]):
        element.extract()

    return '\n'.join(s.strip() for s in soup.stripped_strings)
62 |
63 |
def get_extension():
    """Assemble the web-search chatbot extension exposing `search` and `browse` tools."""
    return ChatbotExtension(
        id="web",
        name="Search Web",
        description="Search the web for information using duckduckgo. Search by keywords and returns a list of relevant documents.",
        tools={"search": search_web, "browse": browse_web_pages},
    )
71 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 | import asyncio
4 | import subprocess
5 | import os
6 | from bioimageio_chatbot.knowledge_base import load_knowledge_base
7 |
def start_server(args):
    """Launch a hypha server subprocess configured to host the chatbot service."""
    os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true" if args.login_required else "false"
    # get current file path so we can get the path of apps under the same directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    subprocess.run([
        sys.executable,
        "-m",
        "hypha.server",
        f"--host={args.host}",
        f"--port={args.port}",
        f"--public-base-url={args.public_base_url}",
        f"--static-mounts=/chat:{current_dir}/static",
        "--startup-functions=bioimageio_chatbot.chatbot:register_chat_service",
    ])
26 |
def connect_server(args):
    """Connect the chatbot to a running hypha server and block the event loop forever."""
    # Alias the import so it does not shadow this CLI handler's name.
    from bioimageio_chatbot.chatbot import connect_server as _connect
    os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true" if args.login_required else "false"
    loop = asyncio.get_event_loop()
    loop.create_task(_connect(args.server_url))
    loop.run_forever()
37 |
def create_knowledge_base(args):
    """Build the vector knowledge base into the directory given by `--output-dir`."""
    from bioimageio_chatbot.knowledge_base import create_vector_knowledge_base as build_kb
    build_kb(args.output_dir)
41 |
def init(args):
    """Ensure the knowledge base exists locally, load it, and list its databases."""
    knowledge_base_path = os.environ.get("BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base")
    assert knowledge_base_path is not None, "Please set the BIOIMAGEIO_KNOWLEDGE_BASE_PATH environment variable to the path of the knowledge base."
    if not os.path.exists(knowledge_base_path):
        # load_knowledge_base fetches missing databases into this directory.
        print(f"The knowledge base is not found at {knowledge_base_path}, will download it automatically.")
        os.makedirs(knowledge_base_path, exist_ok=True)
    docs_store_dict = load_knowledge_base(knowledge_base_path)

    print("Databases loaded in the knowledge base:")
    for name in docs_store_dict:
        print(f" - {name}")
53 |
def main():
    """Parse command-line arguments and dispatch to the selected subcommand."""
    parser = argparse.ArgumentParser(description="BioImage.IO Chatbot utility commands.")
    subparsers = parser.add_subparsers()

    # `init`: download (if needed) and load the knowledge base.
    init_cmd = subparsers.add_parser("init")
    init_cmd.set_defaults(func=init)

    # `start-server`: run a local hypha server hosting the chatbot.
    server_cmd = subparsers.add_parser("start-server")
    server_cmd.add_argument("--host", type=str, default="0.0.0.0")
    server_cmd.add_argument("--port", type=int, default=9000)
    server_cmd.add_argument("--public-base-url", type=str, default="")
    server_cmd.add_argument("--login-required", action="store_true")
    server_cmd.set_defaults(func=start_server)

    # `connect-server`: attach the chatbot to an existing hypha server.
    connect_cmd = subparsers.add_parser("connect-server")
    connect_cmd.add_argument("--server-url", default="https://ai.imjoy.io")
    connect_cmd.add_argument("--login-required", action="store_true")
    connect_cmd.set_defaults(func=connect_server)

    # `create-knowledge-base`: build the vector store locally.
    kb_cmd = subparsers.add_parser("create-knowledge-base")
    kb_cmd.add_argument("--output-dir", default="./bioimageio-knowledge-base")
    kb_cmd.set_defaults(func=create_knowledge_base)

    args = parser.parse_args()
    if hasattr(args, 'func'):
        args.func(args)
    else:
        # No subcommand given: show usage instead of failing.
        parser.print_help()

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/docs/DISCLAIMER.md:
--------------------------------------------------------------------------------
1 | # Disclaimer for BioImage.IO Chatbot
2 |
3 | ## Research Purpose and Use of Copyrighted Material
4 |
5 | The BioImage.IO Chatbot ("Chatbot") is part of a research project focused on Text and Data Mining (TDM) to support advancements in bioimage analysis. The primary purpose of this Chatbot is to assist users in navigating resources, tools, and workflows related to bioimage analysis for research purposes. In compliance with the European Union's copyright exception on TDM as outlined in [Directive (EU) 2019/790](https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32019L0790), the Chatbot utilizes copyrighted materials to which we have lawful access. These materials are used exclusively for non-commercial research purposes.
6 |
7 | ## Evaluation-Only Use of Source Code and Live Demo
8 |
9 | The BioImage.IO Chatbot's source code and live demo instance are provided solely for evaluation purposes. These services are intended to support ongoing research in Text and Data Mining and are not intended for production use or commercial purposes. Users are encouraged to explore the capabilities of the Chatbot but should refrain from deploying it in any production environment or for commercial gain.
10 |
11 | ## General Usage
12 |
13 | While we strive for accuracy, the Chatbot is not a substitute for professional advice, consultation, diagnosis, or any kind of formal scientific interpretation. Users should independently verify the accuracy and completeness of the information provided by the Chatbot.
14 |
15 | ## No Warranties
16 |
17 | The Chatbot service is provided "as is" and "as available" without any warranties of any kind, either express or implied, including but not limited to the implied warranties of merchantability, fitness for a particular purpose, or non-infringement. We make no warranty that the service will meet your requirements or be available on an uninterrupted, secure, or error-free basis.
18 |
19 | ## Liability
20 |
21 | Under no circumstances will we be liable for any loss or damage incurred as a result of the use of this Chatbot, including but not limited to any errors or omissions in the content, any unauthorized access to or use of our servers, or any loss of data or profits.
22 |
23 | ## User Responsibility
24 |
25 | The user assumes all responsibility and risk for the use of this Chatbot. It is the user's responsibility to evaluate the accuracy, completeness, or usefulness of any information, opinion, or content available through the Chatbot service. Users are reminded to carefully check with the original sources and to respect the respective licenses of any copyrighted materials. If you are the author of any material used by the Chatbot and wish to opt-out, please contact us via [this form](https://oeway.typeform.com/to/K3j2tJt7).
26 |
27 | ## Third-Party Links
28 |
29 | The Chatbot may provide links to external websites or resources for your convenience. We have no control over these sites and resources, and we are not responsible for their availability, reliability, or the content provided.
30 |
31 | ## Data Privacy
32 |
33 | User interactions with the Chatbot may be stored for analysis and improvement of the service. All data will be handled in accordance with our Privacy Policy.
34 |
35 | ## Privacy Policy
36 |
37 | The personal data you may provide will be used to disseminate information pertaining to the execution of the Horizon Europe Funded AI4Life project (Grant number: 101057970). In accordance with the Grant Agreement, your data will be retained during the project and deleted when it has ended as soon as the retention period established by the EC is over. If you would like to update or delete your data during the course of the project, please contact us using [this form](https://oeway.typeform.com/to/K3j2tJt7?typeform-source=bioimage.io).
38 |
39 | ## Modifications
40 |
41 | We reserve the right to modify this disclaimer at any time, effective upon posting of an updated version on this website. Continued use of the Chatbot after any such changes shall constitute your consent to such changes.
42 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import requests
3 | import yaml
4 | import os
5 | from tqdm import tqdm
6 | from pydantic import BaseModel, Field
7 | from typing import Callable, Optional
8 | import typing
9 | from inspect import signature
10 | from typing import Any, Callable, Dict, Optional
11 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model
12 | from schema_agents import schema_tool
13 |
def get_manifest():
    """Load the knowledge-base manifest, downloading it from the repo when missing.

    Returns:
        dict: parsed contents of ``knowledge-base-manifest.yaml``.
    """
    # If no manifest is provided, download from the repo
    if not os.path.exists("./knowledge-base-manifest.yaml"):
        print("Downloading the knowledge base manifest...")
        response = requests.get("https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/knowledge-base-manifest.yaml")
        assert response.status_code == 200
        with open("./knowledge-base-manifest.yaml", "wb") as f:
            f.write(response.content)

    # FIX: open the manifest in a context manager so the file handle is closed
    # deterministically (the original left it open until garbage collection).
    with open("./knowledge-base-manifest.yaml", "r") as f:
        return yaml.load(f, Loader=yaml.FullLoader)
24 |
25 |
def download_file(url, filename):
    """Stream `url` to `filename` on disk with a byte-accurate progress bar.

    Args:
        url: source URL to download.
        filename: destination file path.
    """
    response = requests.get(url, stream=True)
    file_size = int(response.headers.get('content-length', 0))

    # FIX: the original both iterated over tqdm(...) (one tick per chunk) AND
    # called progress.update(len(data)), double-counting progress. Drive the
    # bar manually with byte counts only.
    with tqdm(desc=f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024) as progress:
        with open(filename, 'wb') as f:
            for data in response.iter_content(1024):
                progress.update(len(data))
                f.write(data)
38 |
39 |
def extract_schemas(function):
    """Extract input and output schema classes from a function's signature.

    The annotation of the first positional-or-keyword parameter is taken as the
    input schema and the return annotation as the output schema. ``Union`` /
    ``Optional`` annotations (including PEP 604 ``X | Y``) are expanded into a
    list of their member types.

    Args:
        function: callable with annotated parameters and return type.

    Returns:
        tuple[list, list]: ``(input_schemas, output_schemas)``.
    """
    import types

    def _expand(annotation):
        # NOTE: replaces the original check against the private
        # ``typing._UnionGenericAlias`` with the public get_origin/get_args API.
        origin = typing.get_origin(annotation)
        union_kinds = (typing.Union,)
        if hasattr(types, "UnionType"):  # Python >= 3.10: also handle ``X | Y``
            union_kinds += (types.UnionType,)
        if origin in union_kinds:
            return list(typing.get_args(annotation))
        return [annotation]

    sig = signature(function)
    # Annotation of the first positional-or-keyword parameter (must exist).
    first_annotation = [
        p.annotation
        for p in sig.parameters.values()
        if p.kind == p.POSITIONAL_OR_KEYWORD
    ][0]
    return _expand(first_annotation), _expand(sig.return_annotation)
58 |
class ChatbotExtension(BaseModel):
    """Chatbot extension."""

    id: str  # unique identifier; used to derive tool names (see chatbot_extensions)
    name: str  # human-readable display name
    description: str  # summary of what the extension does
    tools: Optional[Dict[str, Any]] = {}  # mapping of tool id -> callable
    get_schema: Optional[Callable] = None  # optional async callable returning per-tool JSON schemas
    get_state: Optional[Callable] = None  # optional state provider — usage not shown here; verify against callers
    info: Optional[Dict[str, Any]] = {}  # optional extra metadata
69 |
class LegacyChatbotExtension(BaseModel):
    """A class that defines the interface for a user extension"""
    # Legacy single-tool interface; adapted to a schema_tool by
    # `legacy_extension_to_tool` below.
    name: str = Field(..., description="The name of the extension")
    description: str = Field(..., description="A description of the extension")
    get_schema: Optional[Callable] = Field(None, description="A function that returns the schema for the extension")
    execute: Callable = Field(..., description="The extension's execution function")
    schema_class: Optional[BaseModel] = Field(None, description="The schema class for the extension")
77 |
def convert_to_dict(obj):
    """Recursively convert pydantic models (and nested containers) to plain data.

    Args:
        obj: a ``BaseModel``, dict, list, or any other value.

    Returns:
        The same structure with every pydantic model replaced by a plain dict.
    """
    if isinstance(obj, BaseModel):
        # Use the pydantic v2 API (`.dict()` is deprecated); this also makes the
        # helper consistent with `chatbot_extensions.convert_to_dict`.
        return obj.model_dump()
    if isinstance(obj, dict):
        return {k: convert_to_dict(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_dict(v) for v in obj]
    return obj
86 |
87 |
async def legacy_extension_to_tool(extension: LegacyChatbotExtension):
    """Wrap a LegacyChatbotExtension's execute function as a schema_tool.

    The input schema comes from `get_schema()` (converted to a pydantic model)
    when provided; otherwise it is taken from the annotation of the first
    positional parameter of `execute`.
    """
    if extension.get_schema:
        schema = await extension.get_schema()
        extension.schema_class = json_schema_to_pydantic_model(schema)
    else:
        input_schemas, _ = extract_schemas(extension.execute)
        extension.schema_class = input_schemas[0]

    assert extension.schema_class, f"Extension {extension.name} has no valid schema class."

    # NOTE: Right now, the first arguments has to be req
    async def execute(req: extension.schema_class):
        print("Executing extension:", extension.name, req)
        # req = extension.schema_class.parse_obj(req)
        result = await extension.execute(req)
        return convert_to_dict(result)

    execute.__name__ = extension.name

    # Prefer the schema's own description as the wrapper's docstring (used as
    # the tool description by schema_tool).
    if extension.get_schema:
        execute.__doc__ = schema['description']

    if not execute.__doc__:
        # Fall back to the execute function's docstring, or the extension
        # description as a last resort.
        # if extension.execute is partial
        if hasattr(extension.execute, "func"):
            execute.__doc__ = extension.execute.func.__doc__ or extension.description
        else:
            execute.__doc__ = extension.execute.__doc__ or extension.description
    return schema_tool(execute)
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/__init__.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 | import pkgutil
4 | import importlib.util
5 | from pydantic import BaseModel
6 | from bioimageio_chatbot.utils import ChatbotExtension
7 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model
8 | from schema_agents import schema_tool
9 |
def get_builtin_extensions():
    """Discover `*_extension` submodules of this package and collect their extensions."""
    extensions = []
    seen_ids = set()
    for module_info in pkgutil.walk_packages(__path__, __name__ + '.'):
        if not module_info.name.endswith('_extension'):
            continue
        if hasattr(module_info.module_finder, 'find_module'):
            ext_module = module_info.module_finder.find_module(module_info.name).load_module(module_info.name)
        else:
            # for newer python versions, find_spec is used instead of find_module
            module_spec = importlib.util.find_spec(module_info.name)
            ext_module = importlib.util.module_from_spec(module_spec)
            module_spec.loader.exec_module(ext_module)
        exts = ext_module.get_extension() or []
        if isinstance(exts, ChatbotExtension):
            exts = [exts]
        for ext in exts:
            if not isinstance(ext, ChatbotExtension):
                print(f"Failed to load chatbot extension: {module_info.name}.")
                continue
            # Extension ids must be unique across all loaded modules.
            if ext.id in seen_ids:
                raise ValueError(f"Extension name {ext.id} already exists.")
            seen_ids.add(ext.id)
            extensions.append(ext)

    return extensions
33 |
def convert_to_dict(obj):
    """Recursively turn pydantic models inside `obj` into plain dictionaries."""
    if isinstance(obj, BaseModel):
        return obj.model_dump()
    if isinstance(obj, list):
        return [convert_to_dict(item) for item in obj]
    if isinstance(obj, dict):
        return {key: convert_to_dict(value) for key, value in obj.items()}
    return obj
42 |
def create_tool_name(ext_id, tool_id=""):
    """Build a CamelCase tool name from an extension id and an optional tool id."""
    combined = f"{ext_id}_{tool_id}"
    # Treat dashes, underscores, and dots as word separators.
    for sep in ("-", "_", "."):
        combined = combined.replace(sep, " ")
    words = re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)|\d+', combined)
    parts = [word if word.istitle() else word.capitalize() for word in words]
    return ''.join(parts)
48 |
def tool_factory(ext_id, tool_id, ext_tool, schema):
    """Wrap an extension tool callable as a schema_tool with a JSON-schema-derived input model."""
    model = json_schema_to_pydantic_model(schema)
    # The generated name and the model's docstring become the tool's identity.
    ext_tool.__name__ = create_tool_name(ext_id, tool_id)
    ext_tool.__doc__ = model.__doc__
    return schema_tool(ext_tool, input_model=model)
54 |
async def extension_to_tools(extension: ChatbotExtension):
    """Convert an extension's tools into callables tagged with their tool id."""
    tools = []
    if extension.get_schema:
        # Schema-providing extensions: wrap each declared tool via tool_factory.
        schemas = await extension.get_schema()
        for tool_id in schemas:
            assert tool_id in extension.tools, f"Tool `{tool_id}` not found in extension `{extension.id}`."
            tool = tool_factory(extension.id, tool_id, extension.tools[tool_id], schemas[tool_id])
            tool.__tool_id__ = tool_id
            tools.append(tool)
    else:
        # Otherwise the tools are used as-is, only renamed and tagged.
        for tool_id, ext_tool in extension.tools.items():
            ext_tool.__name__ = create_tool_name(extension.id, tool_id)
            ext_tool.__tool_id__ = tool_id
            tools.append(ext_tool)

    return tools
75 |
async def main():
    """Smoke test: load all builtin extensions and print their generated tools."""
    tools = []
    for extension in get_builtin_extensions():
        tools.append(await extension_to_tools(extension))
    print(tools)
83 |
if __name__ == "__main__":
    import json

    # Example JSON schema used to sanity-check json_schema_to_pydantic_model
    # before exercising the full extension-to-tools pipeline via main().
    schema = {
        "type": "object",
        "title": "RunScript",
        "description": "description",
        "properties": {
            "script": {
                "type": "string",
                "description": "Python script to execute",
            },
            "inputs": {
                "type": "array",
                "description": "Input objects to be restored into the script",
                "items": {
                    "type": "string",
                    "properties": {
                        "key": {
                            "type": "string",
                            "description": "Key of the object from the store to be restored",
                        },
                        "name": {
                            "type": "string",
                            "description": "Variable name of the object",
                        }
                    }
                }
            },
            "outputs": {
                "type": "array",
                "description": "Objects produced by the script as outputs or for further use",
                "items": {
                    "type": "string"
                }
            }
        },
        "required": ["script", "outputs"],
        "allow_additional_properties": False,
    }

    model = json_schema_to_pydantic_model(schema)
    print(model)
    asyncio.run(main())
--------------------------------------------------------------------------------
/tests/test_chatbot_answer.py:
--------------------------------------------------------------------------------
1 | import os
2 | from bioimageio_chatbot.chatbot import create_customer_service, get_builtin_extensions, QuestionWithHistory, UserProfile
3 | from bioimageio_chatbot.evaluation import evaluate
4 | from schema_agents.schema import Message
5 | import json
6 | import pandas as pd
7 | import asyncio
8 | import pytest
9 |
KNOWLEDGE_BASE_PATH = "./bioimageio-knowledge-base"  # local vector-store location
# Load extensions and build the customer-service agent once at import time;
# they are shared by all tests in this module.
builtin_extensions = get_builtin_extensions()
# Only each extension's name and description are passed along to the chatbot.
extensions = [{key:value for key, value in ext.model_dump().items() if key in ["name", "description"]} for ext in builtin_extensions]
customer_service = create_customer_service(builtin_extensions)

dir_path = os.path.dirname(os.path.realpath(__file__))
16 |
@pytest.fixture
def eval_questions():
    """Load evaluation questions, reference answers and ground-truth channel ids.

    Prefers a local CSV next to this file; otherwise falls back to a published
    Google Sheets CSV export. Returns a 3-tuple of equal-length lists:
    (questions, reference answers, ground-truth channel ids).
    """
    eval_file = os.path.join(dir_path, "Minimal-Eval-Test-20240111.csv")
    if os.path.exists(eval_file):
        query_answer = pd.read_csv(eval_file)
    else:
        query_answer = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTVgE2_eqBiAktHmg13jLrFrQJhbANkByY40f9vxptC6pShcjLzEuHzx93ATo0c0XcSYs9W1RRbaDdu/pub?gid=1280822572&single=true&output=csv")
    # Evaluate only rows 1-9 (row 0 is skipped).
    # NOTE(review): original indentation was ambiguous here — confirm whether
    # this subset should apply to the local CSV as well as the remote one.
    eval_index = range(1,10)
    query_answer = query_answer.iloc[eval_index]

    question_col = "Question"
    channel_id_col = "GT: Retrieved channel id"
    question_list = list(query_answer[question_col])
    reference_answer_list = list(query_answer["GPT-4-turbo Answer (With Context)- GT"])
    # ground_type = "Document Retrieval"
    # make it as list as the length equals to question_list
    # ground_type_list = [ground_type] * len(question_list)
    channel_id_list_gt = list(query_answer[channel_id_col])
    return question_list, reference_answer_list, channel_id_list_gt
37 |
38 |
async def validate_chatbot_answer(question, reference_answer, use_tools_gt, channel_id_gt, relevance_gt, similary_score_gt):
    """Ask the chatbot ``question`` and assert its answer scores at least
    ``similary_score_gt`` against ``reference_answer``.

    ``use_tools_gt``, ``channel_id_gt`` and ``relevance_gt`` are currently
    unused — the corresponding checks are commented out below — but are kept
    in the signature so callers don't break when those checks are re-enabled.
    (Note: "similary" is an existing typo for "similarity", preserved to keep
    the interface stable.)
    """
    chat_history=[]
    profile = UserProfile(name="", occupation="", background="")

    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), chatbot_extensions=extensions)
    resp = await customer_service.handle(Message(content=m.model_dump_json(), data=m , role="User"))
    # use_tools =resp[0].data.steps[0].details["use_tools"]
    # assert use_tools == use_tools_gt
    # execute_tool = resp[0].data.steps[1].name
    # # get the string after 'Execute: '
    # channel_id = execute_tool.split(": ")[1]
    # assert channel_id == channel_id_gt+"(docs)"

    # eval score
    # relevance = resp[0].data.steps[-1].details["relevant"]
    # assert relevance == relevance_gt
    # Extract the final textual answer from the last step of the response.
    chatbot_answer = resp[0].data.steps[-1].details['details'][0]['response']
    # LLM-based similarity scoring of the chatbot answer vs. the reference.
    similary_score = await evaluate(question, reference_answer, chatbot_answer)
    assert similary_score >= similary_score_gt
58 |
59 |
@pytest.mark.asyncio
async def test_chatbot1(eval_questions):
    """Run the chatbot against every evaluation question and require a
    similarity score of at least 80 versus the reference answer."""
    # await validate_chatbot_answer(
    #     question="What is deepImageJ?",
    #     reference_answer="DeepImageJ is a user-friendly plugin designed to facilitate the utilization of pre-trained neural networks within ImageJ and Fiji. It serves as a bridge between developers of deep-learning models and end-users in life-science applications, promoting the sharing of trained models across research groups. DeepImageJ is particularly valuable in various imaging domains and does not necessitate deep learning expertise or programming skills.",
    #     use_tools_gt=True,
    #     channel_id_gt="deepimagej(docs)",
    #     relevance_gt=True,
    #     similary_score_gt=4.0
    # )

    # await validate_chatbot_answer(
    #     question="What is a Bioimage Model Zoo community partner?",
    #     reference_answer="A BioImage Model Zoo community partner is an organization, company, research group, or software team that can consume and/or produce resources of the BioImage.IO model zoo. These partners continuously and openly contribute resources of their own, and they can participate in the decision-making process of the model specification. Additionally, they can show their logo in BioImage.IO, connect CI to automatically test new model compatibility with their software, and use other infrastructure features provided by BioImage.IO. The community partners can host their own Github repository for storing models and other relevant resources, which are then dynamically linked to the central repository of BioImage.IO. Each community partner is responsible for maintaining the resources that are relevant.",
    #     use_tools_gt=True,
    #     channel_id_gt="bioimage.io(docs)",
    #     relevance_gt=True,
    #     similary_score_gt=4.0
    # )

    questions, reference_answers, channel_id_list_gt = eval_questions
    # Questions are validated sequentially; any failing assertion aborts the test.
    for question, reference_answer, channel_id_gt in zip(questions, reference_answers, channel_id_list_gt):
        await validate_chatbot_answer(
            question=question,
            reference_answer=reference_answer,
            use_tools_gt=True,
            channel_id_gt=channel_id_gt,
            relevance_gt=True,
            similary_score_gt=80
        )
--------------------------------------------------------------------------------
/bioimageio_chatbot/quota.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import time
3 |
class QuotaManager:
    """Track per-user usage quotas backed by a SQLite database.

    Each user row stores a current ``quota`` balance, the ``max_quota`` it is
    restored to, the ``last_reset`` timestamp and a ``reset_period`` in
    seconds. The balance is lazily refilled by :meth:`check_quota` once the
    reset period has elapsed. Users in ``vip_list`` always have unlimited
    quota and are never persisted.

    NOTE(review): the check-then-update sequence is not atomic, so concurrent
    callers sharing one database may over-spend a quota.
    """

    # Supported reset-period names mapped to their length in seconds.
    PERIOD_SECONDS = {
        'monthly': 30 * 86400,
        'weekly': 7 * 86400,
        'daily': 86400,
        'hourly': 3600,
    }

    def __init__(self, db_file=':memory:', vip_list=None, default_quota=1.0, default_reset_period='daily'):
        """Open (or create) the quota database.

        Args:
            db_file: SQLite path; the default ':memory:' keeps state in RAM.
            vip_list: user ids exempt from quota accounting.
            default_quota: quota granted to users never seen before.
            default_reset_period: one of 'hourly', 'daily', 'weekly', 'monthly'.
        """
        self.db_file = db_file
        self.conn = sqlite3.connect(self.db_file)
        self.vip_list = vip_list or []
        self.default_quota = default_quota
        self.default_reset_period = self.period_to_seconds(default_reset_period)
        self._setup_database()

    def _setup_database(self):
        """Create the quotas table on first use (idempotent)."""
        cursor = self.conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS quotas (
                user_id TEXT PRIMARY KEY,
                quota REAL,
                last_reset REAL,
                reset_period INTEGER,  -- Reset period in seconds
                max_quota REAL
            )
        ''')
        self.conn.commit()

    def set_user_quota(self, user_id, max_quota, reset_period):
        """Create or overwrite a user's quota, starting with a full balance.

        Raises:
            ValueError: if ``reset_period`` is not a supported period name.
        """
        reset_seconds = self.period_to_seconds(reset_period)
        cursor = self.conn.cursor()
        cursor.execute('''
            INSERT OR REPLACE INTO quotas (user_id, quota, last_reset, reset_period, max_quota)
            VALUES (?, ?, ?, ?, ?)
        ''', (user_id, max_quota, time.time(), reset_seconds, max_quota))
        self.conn.commit()

    def check_quota(self, user_id):
        """Return the user's current quota balance, applying lazy resets.

        VIP users get ``float('inf')``. Unknown users are inserted with the
        defaults configured at construction time.
        """
        if user_id in self.vip_list:
            return float('inf')

        cursor = self.conn.cursor()
        cursor.execute("SELECT quota, last_reset, reset_period, max_quota FROM quotas WHERE user_id = ?", (user_id,))
        row = cursor.fetchone()
        if row:
            quota, last_reset, reset_period, max_quota = row
            # Refill the balance if the reset period has elapsed.
            if time.time() - last_reset >= reset_period:
                quota = max_quota
                last_reset = time.time()
                cursor.execute("UPDATE quotas SET quota = ?, last_reset = ? WHERE user_id = ?",
                               (quota, last_reset, user_id))
                self.conn.commit()
        else:
            # First sighting of this user: register them with the defaults.
            quota, last_reset, reset_period, max_quota = self.default_quota, time.time(), self.default_reset_period, self.default_quota
            cursor.execute("INSERT INTO quotas (user_id, quota, last_reset, reset_period, max_quota) VALUES (?, ?, ?, ?, ?)",
                           (user_id, quota, last_reset, reset_period, max_quota))
            self.conn.commit()
        return quota

    def use_quota(self, user_id, amount):
        """Deduct ``amount`` if the user has enough quota; return success.

        VIPs always succeed: their infinite balance is never stored in the
        table, so the UPDATE below is a harmless no-op for them.
        """
        if self.check_quota(user_id) >= amount:
            cursor = self.conn.cursor()
            cursor.execute("UPDATE quotas SET quota = quota - ? WHERE user_id = ?",
                           (amount, user_id))
            self.conn.commit()
            return True
        else:
            return False

    def reset_quota(self, user_id):
        """Immediately restore the user's quota balance to its maximum."""
        cursor = self.conn.cursor()
        cursor.execute("UPDATE quotas SET quota = max_quota, last_reset = ? WHERE user_id = ?",
                       (time.time(), user_id))
        self.conn.commit()

    def period_to_seconds(self, period):
        """Convert a period name to its length in seconds.

        Raises:
            ValueError: for unknown period names.
        """
        try:
            return self.PERIOD_SECONDS[period]
        except KeyError:
            # Fixed: the previous message omitted the supported 'hourly' option.
            raise ValueError("Invalid period. Choose from 'hourly', 'daily', 'weekly', 'monthly'.") from None

    def close(self):
        """Close the underlying SQLite connection (new, backward-compatible)."""
        self.conn.close()
84 |
if __name__ == '__main__':
    # Testing the functionality with asserts
    quota_manager = QuotaManager(vip_list=['userVIP'], default_quota=2.0, default_reset_period='daily')
    # quota_manager.set_user_quota('user123', 1.0, 'daily')
    quota_manager.set_user_quota('user234', 10.0, 'weekly')
    quota_manager.set_user_quota('user345', 30.0, 'monthly')

    # Assert initial quotas are set correctly
    assert quota_manager.check_quota('userVIP') == float('inf'), "VIP user quota should be infinite"
    # user123 has no explicit quota, so check_quota registers it with the
    # default quota (2.0) on first access.
    assert quota_manager.check_quota('user123') == 2.0, "user123 initial quota should be 2.0"
    assert quota_manager.check_quota('user234') == 10.0, "user234 initial quota should be 10.0"
    assert quota_manager.check_quota('user345') == 30.0, "user345 initial quota should be 30.0"

    # Assert usage and quota management
    assert quota_manager.use_quota('user123', 0.5) == True, "Should allow using 0.5 quota"
    assert quota_manager.check_quota('user123') == 2-0.5, "user123 quota after use should be 2-0.5"
    assert quota_manager.use_quota('user123', 3.6) == False, "Should not allow using 3.6 quota (not enough left)"
    # VIPs report infinite quota, so any amount is accepted (no DB row changes).
    assert quota_manager.use_quota('userVIP', 1000) == True, "VIP should always be allowed to use quota"
    assert quota_manager.check_quota('user234') == 10.0, "user234 should still have full quota"

    # Test manual reset
    quota_manager.reset_quota('user123')
    assert quota_manager.check_quota('user123') == 2.0, "user123 should have full quota after reset"

    # Display final test results
    print("Tests completed successfully.")
111 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/web_search_extension/langchain_websearch.py:
--------------------------------------------------------------------------------
1 | import re
2 | import asyncio
3 | from typing import Union
4 |
5 | import httpx
6 | from bs4 import BeautifulSoup
7 | from langchain_community.document_transformers import EmbeddingsRedundantFilter
8 | from langchain.retrievers.document_compressors import DocumentCompressorPipeline
9 | from langchain.retrievers.ensemble import EnsembleRetriever
10 | from langchain_openai import OpenAIEmbeddings # HuggingFaceEmbeddings
11 | from langchain.text_splitter import RecursiveCharacterTextSplitter
12 | from langchain_community.vectorstores import FAISS
13 | from langchain.retrievers.document_compressors.embeddings_filter import EmbeddingsFilter
14 | from langchain.retrievers import ContextualCompressionRetriever
15 | from langchain.schema import Document
16 | try:
17 | from langchain_community.retrievers import BM25Retriever
18 | except ImportError:
19 | BM25Retriever = None
20 |
21 |
class LangchainCompressor:
    """Retrieve web pages and compress them down to the passages most relevant
    to a query, using an ensemble of a sparse BM25 retriever and a dense
    FAISS retriever built on OpenAI embeddings."""

    def __init__(self, device="cuda"):
        # ``device`` is retained for backward compatibility with the earlier
        # HuggingFaceEmbeddings implementation; OpenAIEmbeddings is a remote
        # service and ignores it.
        self.embeddings = OpenAIEmbeddings()  # HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={"device": device})
        # Collapse runs of 3+ spaces; used by preprocess_text for BM25 input.
        self.spaces_regex = re.compile(r" {3,}")

    def preprocess_text(self, text: str) -> str:
        """Normalize whitespace prior to BM25 tokenization."""
        text = text.replace("\n", " \n")
        text = self.spaces_regex.sub(" ", text)
        text = text.strip()
        return text

    async def faiss_embedding_query_urls(self, query: str, url_list: list[str], num_results: int = 5,
                                         similarity_threshold: float = 0.5, chunk_size: int = 500) -> list[Document]:
        """Download ``url_list`` concurrently, split the pages into chunks and
        return up to ``num_results`` chunks most relevant to ``query``.

        Pages that fail to download are skipped with a log line; an empty
        list is returned when no page could be fetched.

        Raises:
            ImportError: when the optional BM25Retriever dependency is missing.
        """
        # Fail fast on the missing optional dependency *before* any network
        # or embedding work. (Previously this check only ran after the FAISS
        # index had already been built, wasting the expensive embedding step.)
        if not BM25Retriever:
            raise ImportError("Could not import BM25Retriever. Please ensure that you have installed "
                              "langchain==0.0.352")

        html_url_tuples = []

        # Creating a list of tasks for each URL
        tasks = [download_html(url) for url in url_list]

        # Using asyncio.gather to run all tasks concurrently
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Processing results and exceptions
        for result, url in zip(results, url_list):
            if isinstance(result, Exception):
                print(f'LLM_Web_search | An exception occurred for {url}: {result}')
            else:
                html_url_tuples.append((result, url))

        if not html_url_tuples:
            return []

        documents = [html_to_plaintext_doc(html, url) for html, url in html_url_tuples]

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=10,
                                                       separators=["\n\n", "\n", ".", ", ", " ", ""])
        split_docs = text_splitter.split_documents(documents)
        # Dense retriever: semantic similarity search via a FAISS index.
        faiss_retriever = FAISS.from_documents(split_docs, self.embeddings).as_retriever(
            search_kwargs={"k": num_results}
        )

        # This sparse retriever is good at finding relevant documents based on keywords,
        # while the dense retriever is good at finding relevant documents based on semantic similarity.
        bm25_retriever = BM25Retriever.from_documents(split_docs, preprocess_func=self.preprocess_text)
        bm25_retriever.k = num_results

        # Drop near-duplicate chunks, then chunks below the similarity cutoff.
        redundant_filter = EmbeddingsRedundantFilter(embeddings=self.embeddings)
        embeddings_filter = EmbeddingsFilter(embeddings=self.embeddings, k=None,
                                             similarity_threshold=similarity_threshold)
        pipeline_compressor = DocumentCompressorPipeline(
            transformers=[redundant_filter, embeddings_filter]
        )

        compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
                                                               base_retriever=faiss_retriever)

        # NOTE(review): the weights sum to 0.9, slightly favoring the dense
        # retriever — confirm whether [0.4, 0.6] was intended.
        ensemble_retriever = EnsembleRetriever(
            retrievers=[bm25_retriever, compression_retriever], weights=[0.4, 0.5]
        )

        compressed_docs = await ensemble_retriever.aget_relevant_documents(query)

        # Ensemble may return more than "num_results" results, so cut off excess ones
        return compressed_docs[:num_results]
90 |
91 |
async def download_html(url: str) -> bytes:
    """Fetch ``url`` and return the raw HTML body as bytes.

    Raises an httpx HTTP error for non-2xx responses and ValueError when the
    response is not served as text/html.
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }

    async with httpx.AsyncClient() as session:
        resp = await session.get(url, headers=request_headers, timeout=8)
        resp.raise_for_status()

    # Only accept HTML payloads; anything else is an error for the caller.
    content_type = resp.headers.get("Content-Type", "")
    if not content_type.startswith("text/html"):
        raise ValueError(f"Expected content type text/html. Got {content_type}.")
    return resp.content
107 |
def html_to_plaintext_doc(html_text: Union[str, bytes], url: str) -> Document:
    """Strip scripts/styles from HTML and wrap the visible text in a Document
    whose metadata records the source URL."""
    parsed = BeautifulSoup(html_text, features="lxml")
    # Remove non-content elements before extracting text.
    for tag in parsed(["script", "style"]):
        tag.extract()

    text = '\n'.join(fragment.strip() for fragment in parsed.stripped_strings)
    return Document(page_content=text, metadata={"source": url})
116 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/vision_extension.py:
--------------------------------------------------------------------------------
1 | from bioimageio_chatbot.utils import ChatbotExtension
2 | from openai import AsyncOpenAI
3 | from schema_agents import schema_tool
4 | import base64
5 | from pydantic import Field, BaseModel
6 | from typing import Optional, List
7 | import httpx
8 | from PIL import Image
9 | from io import BytesIO
10 | import matplotlib.pyplot as plt
11 | # make sure matplotlib is operating headless (no GUI)
12 | plt.switch_backend("agg")
13 |
# Function to encode the image
def encode_image(image_path):
    """Read the file at ``image_path`` and return its contents as a base64
    string (UTF-8 decoded)."""
    with open(image_path, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw).decode('utf-8')
18 |
async def aask(images, messages, max_tokens=1024):
    """Ask GPT-4o about one or more images.

    Downloads each image, renders them with matplotlib (a single plot, or a
    row of subplots when multiple, titled when a title is provided), sends
    the rendered PNG plus the text messages to the model and returns the
    model's reply text.

    Args:
        images: list of ImageInfo-like objects with ``url`` and ``title``.
        messages: list of plain strings appended as text content.
        max_tokens: completion token cap.

    Raises:
        ValueError: when a downloaded payload cannot be decoded as an image.
    """
    aclient = AsyncOpenAI()
    user_message = []
    # download the images and save it into a list of PIL image objects
    img_objs = []
    for image in images:
        async with httpx.AsyncClient() as client:
            response = await client.get(image.url)
            response.raise_for_status()
        try:
            img = Image.open(BytesIO(response.content))
        except Exception as e:
            raise ValueError(f"Failed to read image {image.title or ''} from {image.url}. Error: {e}")
        img_objs.append(img)

    if len(img_objs) == 1:
        # plot the image with matplotlib
        plt.imshow(img_objs[0])
        if images[0].title:
            plt.title(images[0].title)
        fig = plt.gcf()
    else:
        # plot them in subplots with matplotlib in a row
        fig, ax = plt.subplots(1, len(img_objs), figsize=(15, 5))
        for i, img in enumerate(img_objs):
            ax[i].imshow(img)
            # BUG FIX: previously this checked images[0].title for every
            # subplot, so titles were skipped (or None passed to set_title)
            # whenever the first image's title differed from the others'.
            if images[i].title:
                ax[i].set_title(images[i].title)
    # save the plot to a buffer as png format and convert to base64
    buffer = BytesIO()
    fig.tight_layout()
    # if the image size (width or height) is smaller than 512, use the original size and aspect ratio
    # otherwise set the maximun width of the image to n*512 pixels, where n is the number of images; the maximum total width is 1024 pixels
    fig_width = min(1024, len(img_objs)*512, fig.get_figwidth()*fig.dpi)
    # make sure the pixel size (not inches)
    fig.set_size_inches(fig_width/fig.dpi, fig.get_figheight(), forward=True)

    # save fig
    fig.savefig(buffer, format="png")
    # Release the figure so repeated calls do not leak matplotlib state.
    plt.close(fig)
    buffer.seek(0)
    base64_image = base64.b64encode(buffer.read()).decode("utf-8")
    # append the image to the user message
    user_message.append({
        "type": "image_url",
        "image_url": {
            "url": f"data:image/png;base64,{base64_image}"
        }
    })

    for message in messages:
        assert isinstance(message, str), "Message must be a string."
        user_message.append({"type": "text", "text": message})

    response = await aclient.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful AI assistant that help user to inspect the provided images visually based on the context, make insightful comments and answer questions about the provided images."
            },
            {
                "role": "user",
                "content": user_message
            }
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content
88 |
class ImageInfo(BaseModel):
    """Image information."""
    # Remote location of the image; inspect_tool requires it to start with "http".
    url: str=Field(..., description="The URL of the image.")
    # Optional display title used when rendering the image with matplotlib.
    title: Optional[str]=Field(None, description="The title of the image.")
93 |
@schema_tool
async def inspect_tool(images: List[ImageInfo]=Field(..., description="A list of images to be inspected, each with a http url and title"), query: str=Field(..., description="user query about the image"), context_description: str=Field(..., description="describe the context for the visual inspection task")) -> str:
    """Inspect an image using GPT4-Vision."""
    # Reject any non-http(s) image location up front.
    for item in images:
        assert item.url.startswith("http"), "Image URL must start with http."

    return await aask(images, [context_description, query])
103 |
def get_extension():
    """Build the Vision Inspector chatbot extension exposing the inspect tool."""
    return ChatbotExtension(
        id="vision",
        name="Vision Inspector",
        description="Perform visual inspection on images using GPT4-Vision model, used for describing images and answer image related questions. The images will be plotted using matplotlib and then sent to the GPT4-Vision model for inspection.",
        tools={"inspect": inspect_tool},
    )
113 |
if __name__ == "__main__":
    import asyncio
    # Manual smoke test: inspect two remote images via the vision extension.
    async def main():
        extension = get_extension()
        print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon"), ImageInfo(url="https://bioimage.io/static/img/bioimage-io-logo.png", title="BioImage.io Logo")], query="What are these?", context_description="Inspect the BioImage.io icon and logo."))
        # test only one image
        # print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon")], query="What is this?", context_description="Inspect the BioImage.io icon."))
    # Run the async function
    asyncio.run(main())
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/bia_extension.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from pydantic import BaseModel, Field
3 | from typing import Dict, Any, Optional
4 | from bioimageio_chatbot.utils import ChatbotExtension
5 | from schema_agents import schema_tool
6 |
class BioImageArchiveClient:
    """Thin async client for the EBI BioStudies/BioImage Archive REST API.

    The public coroutines use pydantic ``Field`` defaults so they can be
    wrapped by ``schema_tool`` and exposed to the chatbot; their docstrings
    are part of the tool contract and are kept verbatim.
    """

    def __init__(self):
        # Base endpoint of the BioStudies REST API.
        self._base_url = "https://www.ebi.ac.uk/biostudies/api/v1"

    async def search_bioimage_archive(self,
                                      query: str = Field(..., description="The search query string."),
                                      pageSize: int = Field(10, gt=0, description="Number of search results per page."),
                                      page: int = Field(1, description="Page number of the search results."),
                                      sortOrder: Optional[str] = Field("descending", description="Sort order: ascending or descending.")
                                      ) -> Dict[str, Any]:
        """Search the BioImage Archive for studies and image datasets, returning a list of studies. The link format to each study in the results is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
        url = f"{self._base_url}/bioimages/search"
        params = {
            "query": query,
            "pageSize": pageSize,
            "page": page,
            "sortOrder": sortOrder
        }
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            return self._simplify_search_results(response.json())

    def _simplify_search_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce the raw search payload to the fields the chatbot needs.

        Robustness fix: hit fields are read with ``.get`` so a partial hit
        (e.g. missing "author") no longer raises KeyError; missing values
        default to the empty string.
        """
        simplified_results = {
            "hits": [
                {
                    "title": hit.get("title", ""),
                    "author": hit.get("author", ""),
                    "content": hit.get("content", ""),
                    "accession": hit.get("accession", "")
                } for hit in results.get("hits", [])
            ],
            "totalHits": results.get("totalHits"),
            "page": results.get("page"),
            "pageSize": results.get("pageSize")
        }
        return simplified_results

    async def read_bioimage_archive_study(self, accession: str = Field(..., description="Accession number of the study.")) -> Dict[str, Any]:
        """Read detailed information about a specific study from the BioImage Archive, returning a simplified dictionary. The link format to the study is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
        url = f"{self._base_url}/studies/{accession}"
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            return self._simplify_study_details(response.json())

    def _simplify_study_details(self, study_details: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten the nested study JSON to title/description/accession/link/authors."""
        # Initialize simplified details with placeholders for title and description
        simplified_details = {
            "title": "",
            "description": "",
            "accession": study_details.get("accno", ""),
            "link": f"https://www.ebi.ac.uk/biostudies/bioimages/studies/{study_details.get('accno', '')}",
            "authors": []
        }

        # Extract title and description from the attributes array by name
        for attribute in study_details.get("section", {}).get("attributes", []):
            if attribute.get("name") == "Title":
                simplified_details["title"] = attribute.get("value", "")
            elif attribute.get("name") == "Description":
                simplified_details["description"] = attribute.get("value", "")

        # Extracting author information
        author_subsections = [sub for sub in study_details.get("section", {}).get("subsections", []) if sub.get("type") == "Author"]
        for author in author_subsections:
            author_attributes = {attr["name"]: attr["value"] for attr in author.get("attributes", [])}
            simplified_details["authors"].append(author_attributes.get("Name", ""))

        return simplified_details
78 |
79 |
80 |
def get_extension():
    """Assemble the BioImage Archive chatbot extension with search/read tools."""
    client = BioImageArchiveClient()
    search_tool = schema_tool(client.search_bioimage_archive)
    read_tool = schema_tool(client.read_bioimage_archive_study)

    async def get_schema():
        # Expose the pydantic input schemas of both tools.
        return {
            "search": search_tool.input_model.schema(),
            "read": read_tool.input_model.schema(),
        }

    return ChatbotExtension(
        id="bioimage_archive",
        name="Search BioImage Archive",
        description="Search for biological images related studies in the BioImage Archive, it provide studies and image datasets related to microscopy images and other imaging modalities.",
        get_schema=get_schema,  # This is optional, exists only for testing purposes
        tools={"search": search_tool, "read": read_tool},
    )
102 |
if __name__ == "__main__":
    import asyncio
    # Manual smoke test hitting the live BioStudies API.
    async def main():
        bioimage_archive_client = BioImageArchiveClient()
        # Example to search in BioImage Archive with simplified results
        search_results = await bioimage_archive_client.search_bioimage_archive(query="cells", pageSize=1)
        print(search_results)

        # Example to read a specific study from BioImage Archive with simplified details
        study_details = await bioimage_archive_client.read_bioimage_archive_study(accession="S-BSST314")
        print(study_details)

    # Run the async function
    asyncio.run(main())
--------------------------------------------------------------------------------
/docs/technical-overview.md:
--------------------------------------------------------------------------------
1 | # Design and Functionality of BioImage.IO Chatbot: A User Guide and Technical Overview
2 |
3 | ## Chatbot Interface
4 |
5 | After following the installation guidelines from the [README](/README.md), the chat interface will resemble Figure 1.
6 |
7 | 
8 | *Figure 1. The chat interface of the BioImage.IO Chatbot.*
9 |
10 | Users can input their profiles as depicted in Figure 2.
11 | 
12 | *Figure 2. Users can personalize responses by clicking `Edit Profile` and save their settings for future conversations by clicking `Save`.*
13 |
As of today, our chatbot integrates 6 extensions including document search in bioimage.io knowledge base, tools search on Bioimage Informatics Index (biii.eu), bioimage topics search in Bioimage Archive and Image.sc Forum, web search, and information search in Bioimage Model Zoo. The document search utilizes knowledge bases from the following pivotal communities: bioimage.io [2], Imjoy [3], deepimageJ [4], ImageJ [5], bio.tools [6], and scikit-image [7]. We also allow users to specify a preferred extension for information retrieval, as shown in Figure 3. If an extension is designated, the chatbot sources information using the specific extension and its corresponding source. Otherwise, it uses an intelligent selection process driven by a schema-based agent to choose the most relevant extension based on the user's query.
15 |
16 | 
17 | *Figure 3. Users can personalize the conversation by selecting a specific channel from the ‘Knowledge Base Channel’.*
18 |
19 | ### Building the Knowledge Base
20 |
21 | The knowledge base is efficiently and collaboratively constructed by downloading documentation from given URLs. These can be repositories, PDFs, or other forms of documentation. We use a regular expression splitter to segment the documentation into manageable chunks for efficient and accurate retrieval. These chunks are then embedded and stored as vectors in a FAISS [1]-based vector database.
22 |
23 | ## Schema-Based Agent Design
24 |
25 | The chatbot's ability to understand and respond to user queries is substantially improved by employing a schema-based agent design. Unlike traditional context-based models, our approach utilizes predefined schemas to guide the conversation and information retrieval process.
26 |
27 | The schema-based agent operates on the function-call LLM [8], and uses input and output schemas to generate text output. Within this implementation, we construct a customer service chatbot by defining a role class, as shown in Figure 4.
28 |
29 | 
30 | *Figure 4. Creation of a chatbot role class named ‘CustomerServiceRole’ by defining fields of the role class.*
31 |
32 | ## Extensions
33 | The BioImage.IO Chatbot employs diverse methods to generate responses, currently encompassing five distinct response modes. The response mode is chosen by the schema-based agent based on the user's query and the selected channel.
34 |
35 | ### Search BioImage Docs
36 | This extension allows the chatbot to search information in a community-driven bioimage related knowledge base. With a specific query, the chatbot extracts essential elements from the user's question to fetch information from the relevant documentation.
37 | 
*Figure 6. Search in the BioImage Knowledge base documentation.*
39 |
40 | ### Search BioImage Information Index (biii.eu)
This extension allows the chatbot to search online software tools in biii.eu.
42 | 
43 | *Figure 7. Search in biii.eu.*
44 |
45 | The process begins with an initial response based on the user's query (`request`), which serves as a foundation for generating a new `query` for targeted information retrieval. This is combined with user profile data (`user_info`) and the query to produce a comprehensive final response.
46 |
47 | ### Search Bioimage Archive
This extension allows the chatbot to search for dataset indices in the BioImage Archive.
49 | 
50 | *Figure 8. Search in bioimage archive.*
51 |
52 | ### Search image.sc Forum
53 | This extension allows the chatbot to search bioimage related topics and software issues in the image.sc forum.
54 | 
55 | *Figure 9. Search in image.sc forum.*
56 |
57 | ### Search Web
58 | This extension allows the chatbot to search for information from the web. This extension is triggered while the chatbot realizes it can not find relevant information from the knowledge base.
59 | 
60 | *Figure 10. Search in the web.*
61 |
62 |
63 | ### BioImage Model Zoo
64 | This mode is designed for queries requiring detailed model information or specific actions, generating and executing Python scripts for tailored solutions.
65 | 
66 | *Figure 11. Scripting retrieval for complex queries.*
67 |
68 | It involves creating a `ModelZooInfoScript` schema with fields like `request`, `user info`, and `script`, where `script` is Python code for API interactions or data manipulation. The final response is formulated by integrating the script's output with the `request` and `user info`.
69 |
70 | ## References
71 |
72 | 1. [FAISS](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
73 | 2. [Bioimage.io](https://bioimage.io/docs/#/)
74 | 3. [Imjoy](https://imjoy.io/docs/#/)
75 | 4. [DeepImageJ](https://deepimagej.github.io/)
76 | 5. [ImageJ](https://imagej.net)
77 | 6. [bio.tools](https://bio.tools)
78 | 7. [scikit-image](https://scikit-image.org/docs/stable/)
79 | 8. [Function-Calling API](https://openai.com/blog/function-calling-and-other-api-updates)
80 | 9. [CellPose](https://www.cellpose.org)
81 |
--------------------------------------------------------------------------------
/tests/test_chatbot.py:
--------------------------------------------------------------------------------
1 | import os
2 | from bioimageio_chatbot.chatbot import create_assistants, get_builtin_extensions, QuestionWithHistory, UserProfile
3 | from schema_agents.schema import Message
4 | import pytest
5 |
6 | KNOWLEDGE_BASE_PATH = "./bioimageio-knowledge-base"
7 |
@pytest.fixture
def builtin_extensions():
    # Fresh list of built-in chatbot extensions for each test.
    return get_builtin_extensions()
11 |
@pytest.fixture
def melman(builtin_extensions):
    """Return the agent object of the assistant named "Melman".

    Raises IndexError if no assistant with that name exists.
    """
    assistants = create_assistants(builtin_extensions)
    # find an assistant name Melman
    m = [assistant for assistant in assistants if assistant['name'] == "Melman"][0]
    return m['agent']
18 |
@pytest.mark.asyncio
async def test_chatbot(builtin_extensions, melman):
    """End-to-end smoke test of the Melman assistant against several extensions.

    Requires network access and working LLM credentials. Each section enables a
    single extension, sends a question, and checks that the expected tool name
    appears somewhere in the stringified response trace.
    """
    # --- bioimage_archive extension ---
    select_extensions = [
        {"id": "bioimage_archive"}
    ]
    chat_history=[]
    question = "Which tool can I use to analyse western blot image?"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    # NOTE(review): `profile` is already a UserProfile, so the extra
    # `UserProfile.model_validate(profile)` round-trip is redundant but harmless.
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BioimageArchiveSearch" in element for element in str_resp])

    question = "Which tool can I use to segment an cell image?"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BioimageArchiveSearch" in element for element in str_resp])

    # NOTE(review): the next two questions are about model testing and
    # contribution guidelines, yet still assert "BioimageArchiveSearch" —
    # likely copy-paste from the block above; confirm the intended tool.
    question = "How can I test the models?"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BioimageArchiveSearch" in element for element in str_resp])

    question = "What are Model Contribution Guidelines?"
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BioimageArchiveSearch" in element for element in str_resp])


    # test biii extension
    select_extensions = [
        {"id": "biii"}
    ]
    question = "What bioimage analysis tools are available for quantifying cell migration?"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BiiiSearch" in element for element in str_resp])

    question = "Are there any workflows on biii.eu for 3D reconstruction of neuronal networks from electron microscopy images?"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["BiiiSearch" in element for element in str_resp])


    # test image_sc extension
    select_extensions = [
        {"id": "image_sc_forum"}
    ]
    question = "I got a problem, StarDist stops working! help me find it in image.sc forum."
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    # make resp a string
    resp = [str(element) for element in resp]
    assert any(["ImageScForumSearch" in element for element in resp])
    # The forum search result payload embeds a `"posts":` JSON key.
    assert any(['''posts":''' in element for element in resp])


    # test web extension
    select_extensions = [
        {"id": "web"}
    ]
    question = "I want to know more about the BioImage Archive"
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await melman.handle(Message(content="", data=m , role="User"))
    assert resp
    str_resp = [str(element) for element in resp]
    assert any(["WebSearch" in element for element in str_resp])
    assert any(['''"content": ''' in element for element in str_resp])
105 |
--------------------------------------------------------------------------------
/docs/figure-2-use-cases.md:
--------------------------------------------------------------------------------
1 | # Reproducing Example Usage Scenarios of the BioImage.IO Chatbot Figure 2
2 |
3 | This section provides detailed instructions for reproducing the example usage scenarios of the BioImage.IO Chatbot illustrated in Figure 2 of the main text:
4 |
5 |
6 |
7 |
8 | These steps will guide users through querying documents, utilizing online services, executing AI models, and developing extensions.
9 |
10 | ## Access the BioImage.IO Chatbot Interface
11 | Launch the chatbot through the BioImage.IO website [here](https://bioimage.io/chat/) or use the dedicated user interface.
12 |
13 | ## Video for Reproducing the Scenarios
* **[A video showcasing information retrieval (as described in scenarios a-c)](https://zenodo.org/records/10967840/files/Supplementary-Video-1-bioimageio-chatbot-information-retrieval.mp4?download=1)**
* **[A video showcasing AI model execution (as described in scenario d)](https://zenodo.org/records/10967840/files/Supplementary-Video-2-bioimageio-chatbot-ai-image-analysis.mp4?download=1)**
16 |
17 |
18 | ### Scenario (a): Querying Bioimage Analysis Documentation
19 |
20 | - **Initiate a Query**: Type a question related to bioimage analysis, e.g., "What are the best practices for optimizing model performance on bioimage.io?"
21 | - **Review the Chatbot's Response**: The chatbot will provide an answer that includes information extracted from the BioImage Model Zoo documentation.
22 |
23 | ### Scenario (b): Exploring the Human Protein Atlas
24 |
25 | - **Initiate a Query**: Ask the chatbot to find protein information in the Human Protein Atlas by typing "Tell me about PML protein and show me the cell images"
26 | - **Interpret the Results**: The chatbot will respond by constructing an API call to the Protein Atlas database and displaying the relevant information about the PML protein, including cell images.
27 |
28 | ### Scenario (c): Querying the BioImage Archive
29 |
30 | - **Initiate a Query**: Ask the chatbot to find cell images at the G1 phase by typing "Please, find datasets of cell images at G1 phase."
31 | - **Interpret the Results**: The chatbot will initiate an API call to the BioImage Archive server, and return results such as a study titled "DeepCycle: Deep learning reconstruction of the closed cell cycle trajectory from single-cell unsegmented microscopy images."
32 |
33 | ### Scenario (d): Running AI Models for Image Analysis
34 |
- **Prerequisites**: Ensure you have Chrome or a Chromium-based browser installed on your computer.
36 | - **Download Image Data**: Begin by creating a new folder on your computer named `test-images`. Download the image data file from [this link](https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/example-data/nuclei.tif) and save it into the `test-images` folder.
- **Initiate Bioimage Analyst**: Navigate to the BioImage.IO chatbot interface at https://bioimage.io/chat/. Note that only Chrome or Chromium-based browsers are supported at the moment. Select "Bioimage Analyst(Bridget)" located in the upper right corner of the chatbot interface.
- **Mount your Data Folder**: Within the chat interface, click on the "Mount Files" button located below the dialog window. This action will allow you to mount the test-image folder that contains your downloaded image data. The chatbot will confirm the successful mounting of the folder; you can then ask it to list the files contained within, ensuring that your data is ready for analysis.
39 | - **Perform segmentation using Cellpose model**: Type "Segment the image `/mnt/nuclei.tif` using Cellpose" to run the Cellpose model on the image data. Upon successful execution of the model, the chatbot will notify you that the segmentation process is complete and will display the analyzed results. Optionally, you can ask it to "count the number of nuclei in the image" if successfully segmented, "plot the size distribution of nuclei", or you can tell it to "use the visual inspection tool to analyze the figure and create a report about the size distribution".
40 |
41 | ### Scenario (e): Developing New Extensions
42 |
43 | Follow the steps below to develop a new extension for microscope stage control and image capture. For a detailed tutorial, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/bioimage-chatbot-extension-tutorial.ipynb) or access the Jupyter Notebook directly through ImJoy [here](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1).
44 |
45 | ### Scenario (f): Controlling a Microscope Stage and Capturing Images
46 |
47 | - **Pre-requisites**: You will need a microscope and the squid control software
48 |
49 | - **Create microscope extension**: Following the example in the above [chatbot extension example notebook](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1), create a hypha service extension for controlling the microscope:
50 | 1. **Setup the Developer Environment**: Open a Jupyter Notebook. Install and import the `imjoy_rpc`, `hypha_rpc` and `pydantic` packages.
51 | 2. **Define Input Schemas**: Create classes for `MoveStageInput` and `SnapImageInput` to structure the user input. (Note: To help the chatbot understand the "center", you will need to tell the chatbot about the boundaries of the stage via the docstring of the `MoveStageInput` class)
52 | 3. **Implement Control Functions**: Write asynchronous functions `move_stage` and `snap_image`.
53 | 4. **Setup Extension Interface**: Develop the extension interface and define a schema getter function.
5. **Register the Extension**: Register the extension as a hypha service and connect to the chatbot.
55 | - **Initiate a Query**: Ask the chatbot to "Please move to the center and snap an image".
56 | - **Interpret the Results**: The chatbot will execute the `move_stage` function to move the microscope stage to the center and then capture an image using the `snap_image` function. The chatbot will confirm the successful completion of the tasks.
57 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/hpa_extension.py:
--------------------------------------------------------------------------------
1 | from bioimageio_chatbot.utils import ChatbotExtension
2 | from schema_agents import schema_tool
3 | from pydantic import Field, BaseModel
4 | from typing import Optional, List, Dict, Any
5 | import pandas as pd
6 | from pathlib import Path
7 | import requests
8 | import re
9 | import os
10 | from bioimageio_chatbot.utils import download_file
11 |
class HPAClient:
    """Client for searching the Human Protein Atlas (HPA).

    On construction the full HPA TSV dump is downloaded (once, cached in
    ./data) and loaded into memory with pandas; a lower-cased string copy is
    kept for case-insensitive substring search.
    """

    def __init__(self):
        self._base_url = 'https://www.proteinatlas.org/download/proteinatlas.tsv.zip'
        folder = Path('./data')
        file_path = os.path.join(folder, 'proteinatlas.tsv.zip')
        # Firstly check if the data is already downloaded in the /data folder.
        if not os.path.exists(file_path):
            os.makedirs(folder, exist_ok=True)
            # Download the data dump (large file; happens only once).
            download_file(self._base_url, file_path)
        # Load and preprocess data at startup.
        self.data = pd.read_csv(file_path, delimiter='\t')
        # Convert all textual data to lowercase strings for faster case-insensitive searching.
        self.preprocessed_data = self.data.apply(lambda x: x.astype(str).str.lower())

    async def search_hpa(self,
                         query: str = Field(..., description="Enter gene names, functions, or disease terms to search in the Human Protein Atlas."),
                         limitSize: int = Field(10, gt=0, description="Number of returned items per search.")
                         ) -> List[Dict[str, Any]]:
        # Annotation fixed from Dict[str, Any]: the method returns a list of dicts.
        """Search the Human Protein Atlas for proteins based on a query string, return the top search results."""
        query = query.lower()

        # Score each row by the number of columns containing the query.
        # regex=False: treat the user's query as a literal string, so inputs
        # containing regex metacharacters (e.g. "(") cannot crash the search.
        scores = self.preprocessed_data.apply(lambda x: x.str.contains(query, regex=False)).sum(axis=1)
        scores = scores.sort_values(ascending=False)
        # Drop rows with zero matches so unrelated entries are never returned.
        scores = scores[scores > 0]
        scores = scores.head(limitSize)

        selected_columns = ['Gene', 'Gene synonym', 'Ensembl',
                            'Gene description', 'Subcellular location', 'Subcellular main location', 'Subcellular additional location',
                            'Biological process', 'Molecular function', 'Uniprot', 'Antibody',
                            'Disease involvement', 'Secretome function', 'CCD Protein', 'CCD Transcript',
                            'Evidence', 'Protein class']

        info_list = []
        for index in scores.index:
            items = self.data.loc[index, selected_columns]
            info_list.append(items.to_dict())
        return info_list

    async def read_protein_info(self,
                                ensembl: str = Field(..., description="Ensembl ID of the protein.")
                                ) -> Dict[str, Any]:
        """Get detailed information about a protein from the Human Protein Atlas."""
        json_link = f"https://www.proteinatlas.org/{ensembl}.json"
        response = requests.get(json_link)
        # Check if the request was successful (raises HTTPError on 4xx/5xx).
        response.raise_for_status()
        # Return the parsed JSON content.
        return response.json()

    async def get_cell_image(self,
                             gene: str = Field(..., description="Gene name of the protein."),
                             ensembl: str = Field(..., description="Ensembl ID of the protein."),
                             section: str = Field("subcellular", description="Section of the Human Protein Atlas to search for the protein. Valid options are 'subcellular', 'tissue',")
                             ) -> List[str]:
        """Retrieve a list of cell image links from the Human Protein Atlas, where a specific protein is tagged in the green channel.
        ALWAYS render the result thumbnail images as a horizatal table and create link (format: `[](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
        link_name = f"{ensembl}-{gene}"
        http_link = f"https://www.proteinatlas.org/{link_name}/{section}"
        # Read the source code of the page; HPA renders "Not available" when no
        # imaging data exists for this section.
        response = requests.get(http_link)
        if 'Not available' in response.text:
            return 'No cell image available.'
        # Search for image links, capturing the part after 'src="'.
        # NOTE(review): the original pattern lost its named-group name when HTML
        # was stripped from the source ("(?P" with no "<name>"); restored here
        # as (?P<link>...), which findall returns as the group content.
        pattern = r'src="(?P<link>//images\.proteinatlas\.org/.*?_red_green_thumb\.jpg)"'
        image_links = re.findall(pattern, response.text)
        # Replace the 'red_green' with 'blue_red_green_yellow' if 'blue' not in the link,
        # otherwise replace 'blue_red_green' with 'blue_red_green_yellow'.
        image_links = [link.replace('red_green', 'blue_red_green_yellow') if 'blue' not in link else link.replace('blue_red_green', 'blue_red_green_yellow') for link in image_links]
        # Prefix the scheme on the protocol-relative URLs. The '_thumb' suffix is
        # intentionally kept: the chatbot derives the full-size URL by stripping
        # '_thumb' (see docstring instructions to the model).
        final_image_links = []
        for link in image_links:
            final_image_links.append(f"https:{link}")
        return final_image_links
87 |
88 |
def get_extension():
    """Assemble the Human Protein Atlas extension, wrapping the client methods as schema tools."""
    client = HPAClient()
    # Wrap each bound method so the agent framework can introspect its schema.
    tools = dict(
        search=schema_tool(client.search_hpa),
        read=schema_tool(client.read_protein_info),
        get_cell_image=schema_tool(client.get_cell_image),
    )
    return ChatbotExtension(
        id="hpa",
        name="Human Protein Atlas",
        description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc. When searching for cell images, always search for the gene name and Ensembl ID of the protein.",
        tools=tools,
    )
105 |
if __name__ == "__main__":
    # Manual smoke test: search the local HPA dump for "brain" and print hits.
    import asyncio
    async def main():
        extension = get_extension()
        query = "brain"
        limitSize = 2
        print(await extension.tools["search"](query=query, limitSize=limitSize))
        # test only one image
        # print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon")], query="What is this?", context_description="Inspect the BioImage.io icon."))
    # Run the async function
    asyncio.run(main())
117 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/biii_extension.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import requests
3 | from bs4 import BeautifulSoup
4 | import pandas as pd
5 | from typing import List, Optional
6 | from pydantic import BaseModel, Field
7 | from bioimageio_chatbot.utils import ChatbotExtension
8 | from schema_agents import schema_tool
9 |
class BiiiQuery(BaseModel):
    """Queries parameters for biii.eu search"""

    # Keywords joined into a single fulltext query by the caller.
    # NOTE(review): this model is not referenced elsewhere in this module;
    # it may be consumed by external callers — confirm before removing.
    queries: List[str] = Field(description="A list of keywords to search for")
13 | queries: List[str] = Field(description="A list of keywords to search for")
14 |
15 |
class BiiiRow(BaseModel):
    """Search result row from biii.eu"""

    name: str = Field(description="Name")
    relevance: str = Field(description="Relevance score")
    image_dimension: Optional[str] = Field(
        None, description="Supported image dimension"
    )
    # Default added for consistency with `image_dimension`: result tables may
    # lack a "Requires" column, in which case this stays None.
    requires: Optional[str] = Field(None, description="Dependent software")
    excerpt: str = Field(description="Description")
26 |
27 |
def extract_table_with_links(table, base_url) -> List[BiiiRow]:
    # Annotation fixed from pd.DataFrame: the frame is converted to BiiiRow
    # objects before returning.
    """
    Extracts a table from HTML and includes hyperlinks in the cells if available.

    Args:
        table (bs4.element.Tag): A BeautifulSoup Tag object representing a table.
        base_url (str): Prefix used to absolutize relative hyperlinks.

    Returns:
        list[BiiiRow]: One parsed row per table body row. Requires the header
        to contain "Relevance" and "Excerpt" columns (KeyError otherwise).
    """
    rows = table.find_all("tr")
    data = []

    for index, row in enumerate(rows):
        columns = row.find_all(["td", "th"])
        row_data = []

        for column in columns:
            cell_text = column.get_text(strip=True)

            # Check for a hyperlink in the cell; append "(url)" to the text,
            # skipping the header row (index 0).
            link = column.find("a", href=True)
            if index != 0 and link and cell_text:
                cell_text += f"({link['href'] if link['href'].startswith('http') else base_url + link['href']})"

            row_data.append(cell_text)

        data.append(row_data)

    if data:
        # First row is the header; normalize the first column label to "Name".
        columns = data[0]
        columns[0] = "Name"
        # NOTE(review): raises IndexError when the header has fewer than 4
        # cells — confirm biii.eu result tables always have the full header.
        if columns[3] == "Supported Image Dimension":
            columns[2] = "Logo"
    df = pd.DataFrame(data[1:], columns=columns) if data and columns else pd.DataFrame()
    # remove column named "Content type" if exists
    if "Content type" in df.columns:
        df = df.drop(columns=["Content type"])

    # convert to list of BiiiRow
    df = df.to_dict(orient="records")
    return [
        BiiiRow(
            name=row["Name"],
            relevance=row["Relevance"],
            image_dimension=row.get("Supported Image Dimension"),
            requires=row.get("Requires"),
            excerpt=row["Excerpt"],
        )
        for row in df
    ]
79 |
80 |
def search_biii_with_links(
    queries: List[str], content_type="software", base_url="https://biii.eu"
) -> dict:
    """
    Modified search function to include hyperlinks in the extracted tables.

    Args:
        queries (List[str]): A list of search queries.
        content_type (str): Caption of the result table to extract (e.g. "software").
        base_url (str): Prefix for resolving relative links in result cells.

    Returns:
        The parsed rows (list of BiiiRow) for the FIRST query whose result page
        contains a table captioned with `content_type`; implicitly None when no
        query produces such a table.
        NOTE(review): the original docstring claimed a dict of DataFrames, and
        the early `return` means later queries are never searched — confirm
        this single-result behavior is intended.
    """
    search_base_url = "https://biii.eu/search?search_api_fulltext="

    for query in queries:
        # biii.eu expects comma-separated terms in the fulltext parameter.
        url = search_base_url + ",".join(query.split())
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        tables = soup.find_all("table")

        for table in tables:
            caption = table.find("caption")
            if caption:
                # Captions look like "Content type: software"; normalize for comparison.
                caption = (
                    caption.get_text().strip().replace("Content type: ", "").lower()
                )
            else:
                continue  # Skip tables without a caption

            if caption != content_type:
                continue

            df = extract_table_with_links(table, base_url)
            return df
115 |
116 |
class BiiiSearchResult(BaseModel):
    """Search results from biii.eu"""
    # Parsed result rows, truncated to at most top_k entries by the caller.
    results: List[BiiiRow] = Field(description="Search results from biii.eu")
    # Prefix the consumer should use to resolve relative links in the rows.
    base_url: str = Field(
        description="The based URL of the search results, e.g. ImageJ (/imagej) will become /imagej"
    )
123 |
124 |
class BiiiResponse(BaseModel):
    """Summarize the search results from biii.eu"""

    # NOTE(review): not referenced in this module; possibly consumed by an
    # external summarization step — confirm before removing.
    response: str = Field(
        description="The answer to the user's question based on the search results. Can be either a detailed response in markdown format if the search results are relevant to the user's question or 'I don't know'. It should resolve relative URLs in the search results using the base_url."
    )
131 |
@schema_tool
async def search_biii(
    keywords: List[str] = Field(
        description="A list of search keywords, no space allowed in each keyword."
    ),
    top_k: int = Field(
        10,
        description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
    )):
    """Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools."""
    # Clamp the requested result count to the supported window [1, 15].
    top_k = min(15, max(top_k, 1))
    print(f"Searching biii.eu with keywords: {keywords}, top_k: {top_k}")
    # The scraping helper is blocking (requests + BeautifulSoup); run it off
    # the event loop thread.
    rows = await asyncio.get_running_loop().run_in_executor(
        None, search_biii_with_links, keywords, "software", ""
    )
    if not rows:
        return f"Sorry I didn't find relevant information in biii.eu about {keywords}"
    return BiiiSearchResult(
        results=rows[:top_k],
        base_url="https://biii.eu",
    )
158 |
def get_extension():
    """Build the biii.eu chatbot extension exposing the `search` tool."""
    return ChatbotExtension(
        id="biii",
        name="Search BioImage Informatics Index (biii.eu)",
        description="Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools. Provide a list of keywords to search for software tools on biii.eu. Returns a list of relevant documents.",
        tools=dict(search=search_biii),
    )
166 |
167 |
if __name__ == "__main__":
    # Manual smoke test: run a raw search and print the parsed rows.
    results = search_biii_with_links(["image segmentation"])
    # Expected raw table header on biii.eu:
    # Index(['Name', 'Relevance', 'Logo', 'Supported Image Dimension', 'requires',
    #        'Content type', 'Excerpt'],
    #       dtype='object')
    print(results)
174 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/image_sc_extension.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | import os
3 | import os
4 | import urllib.parse
5 | import asyncio
6 | import html2text
7 | import logging
8 | from pydantic import Field
9 | from bioimageio_chatbot.utils import ChatbotExtension
10 | from typing import List, Dict, Any, Optional
11 | from schema_agents import schema_tool
12 |
13 | logger = logging.getLogger(__name__)
14 |
class DiscourseClient:
    """Thin async client for the Discourse REST API (used for forum.image.sc).

    All requests authenticate via the ``Api-Username`` / ``Api-Key`` headers;
    each call opens a fresh ``httpx.AsyncClient`` (no connection reuse).
    """

    def __init__(self, base_url: str, username: str, api_key: str):
        # base_url: Discourse instance root, e.g. "https://forum.image.sc".
        self._base_url = base_url
        self._username = username
        self._api_key = api_key

    def _build_query_string(self, query: str, order: str, status: str) -> str:
        # Construct the query string with the provided parameters.
        # Note: `urllib.parse.quote` is used to ensure the query is URL encoded.
        query_components = [
            f"{query}",
            f"order:{order}",
        ]
        if status:
            query_components.append(f"status:{status}")
        return "q=" + urllib.parse.quote(" ".join(query_components))

    def _get_headers(self) -> Dict[str, str]:
        # Discourse API authentication headers.
        return {
            "Content-Type": "application/json",
            "Api-Username": self._username,
            "Api-Key": self._api_key,
        }

    def _cleanup_search_results(self, results: Dict[str, Any], top_k: int=10) -> Dict[str, Any]:
        # Strip the raw search payload down to the fields the chatbot needs,
        # dropping entries that lack any expected key, then truncate to top_k.
        cleaned_results = {
            "posts": [
                {"id": post["id"], "topic_id": post["topic_id"], "blurb": post["blurb"]}
                for post in results.get("posts", [])
                if "id" in post and "topic_id" in post and "blurb" in post
            ],
            "topics": [
                {"title": topic["title"], "slug": topic["slug"]}
                for topic in results.get("topics", [])
                if "title" in topic and "slug" in topic
            ]
        }
        cleaned_results["posts"] = cleaned_results["posts"][:top_k]
        cleaned_results["topics"] = cleaned_results["topics"][:top_k]
        return cleaned_results

    async def search_image_sc(self, query: str = Field(..., description="The search query string."),
                              top_k: int = Field(..., gt=0, description="Maximum number of search results to return."),
                              order: Optional[str] = Field("latest", description="Order of the search results, options: latest, likes, views, latest_topic."),
                              status: Optional[str] = Field(None, description="The status filter for the search results, options: solved, unsolved, open, closed."),
                              ):
        """Search the Image.sc Forum(a forum for scientific image software) for posts and topics."""
        # Prepare headers for authentication
        headers = self._get_headers()

        # Build the query string
        query_string = self._build_query_string(query, order, status)

        # Construct the full URL
        url = f"{self._base_url}/search.json?{query_string}"
        logger.info(f"Searching Image.sc Forum for: {query}")

        # Perform the asynchronous HTTP GET request
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)

        # Check if the request was successful
        if response.status_code == 200:
            return self._cleanup_search_results(response.json(), top_k)  # Return the JSON response
        else:
            response.raise_for_status()  # Raise an error for bad responses

    async def read_image_sc_posts(self,
                                  type: str = Field(..., description="type: `post` or `topic`"),
                                  id: int = Field(..., description="topic id")
                                  ):
        """Read a single or all the posts in a topic from the Image.sc Forum (a discussion forum for scientific image software)."""
        # NOTE(review): silently returns None for any `type` other than
        # "post"/"topic" — presumably the schema layer restricts the values; confirm.
        if type == "post":
            return await self.get_post_content(id)
        elif type == "topic":
            return await self.get_topic_content(id)

    async def get_topic_content(self, topic_id: int) -> Dict[str, Any]:
        """Fetch every post in a topic and render them as 'username: text' strings."""
        url = f"{self._base_url}/t/{topic_id}.json"
        headers = self._get_headers()
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            topic_data = response.json()

        post_ids = [post['id'] for post in topic_data['post_stream']['posts']]
        # Fetch all posts concurrently — one HTTP request per post.
        messages = await asyncio.gather(*[self.get_post_content(post_id) for post_id in post_ids])
        posts = []
        for msg in messages:
            # Convert the HTML-rendered ("cooked") body to markdown-like text.
            posts.append(f"{msg['username']}: {html2text.html2text(msg['content'])}")
        return {"posts": posts, "url": f"{self._base_url}/t/{topic_data['slug']}"}

    async def get_post_content(self, post_id: int) -> Dict[str, Any]:
        """Fetch a single post; return its author, HTML body and topic URL."""
        # Annotation fixed from `-> str`: the method returns a dict.
        url = f"{self._base_url}/posts/{post_id}.json"
        headers = self._get_headers()
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            post_data = response.json()
        return {"username": post_data["username"], "content": post_data["cooked"], "url": f"{self._base_url}/t/{post_data['topic_slug']}"}
115 |
def get_extension():
    """Build the Image.sc Forum extension, or return None when credentials are absent."""
    username = os.environ.get("DISCOURSE_USERNAME")
    api_key = os.environ.get("DISCOURSE_API_KEY")
    if not username or not api_key:
        print("WARNING: Image.sc Forum extensions require DISCOURSE_USERNAME and DISCOURSE_API_KEY environment variables to be set, disabling it for now.")
        return None

    # NOTE(review): base_url ends with '/' while the client formats
    # f"{base_url}/..." — this yields '//' in request URLs; most servers
    # tolerate it, but confirm against the Discourse instance.
    discourse_client = DiscourseClient(base_url="https://forum.image.sc/", username=username, api_key=api_key)
    return ChatbotExtension(
        id="image_sc_forum",
        name="Search image.sc Forum",
        description="Search the Image.sc Forum for posts and topics. Provide a search query to search the Image.sc Forum for posts or post, and read a specific topic",
        tools=dict(
            search=schema_tool(discourse_client.search_image_sc),
            read=schema_tool(discourse_client.read_image_sc_posts)
        )
    )
133 |
if __name__ == "__main__":
    # Manual smoke test against the live forum.
    import json
    async def main():
        # SECURITY: an API key was previously hard-coded here (a leaked
        # credential). Load credentials from the environment instead — the
        # same variables get_extension() uses. Revoke the old key.
        username = os.environ["DISCOURSE_USERNAME"]
        api_key = os.environ["DISCOURSE_API_KEY"]
        discourse_client = DiscourseClient(base_url="https://forum.image.sc", username=username, api_key=api_key)
        results = await discourse_client.search_image_sc("python", 5, "latest")
        print(json.dumps(results))
        results = await discourse_client.read_image_sc_posts('topic', 44826)
        print(results)

    # Run the async function
    asyncio.run(main())
145 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/jsonschema_pydantic.py:
--------------------------------------------------------------------------------
1 | """Jsonschema to pydantic schema from https://github.com/c32168/dyntamic"""
2 | from typing import Annotated, Union, Any, Optional
3 |
4 | import typing
5 | from pydantic import create_model
6 | from pydantic.fields import Field, PydanticUndefined
7 |
8 | Model = typing.TypeVar('Model', bound='BaseModel')
9 |
10 |
class DyntamicFactory:
    """Builds a pydantic model class dynamically from a JSON Schema dict."""

    # Mapping from JSON-Schema primitive type names to Python type factories.
    # NOTE(review): 'float' is not a standard JSON-Schema type name (the spec
    # uses 'number'); presumably kept for lenient inputs — confirm.
    # 'null' maps to None, which get_factory uses to detect nullable variants.
    TYPES = {
        'string': str,
        'array': list,
        'boolean': bool,
        'integer': int,
        'float': float,
        'number': float,
        'null': None,
    }
22 |
    def __init__(self,
                 json_schema: dict,
                 base_model: Union[type[Model], tuple[type[Model], ...], None] = None,
                 ref_template: str = "definitions"
                 ) -> None:
        """
        Creates a dynamic pydantic model from a JSONSchema, dumped from and existing Pydantic model elsewhere.
        JSONSchema dump must be called with ref_template='{model}' like:

        SomeSampleModel.model_json_schema(ref_template='{model}')
        Use:
        >> _factory = DyntamicFactory(schema)
        >> _factory.make()
        >> _model = create_model(_factory.class_name, **_factory.model_fields)
        >> _instance = dynamic_model.model_validate(json_with_data)
        >> validated_data = model_instance.model_dump()
        """
        self.class_name = json_schema.get('title')
        self.description = json_schema.get('description')
        self.class_type = json_schema.get('type')
        # Names of required properties; everything else gets a default.
        self.required = json_schema.get('required', [])
        self.default = json_schema.get('default')
        self.raw_fields = json_schema.get('properties')
        self.ref_template = ref_template
        # Sub-schema definitions keyed by model name.
        # NOTE(review): None when the schema lacks that section; _make_nested
        # would then fail on `.get` — confirm all inputs carry definitions.
        self.definitions = json_schema.get(ref_template)
        self.fields = {}
        self.model_fields = {}
        self._base_model = base_model
51 |
52 | def get_factory(self, field_name, field) -> Any:
53 | """Get the factory for a given type"""
54 | f_type = field.get('type')
55 | if f_type is None and 'anyOf' in field:
56 | factory = tuple([self.get_factory(None, t) for t in field.get('anyOf')])
57 | if None in factory and len(factory) == 2:
58 | if field_name and field_name not in self.required:
59 | factory = [f for f in factory if f is not None][0]
60 | else:
61 | factory = Optional[[f for f in factory if f is not None][0]]
62 | else:
63 | factory = Union[factory]
64 | else:
65 | factory = self.TYPES.get(f_type)
66 | return factory
67 |
68 | def make(self) -> Model:
69 | """Factory method, dynamically creates a pydantic model from JSON Schema"""
70 | for field in self.raw_fields:
71 | if field not in self.required:
72 | default = self.raw_fields[field].get('default')
73 | else:
74 | default = PydanticUndefined
75 | if '$ref' in self.raw_fields[field]:
76 | model_name = self.raw_fields[field].get('$ref')
77 | # resolve $ref
78 | # consider all the cases in standard json schema
79 |
80 | if model_name.startswith('#/'):
81 | model_name = model_name.replace('#/', '')
82 | elif model_name.startswith('#'):
83 | model_name = model_name.replace('#', '')
84 |
85 | if model_name.startswith(self.ref_template+"/"):
86 | model_name = model_name.replace(self.ref_template+"/", '')
87 |
88 | self._make_nested(model_name, field, default)
89 | else:
90 | factory = self.get_factory(field, self.raw_fields[field])
91 | if factory is None:
92 | factory = Any
93 | if factory == list:
94 | items = self.raw_fields[field].get('items')
95 | if self.ref_template in items:
96 | self._make_nested(items.get(self.ref_template), field)
97 |
98 | self._make_field(factory, field, self.raw_fields.get('title'), self.raw_fields.get(field).get('description'), default=default)
99 | model = create_model(self.class_name, __base__=self._base_model, **self.model_fields)
100 | model.__doc__ = self.description
101 | return model
102 |
103 | def _make_nested(self, model_name: str, field, default) -> None:
104 | """Create a nested model"""
105 | level = DyntamicFactory({self.ref_template: self.definitions} | self.definitions.get(model_name),
106 | ref_template=self.ref_template)
107 | level.make()
108 | model = create_model(model_name, **level.model_fields)
109 | model.__doc__ = level.description
110 | self._make_field(model, field, field, level.description, default)
111 |
112 | def _make_field(self, factory, field, alias, description, default) -> None:
113 | """Create an annotated field"""
114 | # if field not in self.required:
115 | # factory_annotation = Annotated[Union[factory, None], factory]
116 | # else:
117 | factory_annotation = factory
118 | self.model_fields[field] = (
119 | Annotated[factory_annotation, Field(default_factory=None, alias=alias, description=description)], default)
120 |
def json_schema_to_pydantic_model(schema):
    """Convenience wrapper: build and return a pydantic model class for `schema`."""
    factory = DyntamicFactory(schema)
    return factory.make()
124 |
if __name__ == "__main__":
    # Smoke test: a schema exercising plain fields, a $ref to a nested
    # definition, integer defaults, and an anyOf (nullable) union.
    input_schema = {
        "title": "RunMacro",
        "description": "Run a macro",
        "type": "object",
        "properties": {
            "macro": {
                "type": "string",
                "description": "The macro to run"
            },
            "args": {"$ref": "#/definitions/Args"},
            "query": {"description": "The search query string.", "title": "Query", "type": "string"},
            "pageSize": {"default": 10, "description": "Number of search results per page.", "exclusiveMinimum": 0, "title": "Pagesize", "type": "integer"},
            "page": {"default": 1, "description": "Page number of the search results.", "title": "Page", "type": "integer"},
            "sortOrder": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "descending", "description": "Sort order: ascending or descending.", "title": "Sortorder"}
        },
        "required": ["macro", "query"],
        "definitions": {
            "Args": {
                "title": "Args",
                "type": "object",
                "description": "Arguments for the macro",
                "properties": {
                    "arg1": {
                        "type": "string",
                        "description": "arg1"
                    }
                }
            }
        }
    }
    RunMacroClass = json_schema_to_pydantic_model(input_schema)
    assert RunMacroClass.__name__ == input_schema["title"]
    # assert RunMacroClass.__doc__ == input_schema["description"]
    # Required fields ("macro", "query") must be provided; the nested "args"
    # value is validated against the generated Args sub-model.
    m = RunMacroClass(macro="test", args={"test": "test"}, query="test")
    schema = RunMacroClass.model_json_schema()
    print(schema)
    # Round-trip checks: title/description and per-field metadata survive,
    # and the nested definition is re-emitted under pydantic v2's "$defs".
    assert schema['title'] == input_schema['title']
    assert schema['description'] == input_schema['description']
    assert schema['properties']['macro']["description"] == input_schema['properties']['macro']["description"]
    assert schema['properties']['args']['allOf'][0]['$ref'] == "#/$defs/Args"
    assert m.macro == "test"
167 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/docs_extension.py:
--------------------------------------------------------------------------------
1 | import os
2 | import asyncio
3 | from functools import partial
4 | from pydantic import BaseModel, Field
5 | from typing import Any, Dict, Optional
6 | from bioimageio_chatbot.knowledge_base import load_knowledge_base
7 | from bioimageio_chatbot.utils import get_manifest
8 | from bioimageio_chatbot.utils import ChatbotExtension
9 | from schema_agents import schema_tool
10 |
class DocWithScore(BaseModel):
    """A document with an associated relevance score."""

    doc: str = Field(description="The document retrieved.")
    score: float = Field(description="The relevance score of the retrieved document.")
    # Fix: callers construct DocWithScore(..., metadata=doc.metadata), but no
    # such field was declared, so pydantic silently dropped the metadata.
    metadata: Optional[Dict[str, Any]] = Field(
        None, description="Source metadata of the retrieved document."
    )
16 |
17 |
async def run_extension(
    docs_store_dict,
    channel_id,
    query: str = Field(
        description="The query used to retrieve documents related to the user's request. It should be a sentence which will be used to match descriptions using the OpenAI text embedding to match document chunks in a vector database."
    ),
    top_k: int = Field(
        3,
        description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
    ),
):
    """Search one knowledge-base channel and return scored document chunks."""
    # Clamp top_k into the supported range [1, 15].
    top_k = max(1, min(top_k, 15))
    store = docs_store_dict[channel_id]

    print(f"Retrieving documents from database {channel_id} with query: {query}")
    results = await store.asimilarity_search_with_relevance_scores(query, k=top_k)

    scored = [
        DocWithScore(
            doc=document.page_content,
            score=round(relevance, 2),
            metadata=document.metadata,
        )
        for document, relevance in results
    ]
    # Highest relevance first, truncated to top_k entries.
    scored.sort(key=lambda entry: entry.score, reverse=True)
    scored = scored[:top_k]

    if len(scored) > 2:
        print(
            f"Retrieved documents:\n{scored[0].doc[:20] + '...'} (score: {scored[0].score})\n{scored[1].doc[:20] + '...'} (score: {scored[1].score})\n{scored[2].doc[:20] + '...'} (score: {scored[2].score})"
        )
    else:
        print(f"Retrieved documents:\n{scored}")
    return scored
63 |
64 |
def title_case(s):
    """Convert a dotted/hyphenated identifier (e.g. "scikit-image") to CamelCase."""
    # Treat '.' and '-' as word separators, title-case each word, then join.
    normalized = s.replace(".", " ").replace("-", " ")
    return normalized.title().replace(" ", "")
67 |
def create_tool(docs_store_dict, collection):
    """Build a schema_tool that searches one knowledge-base collection.

    Args:
        docs_store_dict: Maps collection id -> vector store exposing
            ``asimilarity_search_with_relevance_scores``.
        collection: Manifest entry; must contain ``id`` and ``description``,
            and may contain ``base_url`` and ``reference``.

    Returns:
        A schema_tool-wrapped async search function named
        ``Search<CamelCasedCollectionId>``.
    """
    async def run_extension(
        query: str = Field(
            description="The query used to retrieve documents related to the user's request. It should be a sentence which will be used to match descriptions using the OpenAI text embedding to match document chunks in a vector database."
        ),
        top_k: int = Field(
            3,
            description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
        ),
    ):
        channel_results = []
        # limit top_k from 1 to 15
        top_k = max(1, min(top_k, 15))
        docs_store = docs_store_dict[collection["id"]]

        print(f"Retrieving documents from database {collection['id']} with query: {query}")
        channel_results.append(
            await docs_store.asimilarity_search_with_relevance_scores(
                query, k=top_k
            )
        )

        docs_with_score = [
            DocWithScore(
                doc=doc.page_content,
                score=round(score, 2),
                metadata=doc.metadata,
            )
            for results_with_scores in channel_results
            for doc, score in results_with_scores
        ]
        # sort by relevance score
        docs_with_score = sorted(docs_with_score, key=lambda x: x.score, reverse=True)[
            : top_k
        ]

        if len(docs_with_score) > 2:
            print(
                f"Retrieved documents:\n{docs_with_score[0].doc[:20] + '...'} (score: {docs_with_score[0].score})\n{docs_with_score[1].doc[:20] + '...'} (score: {docs_with_score[1].score})\n{docs_with_score[2].doc[:20] + '...'} (score: {docs_with_score[2].score})"
            )
        else:
            print(f"Retrieved documents:\n{docs_with_score}")
        return docs_with_score

    channel_id = collection["id"]
    base_url = collection.get("base_url")
    reference = collection.get("reference")
    # Each prompt fragment carries its own leading space and trailing period.
    if base_url:
        base_url_prompt = f" The documentation is available at {base_url}."
    else:
        base_url_prompt = ""

    if reference:
        reference_prompt = f" The reference is available at {reference}."
    else:
        reference_prompt = ""
    run_extension.__name__ = "Search" + title_case(channel_id)
    # Fix: the previous template inserted a stray extra period between the
    # fragments (e.g. "...available at <url>.. The reference...").
    run_extension.__doc__ = f"""Searching documentation for {channel_id}: {collection['description']}.{base_url_prompt}{reference_prompt}"""
    return schema_tool(run_extension)
128 |
# Metadata keys copied from each collection manifest entry into the
# extension's per-tool info dict (only keys actually present are copied).
INFO_KEYS = ["name","description", "authors", "license", "reference"]
130 |
def get_extension():
    """Build the docs/books search extensions from the knowledge-base manifest.

    Returns:
        A tuple ``(docs_extension, books_extension)``. An element is ``None``
        when no matching collections exist. (Previously, an empty group left
        ``sinfo1``/``sinfo2`` unbound and the return raised ``NameError``.)
    """
    collections = get_manifest()["collections"]
    # os.environ.get() with a default can never return None, so the old
    # assert on it was a no-op and has been dropped.
    knowledge_base_path = os.environ.get(
        "BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base"
    )
    if not os.path.exists(knowledge_base_path):
        print(
            f"The knowledge base is not found at {knowledge_base_path}, will download it automatically."
        )
        os.makedirs(knowledge_base_path, exist_ok=True)

    docs_store_dict = load_knowledge_base(knowledge_base_path)

    docs_tools = {}
    docs_info = {}
    books_tools = {}
    books_info = {}
    for col in collections:
        info = {k: col[k] for k in INFO_KEYS if k in col}
        # Collections whose id mentions "book" are grouped into the books
        # extension; everything else goes into the docs extension.
        if "book" in col["id"]:
            books_tools["search_" + col["id"]] = create_tool(docs_store_dict, col)
            if info:
                books_info["search_" + col["id"]] = info
        else:
            docs_tools["search_" + col["id"]] = create_tool(docs_store_dict, col)
            if info:
                docs_info["search_" + col["id"]] = info

    sinfo1 = None
    sinfo2 = None
    if docs_tools:
        sinfo1 = ChatbotExtension(
            id="docs",
            name="Search BioImage Docs",
            description="Search information in the documents of the bioimage.io knowledge base. Provide a list of keywords to search information in the documents. Returns a list of relevant documents. Ensure that the reference to the document is ALWAYS included!",
            tools=docs_tools,
            info=docs_info
        )
    if books_tools:
        sinfo2 = ChatbotExtension(
            id="books",
            name="Search BioImage Books",
            description="Search information in BioImage books. Provide a list of keywords to search information in the books. Returns a list of relevant documents. Ensure that the reference to the book is ALWAYS included!",
            tools=books_tools,
            info=books_info
        )

    return sinfo1, sinfo2
184 |
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # Installation Guide
2 |
3 | ## Setup the Chatbot locally
4 |
5 | If you want to run the chatbot server locally, you need to have an OpenAI API key. You can get one by signing up at [OpenAI](https://beta.openai.com/). Once you have your API key, you can install the chatbot package via pip and set the environment variables:
6 |
7 | ```bash
8 | pip install bioimageio-chatbot
9 | ```
10 |
11 | ```bash
12 | export OPENAI_API_KEY=sk-xxxxxxxx # Required
13 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=/path/to/bioimageio-knowledge-base # Optional, default to ./bioimageio-knowledge-base
14 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs # Optional, default to ./chat-logs
15 | ```
16 |
17 | The chatbot server backend has been tested on Ubuntu and MacOS, it should work on Windows as well.
18 |
19 | ## Command-line Interface
20 |
21 | BioImage.IO Chatbot comes with a command-line interface to facilitate server management, connection to external servers, and knowledge base creation.
22 |
23 | You can access the command-line interface by running `python -m bioimageio_chatbot` or the `bioimageio-chatbot` command.
24 |
25 | Below are the available commands and options:
26 |
27 | ### Initialize Knowledge Base
28 |
29 | To initialize the knowledge base, use the `init` command:
30 |
31 | ```bash
32 | python -m bioimageio_chatbot init
33 | ```
34 |
35 | This will load the knowledge base from the location specified by the `BIOIMAGEIO_KNOWLEDGE_BASE_PATH` environment variable, or use the default path `./bioimageio-knowledge-base`. If the knowledge base is not found, it will be downloaded from the predefined URL (by default, it uses https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimageio-knowledge-base. It can be overridden with `BIOIMAGEIO_KNOWLEDGE_BASE_URL`).
36 |
37 | NOTE: It may take some time to download the knowledge base depending on your internet connection.
38 | **Example:**
39 |
40 | ```bash
41 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH="./my_knowledge_base"
42 | python -m bioimageio_chatbot init
43 | ```
44 |
45 | After running the `init` command, it will list the databases loaded into the knowledge base.
46 |
47 | #### Start Server
48 |
49 | To start your own server entirely, use the `start-server` command:
50 |
51 | ```bash
52 | python -m bioimageio_chatbot start-server [--host HOST] [--port PORT] [--public-base-url PUBLIC_BASE_URL]
53 | ```
54 |
55 | **Options:**
56 |
57 | - `--host`: The host address to run the server on (default: `0.0.0.0`)
58 | - `--port`: The port number to run the server on (default: `9000`)
59 | - `--public-base-url`: The public base URL of the server (default: `http://127.0.0.1:9000`)
60 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login)
61 |
62 | **Example:**
63 |
64 | ```bash
65 | export OPENAI_API_KEY=sk-xxxxxxxx
66 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
67 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs
68 | python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000
69 | ```
70 | This will create a local server, and the BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=http://127.0.0.1:9000
71 |
72 | Open the link in a browser, and you will see the chat interface.
73 |
74 | Please note that the chatbot server may not be accessible to users outside your local network.
75 |
76 | A user guide and technical overview can be found [here](./technical-overview.md).
77 |
78 | To be able to share your chatbot service over the internet (especially for users outside your local network), you will need to expose your server publicly. Please, see [Connect to Server](#connect-to-server)
79 |
80 |
81 | #### Connect to Server
82 |
83 | To help you share your chatbot with users external to your local network, you can use our public [BioEngine](https://aicell.io/project/bioengine/) server as a proxy.
84 |
85 | To connect to an external BioEngine server, use the `connect-server` command:
86 |
87 | ```bash
88 | python -m bioimageio_chatbot connect-server [--server-url SERVER_URL]
89 | ```
90 |
91 | **Options:**
92 |
93 | - `--server-url`: The URL of the external BioEngine server to connect to (default: `https://ai.imjoy.io`)
94 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login)
95 |
96 | **Example:**
97 |
98 | ```bash
99 | export OPENAI_API_KEY=sk-xxxxxxxx
100 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
101 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs
102 | python -m bioimageio_chatbot connect-server --server-url=https://ai.imjoy.io
103 | ```
104 |
105 | First, you will be asked to log in with a hypha account. Either your GitHub or Google account can be reused. Then, the following message containing a link to the chatbot will be displayed: 'The BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=https://ai.imjoy.io'
106 |
107 | Leave your chatbot running to enable users inside or outside your network to access it from this URL.
108 |
109 | #### User Management
110 |
If you set `--login-required` when running `start-server` or `connect-server`, users will be required to log in before accessing the chatbot. The chatbot will then collect the user's GitHub or Google account information and store it in its logs for future analysis.
112 |
You can also provide an optional environment variable `BIOIMAGEIO_AUTHORIZED_USERS_PATH` for the chatbot to load a list of authorized users. The file should be a JSON file containing a list of the email addresses associated with the users' GitHub or Google accounts. For example:
114 |
115 | ```json
116 | {
117 | "users": [
118 | {"email": "user1@email.org"}
119 | ]
120 | }
121 | ```
122 |
123 | #### Create Knowledge Base
124 |
125 | To create a new knowledge base, use the `create-knowledge-base` command:
126 |
127 | ```bash
128 | python -m bioimageio_chatbot create-knowledge-base [--output-dir OUTPUT_DIR]
129 | ```
130 |
131 | **Options:**
132 |
133 | - `--output-dir`: The directory where the knowledge base will be created (default: `./bioimageio-knowledge-base`)
134 |
135 | **Example:**
136 |
137 | ```bash
138 | export OPENAI_API_KEY=sk-xxxxxxxx
139 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
140 | python -m bioimageio_chatbot create-knowledge-base --output-dir=./bioimageio-knowledge-base
141 | ```
142 |
143 |
144 | ### Running the BioImage.IO Chatbot in a Docker Container
145 |
146 | #### Step 1: Build the Docker Image
147 |
148 | To run the BioImage.IO Chatbot using a Docker container, follow these steps. First, build the Docker image by running the following command in your terminal:
149 |
150 | ```bash
151 | docker build -t bioimageio-chatbot:latest .
152 | ```
153 |
154 | If you prefer to use a pre-built Docker image from Docker Hub, you can pull the image using the following command:
155 |
156 | ```bash
157 | docker pull alalulu/bioimageio-chatbot:latest
158 | ```
159 |
160 |
161 | #### Step 2: Start the Chatbot Server
162 |
163 | After building the Docker image, you can start the chatbot server with the following command:
164 |
165 | ```bash
166 | docker run -e OPENAI_API_KEY=sk-xxxxxxxxxxxxx -e BIOIMAGEIO_KNOWLEDGE_BASE_PATH=/knowledge-base -p 3000:9000 -v /path/to/local/knowledge-base:/knowledge-base bioimageio-chatbot:latest python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 --public-base-url=http://localhost:3000
167 | ```
168 |
169 | Replace the placeholders in the command with the following values:
170 |
171 | - `sk-xxxxxxxxxxxxx`: Your OpenAI API key.
172 | - `/path/to/local/knowledge-base`: The local path to your knowledge base folder.
173 |
174 | Optionally, for improved reproducibility, you can change `latest` to a version tag such as `v0.1.18`.
175 |
176 | #### Step 3: Access the Chatbot
177 |
178 | The BioImage.IO Chatbot is now running in the Docker container. You can access it locally in your web browser by visiting:
179 |
180 | ```
181 | https://bioimage.io/chat?server=http://localhost:3000
182 | ```
183 |
184 | Make sure to replace `3000` with the host port you specified in the `docker run` command.
185 |
186 |
187 | Enjoy using the BioImage.IO Chatbot!
188 |
189 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/static/bioimage-model-zoo-extension.imjoy.html:
--------------------------------------------------------------------------------
1 |
2 | [TODO: write documentation for this plugin.]
3 |
4 |
5 |
6 | {
7 | "name": "SearchBioImageModelZoo",
8 | "type": "web-python",
9 | "version": "0.1.0",
10 | "description": "BioImage.IO Chatbot Extension for getting information about models, applications, datasets, etc. in the BioImage Model Zoo.",
11 | "tags": [],
12 | "ui": "",
13 | "cover": "",
14 | "inputs": null,
15 | "outputs": null,
16 | "flags": [],
17 | "icon": "extension",
18 | "api_version": "0.1.8",
19 | "env": "",
20 | "permissions": [],
21 | "requirements": ["pydantic"],
22 | "dependencies": []
23 | }
24 |
25 |
26 |
188 |
--------------------------------------------------------------------------------
/docs/usage-example.md:
--------------------------------------------------------------------------------
1 | # BioImage.IO Chatbot Usage Example Guide
2 |
3 | ## Introduction
4 | Welcome to the world of BioImage.IO Chatbot, a revolutionary conversational assistant designed exclusively for the bioimage community. Our chatbot is more than just a virtual conversationalist; it's a sophisticated AI-powered tool that offers personalized, context-aware responses by drawing from a diverse array of databases, tool-specific documentation, and structured data sources.
5 |
6 | The BioImage.IO Chatbot is all about efficiency, knowledge enrichment, and accessibility. In this guide, we will walk you through various aspects of its functionality, so you can harness its capabilities to enhance your computational bioimaging experience. Whether you're a biologist, bioimage analyst, or developer, this guide is your key to unlocking the full potential of the BioImage.IO Chatbot.
7 |
8 |
9 | ## Basic Interaction
10 | ### How to Start a Conversation
11 | Firstly, to make the most of your chatbot experience, consider setting your profile information. This helps the chatbot understand your background and tailor responses accordingly.
12 |
13 | To start a conversation, simply type `Hi` or `Hello`, and the chatbot will warmly greet you in return.
14 |
15 | 
16 |
17 |
18 | ## Asking Questions
19 | ### Simple Questions
20 | You can ask the BioImage.IO Chatbot a wide range of general and straightforward questions. The chatbot will promptly retrieve and provide you with the relevant information. For example, you can inquire about a "Community Partner" in the BioImage Model Zoo.
21 |
22 | 
23 |
24 | ### Technical Questions
25 | If you have more technical questions, the chatbot is equipped to assist you effectively. It retrieves the necessary information, summarizes it, and presents it in a simple and understandable manner. For instance, you can ask the chatbot about procedures like "How to segment an image using Fiji" or "How to upload a model to the bioimage.io repository."
26 |
27 | 
28 |
29 |
30 | ### Personalization: tailored Responses
31 | The BioImage.IO Chatbot is designed to understand and cater to the unique backgrounds and needs of its users. Whether you're a biologist, bioimage analyst, or developer, the chatbot tailors its responses to provide you with the most relevant and context-aware information.
32 |
33 | To illustrate this personalized approach, we have two screenshots of the chatbot answering the same question: "How can I analyze biological images?" In the first screenshot, we have Abby, a biologist, posing this query. In the second screenshot, it's Abby again, but this time, she identifies as a developer. As expected, the chatbot is able to provide different feedback to Abby based on her background and needs.
34 |
35 | 
36 | 
37 |
38 | ### How to switch chatbot's assistants
We have three assistants built into the BioImage.IO Chatbot to cater to the different purposes and needs of users. BioImage Seeker is designated for users who seek information about bioimaging; it is equipped with 6+ extensions, including the community-driven knowledge base as well as online sources such as the image.sc forum and web search. BioImage Executor is designated for users who want to execute models using the BioEngine, such as CellPose. BioImage Tutor is designated for users who seek tutoring in bioimaging; it is equipped with the BioImage Books extension combined with web search and BioImage Model Zoo search.
40 | 
41 | 
42 |
43 | ## Database Integration
44 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io).
In the chatbot's user interface, you'll find a list of the available knowledge base channels that the chatbot can access. You can choose to select a specific knowledge base channel or opt for the `auto` mode to query information from all integrated databases.
46 |
47 | ## Extension Selection
48 |
49 | If you are interested in using specific extensions in the chatbot, you can click 'More Options' and select one or several extensions from the extension list. This way, the information provided will be exclusively by using the specified extensions you have selected.
50 |
For this example, you can select the `Search Image.sc Forum` extension and ask the chatbot about software issues.
52 |
53 | 
54 | 
55 |
56 | ### Steps details of Chatbot's working trajectory
57 | In order to check the chatbot working trajectories, with every query, all steps of chatbot's working trajectory are integrated in the response. Click 'More Details' on the bottom of the response, users can see the detail of each step.
58 |
59 | 
60 |
61 | ### Search in Knowledge Base
62 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io). This comprehensive integration enables our chatbot to not only respond to questions related to each of these individual tools but also tackle complex inquiries that demand information from multiple databases.
63 |
64 | To illustrate this capability, consider the following scenario: You have a Whole Slide Image (WSI) that you'd like to open with [QuPath](https://qupath.github.io), apply the Chatty-Frog model ([StarDist H&E Nuclei Segmentation](https://bioimage.io/#/?tags=chatty-frog&id=10.5281%2Fzenodo.6338614)) from BioImage.IO using deepImageJ within Fiji. How can this be accomplished? You can present this intricate question to the BioImage.IO Chatbot.
65 |
66 | The chatbot provides a detailed breakdown of the steps required to complete the requested pipeline. This example demonstrates how our chatbot excels at efficiently retrieving and summarizing information from various sources, offering a valuable solution for your bioimaging needs.
67 |
68 | 
69 |
70 | ### Retrieving Models
71 | The BioImage.IO Chatbot is a versatile tool that can generate and execute code in response to user queries. This means that when a user asks about specific models available in bioimage.io, the chatbot can fetch this information by generating a custom script.
72 |
For instance, if a user inquires about the total number of models in the BioImage Model Zoo, the chatbot can effortlessly retrieve this information using a straightforward script. However, the questions can become more intricate. Users might want to know which models in bioimage.io are suitable for a particular tag or task (*e.g., segmentation*) or similar criteria. The chatbot is adept at handling these more complex queries, too.
74 |
75 | 
76 |
77 |
78 | ### Model execution
79 | TODO: Update the description for model execution and screenshot for cellpose example.
80 |
81 | The BioImage.IO Chatbot can also execute functions in external APIs, analysis pipelines, plugins, and user code. Cellpose image segmentation has been added as a Chatbot function calling extension and can be used as a template for additional APIs. Users may currently use the Cellpose API function calling by uploading a file and requesting the Chatbot to segment it either from the `auto` or `cellpose` channels.
82 |
83 | ## Conclusion
84 | The BioImage.IO Chatbot is a powerful tool designed to provide you with accurate and personalized information from a wide range of databases. Whether you're a biologist or a developer, our chatbot is here to assist you. Feel free to explore its capabilities, ask questions, and customize your experience.
85 |
86 | Explore more, learn more, and enjoy the benefits of BioImage.IO Chatbot!
87 |
88 | ## Additional Resources
89 | - For documentation, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot).
- Do you have questions or need assistance? Contact us through [GitHub Issues](https://github.com/bioimage-io/bioimageio-chatbot/issues).
91 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/static/worker-manager.js:
--------------------------------------------------------------------------------
1 | class PyodideWorkerManager {
  // Hypha service descriptors registered by each worker, keyed by worker id.
  hyphaServices = {}
  // Live Worker instances, keyed by worker id.
  workers = {}
  // App-service wrappers (built in createWorker), keyed by worker id.
  workerApps = {}
  // Callbacks invoked whenever the set of workers changes (see notify()).
  subscribers = []
  // Per-worker execution records, keyed by worker id.
  workerRecords = {}
  // native file system handle
  // dirHandle: directory handle mounted into each new worker (optional).
  // mountPoint: target path inside the worker FS; defaults to "/mnt".
  constructor(dirHandle, mountPoint) {
    this.workers = {} // NOTE(review): redundant — already initialized by the class field above
    this.workerRecords = {} // NOTE(review): redundant as well
    this.dirHandle = dirHandle
    this.mountPoint = mountPoint || "/mnt"
  }
  // Return the native file-system directory handle passed to the constructor
  // (may be undefined when no directory was provided).
  getDirHandle() {
    return this.dirHandle
  }
19 | // Subscribe method
20 | subscribe(callback) {
21 | this.subscribers.push(callback)
22 |
23 | // Return an unsubscribe function
24 | return () => {
25 | this.subscribers = this.subscribers.filter(sub => sub !== callback)
26 | }
27 | }
28 |
29 | // Call this method whenever the workers list changes
30 | notify() {
31 | this.subscribers.forEach(callback => callback())
32 | }
33 |
  // List the app-service wrappers for all live workers.
  getWorkerApps() {
    // return appInfo
    return Object.values(this.workerApps)
  }
  // Spawn a new pyodide web worker, wait for it to signal readiness, and
  // return an app-service wrapper exposing its operations.
  // `info` is stored as the service's appInfo and is otherwise opaque here.
  async createWorker(info) {
    // Short random id used as the key for all per-worker bookkeeping.
    const id = Math.random().toString(36).substring(7)
    console.log("Creating worker:", id)
    const worker = new Worker("/chat/pyodide-worker.js")
    // The worker posts a first message when it has finished booting;
    // block until then so callers get a usable worker.
    await new Promise(resolve => (worker.onmessage = () => resolve()))
    this.workers[id] = worker
    // kill() terminates the underlying worker and marks it so other code
    // can detect that it is gone.
    worker.kill = () => {
      worker.terminate()
      worker.terminated = true;
    }
    this.workerRecords[id] = []
    this.hyphaServices[id] = []
    const self = this
    // Service facade; camelCase and snake_case aliases are both exposed
    // (the snake_case ones mirror the Python-side naming convention).
    const appService = {
      id,
      appInfo: info,
      worker,
      async runScript(script, ioContext) {
        return await self.runScript(id, script, ioContext)
      },
      async run_script(script, io_context) {
        return await self.runScript(id, script, io_context)
      },
      async mount(mountPoint, dirHandle) {
        return await self.mountNativeFs(id, mountPoint, dirHandle)
      },
      async render(container) {
        self.render(id, container)
      },
      async renderSummary(container) {
        return self.renderSummary(id, container)
      },
      async close() {
        await self.closeWorker(id)
      },
      getLogs() {
        return self.workerRecords[id]
      },
      get_logs() {
        return self.workerRecords[id]
      },
      async listHyphaServices() {
        return self.hyphaServices[id]
      },
      async list_hypha_services() {
        return self.hyphaServices[id]
      }
    }
    this.workerApps[id] = appService
    // Auto-mount the manager-level directory handle, if one was provided.
    if (this.dirHandle) {
      await this.mountNativeFs(id)
    }
    this.notify()
    return appService
  }
95 | async closeWorker(id) {
96 | if (this.workers[id]) {
97 | this.workers[id].kill();
98 | delete this.workers[id]
99 | delete this.workerRecords[id]
100 | delete this.workerApps[id]
101 | this.notify()
102 | }
103 | }
104 |
105 | async getWorker(id) {
106 | if (id && this.workers[id]) {
107 | return this.workers[id]
108 | } else {
109 | throw new Error("No worker found with ID: " + id)
110 | }
111 | }
112 |
113 | async mountNativeFs(workerId, mountPoint, dirHandle) {
114 | if (!workerId) {
115 | throw new Error("No worker ID provided and no current worker available.")
116 | }
117 | const worker = await this.getWorker(workerId)
118 | return new Promise((resolve, reject) => {
119 | const handler = e => {
120 | if (e.data.mounted) {
121 | worker.removeEventListener("message", handler)
122 | resolve(true)
123 | } else if (e.data.mountError) {
124 | worker.removeEventListener("message", handler)
125 | reject(new Error(e.data.mountError))
126 | }
127 | }
128 | worker.addEventListener("message", handler)
129 | worker.postMessage({
130 | mount: {
131 | mountPoint: mountPoint || this.mountPoint,
132 | dirHandle: dirHandle || this.dirHandle
133 | }
134 | })
135 | })
136 | }
137 |
138 | addToRecord(workerId, record) {
139 | if (!this.workerRecords[workerId]) {
140 | this.workerRecords[workerId] = []
141 | }
142 | this.workerRecords[workerId].push(record)
143 | }
144 |
145 | renderOutputSummary(container, record) {
146 | // return a string preview of the output
147 | if (record.type === "store") {
148 | return `Store: ${record.key}`
149 | }
150 | else if (record.type === "script") {
151 | return `Script>>>:\n\`\`\`python\n${record.content}\n\`\`\`\n`
152 | } else if (record.type === "stdout") {
153 | if(record.content.trim() === "\n") {
154 | return "\n"
155 | }
156 | return `${record.content}\n`
157 | } else if (record.type === "stderr") {
158 | if(record.content.trim() === "\n") {
159 | return "\n"
160 | }
161 | return `${record.content}\n`
162 | } else if (record.type === "service") {
163 | return `Service: ${record.content}`
164 | } else if (record.type === "audio" || record.type === "img") {
165 | return `Image: