├── docs
    ├── .nojekyll
    ├── Elixir-Presentation.pdf
    ├── example-data
    │   └── nuclei.tif
    ├── screenshots
    │   ├── bia-tutor.png
    │   ├── channels.png
    │   ├── chatbot-hi.png
    │   ├── extensions.png
    │   ├── gpts_json.png
    │   ├── web-search.png
    │   ├── bia-executor.png
    │   ├── more-details.png
    │   ├── role_create.png
    │   ├── search-biii.png
    │   ├── user-profile.png
    │   ├── SimilarityScore.png
    │   ├── chat-interface.png
    │   ├── configure_gpt.png
    │   ├── direct-response.png
    │   ├── respond_to_user.png
    │   ├── retrieval-text.png
    │   ├── screenshot-hi.png
    │   ├── chatbot-animation.gif
    │   ├── chatbot-biologist.png
    │   ├── chatbot-developer.png
    │   ├── gpts_landing_page.png
    │   ├── search-model-zoo.png
    │   ├── select-extension.png
    │   ├── chatbot-model-upload.png
    │   ├── chatbot-wsi-pipeline.png
    │   ├── search-bioimage-docs.png
    │   ├── search-image-forum.png
    │   ├── customization1_student.png
    │   ├── customization_biotool.png
    │   ├── chatbot-community-partner.png
    │   ├── customization2_developer.png
    │   ├── script-gen-exe-retrieval.png
    │   ├── search-bioimage-archive.png
    │   ├── similarity-score-results.png
    │   ├── chatbot-channel-bioimageio.png
    │   ├── chatbot-technical-question.png
    │   ├── chatbot-channel-scikit-image.png
    │   └── chatgpt-vs-bioimageiochatbot.png
    ├── _sidebar.md
    ├── beta-testing-guidelines.md
    ├── creating_GPTs.md
    ├── CONTRIBUTING.md
    ├── DISCLAIMER.md
    ├── index.html
    ├── technical-overview.md
    ├── figure-2-use-cases.md
    ├── installation.md
    ├── usage-example.md
    ├── README.md
    └── development.md
├── bioimageio_chatbot
    ├── __init__.py
    ├── tools.py
    ├── gpts_action.py
    ├── static
    │   ├── imagej-js-extension.imjoy.html
    │   ├── bioimage-model-zoo-extension.imjoy.html
    │   ├── worker-manager.js
    │   └── pyodide-worker.js
    ├── evaluation.py
    ├── chatbot_extensions
    │   ├── web_search_extension
    │   │   ├── __init__.py
    │   │   └── langchain_websearch.py
    │   ├── __init__.py
    │   ├── vision_extension.py
    │   ├── bia_extension.py
    │   ├── hpa_extension.py
    │   ├── biii_extension.py
    │   ├── image_sc_extension.py
    │   └── docs_extension.py
    ├── __main__.py
    ├── utils.py
    ├── quota.py
    ├── jsonschema_pydantic.py
    └── knowledge_base.py
├── MANIFEST.in
├── requirements_test.txt
├── Dockerfile
├── tests
    ├── __pycache__
    │   ├── test_chatbot.cpython-39-pytest-7.2.1.pyc
    │   └── test_chatbot.cpython-310-pytest-7.4.2.pyc
    ├── test_knowledge_base.py
    ├── test_chatbot_answer.py
    └── test_chatbot.py
├── scripts
    └── publish.sh
├── requirements.txt
├── pyproject.toml
├── .github
    └── workflows
    │   ├── publish.yml
    │   └── build.yml
├── LICENSE
└── .gitignore


/docs/.nojekyll:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include bioimageio_chatbot/static/*


--------------------------------------------------------------------------------
/requirements_test.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | pytest
3 | pytest-asyncio
4 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3:latest
2 | RUN pip install bioimageio-chatbot
3 | 
4 | EXPOSE 9000


--------------------------------------------------------------------------------
/docs/Elixir-Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/Elixir-Presentation.pdf


--------------------------------------------------------------------------------
/docs/example-data/nuclei.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/example-data/nuclei.tif


--------------------------------------------------------------------------------
/docs/screenshots/bia-tutor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-tutor.png


--------------------------------------------------------------------------------
/docs/screenshots/channels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/channels.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-hi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-hi.png


--------------------------------------------------------------------------------
/docs/screenshots/extensions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/extensions.png


--------------------------------------------------------------------------------
/docs/screenshots/gpts_json.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_json.png


--------------------------------------------------------------------------------
/docs/screenshots/web-search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/web-search.png


--------------------------------------------------------------------------------
/docs/screenshots/bia-executor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-executor.png


--------------------------------------------------------------------------------
/docs/screenshots/more-details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/more-details.png


--------------------------------------------------------------------------------
/docs/screenshots/role_create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/role_create.png


--------------------------------------------------------------------------------
/docs/screenshots/search-biii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-biii.png


--------------------------------------------------------------------------------
/docs/screenshots/user-profile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/user-profile.png


--------------------------------------------------------------------------------
/docs/screenshots/SimilarityScore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/SimilarityScore.png


--------------------------------------------------------------------------------
/docs/screenshots/chat-interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chat-interface.png


--------------------------------------------------------------------------------
/docs/screenshots/configure_gpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/configure_gpt.png


--------------------------------------------------------------------------------
/docs/screenshots/direct-response.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/direct-response.png


--------------------------------------------------------------------------------
/docs/screenshots/respond_to_user.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/respond_to_user.png


--------------------------------------------------------------------------------
/docs/screenshots/retrieval-text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/retrieval-text.png


--------------------------------------------------------------------------------
/docs/screenshots/screenshot-hi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/screenshot-hi.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-animation.gif


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-biologist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-biologist.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-developer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-developer.png


--------------------------------------------------------------------------------
/docs/screenshots/gpts_landing_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_landing_page.png


--------------------------------------------------------------------------------
/docs/screenshots/search-model-zoo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-model-zoo.png


--------------------------------------------------------------------------------
/docs/screenshots/select-extension.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/select-extension.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-model-upload.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-model-upload.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-wsi-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-wsi-pipeline.png


--------------------------------------------------------------------------------
/docs/screenshots/search-bioimage-docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-docs.png


--------------------------------------------------------------------------------
/docs/screenshots/search-image-forum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-image-forum.png


--------------------------------------------------------------------------------
/docs/screenshots/customization1_student.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization1_student.png


--------------------------------------------------------------------------------
/docs/screenshots/customization_biotool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization_biotool.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-community-partner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-community-partner.png


--------------------------------------------------------------------------------
/docs/screenshots/customization2_developer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization2_developer.png


--------------------------------------------------------------------------------
/docs/screenshots/script-gen-exe-retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/script-gen-exe-retrieval.png


--------------------------------------------------------------------------------
/docs/screenshots/search-bioimage-archive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-archive.png


--------------------------------------------------------------------------------
/docs/screenshots/similarity-score-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/similarity-score-results.png


--------------------------------------------------------------------------------
/bioimageio_chatbot/tools.py:
--------------------------------------------------------------------------------
1 | 
2 | # example function
3 | def example_function(address):
4 |     pass
5 | 
6 | 
7 | TOOL_MAP = {"example_function": example_function}


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-channel-bioimageio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-bioimageio.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-technical-question.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-technical-question.png


--------------------------------------------------------------------------------
/docs/screenshots/chatbot-channel-scikit-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-scikit-image.png


--------------------------------------------------------------------------------
/docs/screenshots/chatgpt-vs-bioimageiochatbot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatgpt-vs-bioimageiochatbot.png


--------------------------------------------------------------------------------
/tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc


--------------------------------------------------------------------------------
/tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc


--------------------------------------------------------------------------------
/scripts/publish.sh:
--------------------------------------------------------------------------------
1 | # Rebuild the distribution from a clean tree and upload the result with twine.
2 | pip install -U twine wheel
3 | python3 -m pip install --upgrade build
4 | rm -rf ./build
5 | rm -rf ./dist/*  # -f: do not error when dist/ is missing or empty (e.g. first build)
6 | python3 -m build
7 | twine upload dist/*
8 | rm -rf ./build


--------------------------------------------------------------------------------
/docs/_sidebar.md:
--------------------------------------------------------------------------------
1 | <!-- docs/_sidebar.md -->
2 | * [Overview](/README)
3 | * [Installation](/installation)
4 | * [Usage guide](/usage-example)
5 | * [Technical Overview](/technical-overview)
6 | * [Extension Development](/development)
7 | * [Contribution Guidelines](/CONTRIBUTING)
8 | * [Use Cases](/figure-2-use-cases)
9 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | schema-agents>=0.1.59
 2 | hypha-rpc==0.20.38
 3 | requests
 4 | pypdf
 5 | pillow
 6 | matplotlib
 7 | hypha==0.20.38.post19
 8 | tqdm
 9 | aiofiles
10 | serpapi
11 | langchain>=0.1.11
12 | langchain-community==0.0.27
13 | langchain-core==0.1.31
14 | beautifulsoup4
15 | pandas
16 | duckduckgo-search==6.1.5
17 | langchain-openai==0.0.8
18 | rank-bm25==0.2.2
19 | html2text==2020.1.16
20 | setuptools
21 | 


--------------------------------------------------------------------------------
/tests/test_knowledge_base.py:
--------------------------------------------------------------------------------
1 | from langchain_community.vectorstores import FAISS
2 | from langchain_openai import OpenAIEmbeddings
3 | 
4 | def test_knowledge_base():
5 |     """Test the knowledge base"""
6 |     vectordb = FAISS.load_local(folder_path="./bioimageio-knowledge-base", index_name="bioimage.io", embeddings=OpenAIEmbeddings(), allow_dangerous_deserialization=True)
7 |     retriever = vectordb.as_retriever(score_threshold=0.4)
8 |     items = retriever.get_relevant_documents("community partner", verbose=True)
9 |     assert len(items) > 0


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "wheel"]
 3 | 
 4 | [project]
 5 | name = "bioimageio-chatbot"
 6 | version = "0.2.13"
 7 | readme = "README.md"
 8 | description = "Your Personal Assistant in Computational BioImaging."
 9 | dependencies = [
10 |   "schema-agents>=0.1.59",
11 |   "hypha-rpc>=0.20.38",
12 |   "requests",
13 |   "pypdf",
14 |   "pillow",
15 |   "matplotlib",
16 |   "hypha>=0.20.38.post19",
17 |   "tqdm",
18 |   "aiofiles",
19 |   "langchain>=0.1.6",
20 |   "beautifulsoup4",
21 |   "pandas",
22 |   "duckduckgo-search>=6.1.5",
23 |   "rank-bm25",
24 |   "langchain-openai",
25 |   "langchain-core>=0.1.31",
26 |   "langchain-community>=0.0.27",
27 |   "html2text",
28 | ]
29 | 
30 | [tool.setuptools]
31 | include-package-data = true
32 | 
33 | [tool.setuptools.packages.find]
34 | include = ["bioimageio_chatbot*"]
35 | exclude = ["tests*", "scripts*"]
36 | 
37 | # `[options.entry_points]` is setup.cfg syntax and is ignored here; PEP 621 uses
38 | # `[project.scripts]`. NOTE(review): assumes `__main__.py` defines `main` — confirm.
39 | [project.scripts]
40 | bioimageio-chatbot = "bioimageio_chatbot.__main__:main"
41 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     # Optional inputs, you can add more according to your needs
 6 |     inputs:
 7 |       # version:
 8 |       #   description: 'Version of the package to release'
 9 |       #   required: true
10 |       #   default: '1.0.0'
11 | 
12 | jobs:
13 |   publish:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |     - name: Check out code
17 |       uses: actions/checkout@v2
18 | 
19 |     # Add steps for any necessary setup, like installing dependencies
20 |     - name: Build
21 |       run: |
22 |         python -m pip install --upgrade pip
23 |         python -m pip install -U twine
24 |         python -m pip install -U wheel
25 |         python3 -m pip install build==1.0.3 # pin build
26 |         rm -rf ./build
27 |         rm -rf ./dist/*
28 |         python3 -m build
29 |   
30 |     - name: Publish to PyPI
31 |       uses: pypa/gh-action-pypi-publish@release/v1  # the `master` ref has been sunset upstream
32 |       with:
33 |         user: __token__
34 |         password: ${{ secrets.PYPI_API_TOKEN }}
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 BioImage.IO
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 | 
 6 | name: Build
 7 | 
 8 | permissions:
 9 |   contents: read
10 | 
11 | jobs:
12 |   build:
13 |     permissions:
14 |       contents: write # for Git to git push
15 |     name: Build
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - name: Checkout code
19 |         uses: actions/checkout@v3
20 |       - name: Setup conda
21 |         uses: s-weigand/setup-conda@v1
22 |         with:
23 |           update-conda: true
24 |           python-version: 3.9
25 |           conda-channels: anaconda, conda-forge
26 |       - name: Install dependencies
27 |         run: |
28 |           python -m pip install --upgrade pip
29 |           python -m pip install -r requirements.txt
30 |       - name: Trigger workflow at bioimage.io
31 |         uses: benc-uk/workflow-dispatch@v1
32 |         with:
33 |           workflow: build-site.yml
34 |           repo: bioimage-io/bioimage.io
35 |           inputs: '{}'
36 |           # Required when using the `repo` option. Either a PAT or a token generated from the GitHub app or CLI
37 |           token: "${{ secrets.PAT_TOKEN_WORKFLOW }}"
38 |       - name: Run Init
39 |         run: python -m bioimageio_chatbot init
40 |       - name: Test server
41 |         env:
42 |           OPENAI_API_KEY: sk-xxxxxxxx
43 |         # The server starts in the background; retry until it accepts connections instead of racing it.
44 |         run: python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 & wget --retry-connrefused --waitretry=1 --tries=30 http://127.0.0.1:9000/
45 |       # - name: Build vector database
46 |       #   if: github.event_name == 'push' && github.ref == 'refs/heads/main'
47 |       #   run: |
48 |       #     python -m bioimageio_chatbot create-knowledge-base --output-dir=./dist/bioimageio-knowledge-base
49 |       #   env:
50 |       #     OPENAI_API_KEY: ${{ secrets.OPENAI_SECRET_API_KEY }}
51 |       # - name: Deploy
52 |       #   if: github.event_name == 'push' && github.ref == 'refs/heads/main'
53 |       #   uses: peaceiris/actions-gh-pages@v3
54 |       #   with:
55 |       #     github_token: ${{ secrets.GITHUB_TOKEN }}
56 |       #     publish_dir: ./dist
57 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/gpts_action.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from openai import AsyncOpenAI
 3 | from bioimageio_chatbot.chatbot_extensions import extension_to_tools
 4 | from schema_agents.utils.schema_conversion import get_service_openapi_schema
 5 | from hypha_rpc import login, connect_to_server
 6 | 
 7 | client = AsyncOpenAI()
 8 | 
 9 | async def convert_extensions(builtin_extensions):
10 |     extension_services = {}
11 |     for extension in builtin_extensions:
12 |         tools = await extension_to_tools(extension)
13 |         for tool in tools:
14 |             extension_services[tool.__name__] = tool
15 |     return extension_services
16 | 
17 | async def serve_actions(server, server_url, builtin_extensions):
18 |     extension_services = await convert_extensions(builtin_extensions)
19 |     svc = {
20 |         "id": "bioimageio-chatbot-extensions-api",
21 |         "name": "BioImage.io Chatbot Extensions",
22 |         "description": "A collection of chatbot extensions for facilitate user interactions with external documentation, services and tools.",
23 |         "config": {
24 |             "visibility": "public",
25 |             "require_context": False
26 |         },
27 |     }
28 |     svc.update(extension_services)
29 |     workspace = server.config['workspace']
30 |     service_id = "bioimageio-chatbot-extensions-api"
31 |     openapi_schema = get_service_openapi_schema(svc, f"{server_url}/{workspace}/services/{service_id}")
32 |     svc["get_openapi_schema"] = lambda : openapi_schema
33 | 
34 |     service_info = await server.register_service(svc)
35 |     print(f"Service registered, openapi schema: {server_url}/services/call?service_id={service_info['id']}&function_key=get_openapi_schema")
36 | 
37 | async def start_server(server_url):
38 |     token = await login({"server_url": server_url})
39 |     server = await connect_to_server({"server_url": server_url, "token": token, "method_timeout": 100})
40 |     print(f"Connected to server: {server_url}")
41 |     await serve_actions(server, server_url)
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     server_url = "https://staging.chat.bioimage.io/"
46 |     loop = asyncio.get_event_loop()
47 |     loop.create_task(start_server(server_url))
48 |     loop.run_forever()


--------------------------------------------------------------------------------
/docs/beta-testing-guidelines.md:
--------------------------------------------------------------------------------
 1 | # BioImage.IO Chatbot Beta Testing Guidelines
 2 | 
 3 | Thank you for participating in the beta testing phase of the BioImage.IO Chatbot! Your feedback is invaluable in helping us improve and refine the chatbot. Please follow these guidelines to ensure a smooth testing experience:
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | 1. Please read the [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md).
 8 | 2. **Login with Your Email Account:** First, please create an account [here](https://ai.imjoy.io/public/apps/hypha-login/). Then use the chatbot via [https://bioimage.io/chat](https://bioimage.io/chat). If you have any issues with the login, please contact us.
 9 | 
10 | ## Beta Testing Steps
11 | 
12 | 1. **Access the Chatbot:**
13 |    - Explore the example usage guide: [Usage Example Guide](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/usage-example.md)
14 |    - Access the BioImage.IO Chatbot through the link we provided via email.
15 | 
16 | 2. **Testing Scenarios:**
17 |    - Try editing your profile so that it best matches your background.
18 |    - Test the chatbot with various scenarios relevant to your field (biologist, developer, etc.).
19 |    - Try different knowledge base channels and observe the responses.
20 | 
21 | 3. **Provide Feedback:**
22 |    - For a specific chatbot response, you can also like/dislike a response from the BioImage.IO Chatbot by clicking the thumbs up/down button under each message. Then, a pop up will appear for you to provide feedback on the response.
23 |    - For general feedback, please use the "Feedback" button at the bottom of the chatbot interface. Feel free to comment on the current chat session, or provide general feedback or ideas. E.g. we would be happy to hear about your experience and ideas on how to improve the chatbot.
24 |    - If you encounter any bugs, issues, or unexpected behavior, please use the [GitHub issues](https://github.com/bioimage-io/bioimageio-chatbot/issues) section to create a new issue.
25 | 
26 | 4. **Community Contributions:**
27 |    - If you have data sources or documentation relevant to the chatbot, feel free to propose community contributions.
28 | 
29 | ## Conclusion
30 | 
31 | Thank you for your valuable contribution to the BioImage.IO Chatbot beta testing! Your input helps us enhance the functionality and user experience for the entire community.
32 | 


--------------------------------------------------------------------------------
/docs/creating_GPTs.md:
--------------------------------------------------------------------------------
 1 | # Creating OpenAI GPTs with Chatbot Extensions
 2 | 
 3 | ## Introduction
 4 | In addition to standalone usage, the BioImage.IO Chatbot supports porting extensions to OpenAI custom [GPTs](https://chat.openai.com/gpts) for users with OpenAI accounts. Chatbot extensions following the development model specified in the [development guidelines](./development.md) and [notebook tutorial](./bioimage-chatbot-extension-tutorial.ipynb) are automatically converted to `openapi` schema which can be used to create OpenAI GPTs using the online GPT creator. 
 5 | 
 6 | `openapi` schemas for extensions are generated on Chatbot server startup via the `serve_actions` function in [gpts_action.py](../bioimageio_chatbot/gpts_action.py), which registers the extension service through `register_service`. These schemas are then made available for OpenAI GPT creator import directly via URL. This process for creating a custom GPT from the public BioImage.IO Chatbot instance extensions is shown below. Users are encouraged to submit their extensions to the BioImage.IO team for incorporation into the public Chatbot instance. 
 7 | 
 8 | Note that GPT actions are run through the hosted server instance (chat.bioimage.io in the case of the public Chatbot instance). Also note that the creation of custom OpenAI GPTs requires a paid OpenAI account. 
 9 | 
10 | ## Creating a Custom GPT from the public Chatbot Instance
11 | The public Chatbot instance's `openapi` extension schema is available at the following link: `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema`
12 | 
13 | After logging in to their OpenAI accounts, users can navigate to the GPTs [page](https://chat.openai.com/gpts) and click `Create` as shown below:
14 | 
15 | ![gpt_landing_page](./screenshots/gpts_landing_page.png)
16 | 
17 | To add GPT actions from Chatbot extensions, navigate to the `Configure` tab and select `Create new action`:
18 | 
19 | ![configure_gpt](./screenshots/configure_gpt.png)
20 | 
21 | The Chatbot-generated `openapi` schema can then be imported directly by selecting `Import from URL` and entering the URL of the public Chatbot's extension schema: `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema`
22 | 
23 | Users can edit the JSON content to select individual actions from the Chatbot extensions if desired:
24 | 
25 | ![gpts_json](./screenshots/gpts_json.png)
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # BioImage.IO Chatbot Contribution Guidelines
 2 | 
 3 | Thank you for your interest in contributing to the BioImage.IO Chatbot. Your contributions help us enhance the chatbot's knowledge base and provide more accurate and detailed responses. This document outlines how you can contribute new databases or information for retrieval by the chatbot.
 4 | 
 5 | ## Introduction
 6 | 
 7 | The BioImage.IO Chatbot relies on a comprehensive knowledge base to provide accurate responses to user queries. We encourage contributions to expand this knowledge base by adding new databases, information, or resources. Whether you're a researcher, developer, or domain expert, your contributions can help improve the chatbot's functionality.
 8 | 
 9 | ## Contribution Process
10 | ### Knowledge Base
11 | 
12 | You can contribute to the chatbot's knowledge base by adding new databases or information.
13 | 
14 | We use the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to keep track of the databases and their details.
15 | 
16 | Follow these steps to contribute to the BioImage.IO Chatbot:
17 | 
18 | 1. Take a look at the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to see the databases that are currently integrated with the chatbot. The existing data sources are Markdown files hosted on GitHub, JSON files, etc.
19 | 2. Prepare your database by organising your information to ensure it is accurate, relevant, and structured in a manner that can be easily retrieved. You can find some URLs for the existing data sources; please use those as examples.
20 | 3. Fork this repository and edit the manifest to include the details of your database, including the name, URL and description.
21 | 4. You can submit your contribution with a Pull Request (PR) with the updated manifest. Our team will review and integrate the changes.
22 | 5. Once your contribution is accepted and the chatbot's knowledge is updated, test that the chatbot is accurate in its responses when retrieving information from your database.
23 | 
24 | Remember that at any step of the process you can contact us for feedback or assistance. We deeply appreciate your contribution!
25 | 
26 | ### Develop Custom Extension
27 | 
28 | The BioImage.IO Chatbot offers a framework designed for easy extensibility, allowing developers to enrich its capabilities with custom extensions. For details on how to contribute to the chatbot by developing a custom extension, see [`Developing Chatbot Extensions`](./development.md).
29 | 
30 | 
31 | ## Contact Us
32 | 
33 | If you have any questions, need assistance, or want to contribute to the chatbot's knowledge base, please don't hesitate to contact us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). Our team is here to help you get started and make valuable contributions.
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | quota_manager.db
  2 | # Byte-compiled / optimized / DLL files
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | tests/*.pyc
  7 | *.csv
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | data/
 16 | dist/
 17 | chat_sessions/
 18 | 
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | cover/
 58 | 
 59 | # Translations
 60 | *.mo
 61 | *.pot
 62 | 
 63 | # Django stuff:
 64 | *.log
 65 | local_settings.py
 66 | db.sqlite3
 67 | db.sqlite3-journal
 68 | 
 69 | # Flask stuff:
 70 | instance/
 71 | .webassets-cache
 72 | 
 73 | # Scrapy stuff:
 74 | .scrapy
 75 | 
 76 | # Sphinx documentation
 77 | docs/_build/
 78 | 
 79 | # PyBuilder
 80 | .pybuilder/
 81 | target/
 82 | 
 83 | # Jupyter Notebook
 84 | .ipynb_checkpoints
 85 | 
 86 | # IPython
 87 | profile_default/
 88 | ipython_config.py
 89 | 
 90 | # pyenv
 91 | #   For a library or package, you might want to ignore these files since the code is
 92 | #   intended to run in multiple environments; otherwise, check them in:
 93 | # .python-version
 94 | 
 95 | # pipenv
 96 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 97 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 98 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 99 | #   install all needed dependencies.
100 | #Pipfile.lock
101 | 
102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
103 | __pypackages__/
104 | 
105 | # Celery stuff
106 | celerybeat-schedule
107 | celerybeat.pid
108 | 
109 | # SageMath parsed files
110 | *.sage.py
111 | 
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 | 
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 | 
125 | # Rope project settings
126 | .ropeproject
127 | 
128 | # mkdocs documentation
129 | /site
130 | 
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 | 
136 | # Pyre type checker
137 | .pyre/
138 | 
139 | # pytype static type analyzer
140 | .pytype/
141 | 
142 | # Cython debug symbols
143 | cython_debug/
144 | 
145 | # Visual Studio Code
146 | .vscode/
147 | 
148 | # others
149 | *.bin
150 | logs/
151 | bioimageio-knowledge-base/
152 | .DS_Store
153 | chat_logs
154 | authorized_users.json
155 | .pypirc
156 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/static/imagej-js-extension.imjoy.html:
--------------------------------------------------------------------------------
 1 | 
 2 | <docs>
 3 | [TODO: write documentation for this plugin.]
 4 | </docs>
 5 | 
 6 | <config lang="json">
 7 | {
 8 |   "name": "ImageJ.JS Chatbot Extension",
 9 |   "type": "web-worker",
10 |   "tags": [],
11 |   "ui": "",
12 |   "version": "0.1.0",
13 |   "cover": "",
14 |   "description": "Run ImageJ.JS macro in the chatbot",
15 |   "icon": "extension",
16 |   "inputs": null,
17 |   "outputs": null,
18 |   "api_version": "0.1.8",
19 |   "env": "",
20 |   "permissions": [],
21 |   "requirements": [],
22 |   "dependencies": []
23 | }
24 | </config>
25 | 
26 | <script lang="javascript">
27 | class ImJoyPlugin {
28 |   async setup() {
29 |     if (api.registerChatbotExtension) {
30 |       const chatbot = api
31 |       await this.registerExtensions(chatbot.registerChatbotExtension)
32 |     } else {
33 |       let chatbot = await api.getWindow("BioImage.IO Chatbot")
34 |       if (chatbot) {
35 |         await this.registerExtensions(chatbot.registerExtension)
36 |       } else {
37 |         chatbot = await api.createWindow({src: "https://bioimage.io/chat", w: 28, h: 20, name: "BioImage.IO Chatbot"})
38 |         await this.registerExtensions(chatbot.registerExtension)
39 |       }
40 |       let ij = await api.getWindow("ImageJ.JS")
41 |       if (!ij) {
42 |         ij = await api.createWindow({src: "https://ij.imjoy.io/", name: "ImageJ.JS"})
43 |         await ij.runMacro('run("Blobs (25K)")')
44 |       }
45 |     }
46 |   }
47 | 
48 |   async registerExtensions(register) {
49 |     await register({
50 |       _rintf: true,
51 |       id: "imagej_js",
52 |       type: "bioimageio-chatbot-extension",
53 |       name: "ImageJ.JS",
54 |       description: "Run ImageJ macro for image analysis",
55 |       async get_schema() {
56 |         return {
57 |             run_macro: {
58 |               type: "object",
59 |               title: "run_macro",
60 |               description: "Run ImageJ macro",
61 |               properties: {
62 |                 macro: {
63 |                   type: "string",
64 |                   description: "ImageJ macro (for running inside imagej1, with no plugin support)",
65 |                 },
66 |               },
67 |               required: ["macro"]
68 |             }
69 |         }
70 |       },
71 |       tools: {
72 |           async run_macro(config) {
73 |             let ij = await api.getWindow("ImageJ.JS")
74 |             if(!ij){
75 |               ij = await api.createWindow({src: "https://ij.imjoy.io/", name: "ImageJ.JS"})
76 |             }
77 |             // patch example images
78 |             // replace https://imagej.nih.gov/ij/images/* to https://imagej.net/images/*
79 |             config.macro = config.macro.replace(/https:\/\/imagej.nih.gov\/ij\/images\//g, "https://imagej.net/images/")
80 |             try{
81 |                 await ij.runMacro(config.macro)
82 |                 return "Macro executed successfully."
83 |             }
84 |             catch(e){
85 |                 return "Failed to execute the macro: "+e
86 |             }
87 |           },
88 |       }
89 |     })
90 | 
91 |     api.log('initialized')
92 |   }
93 | 
94 |   async run(ctx) {  }
95 | }
96 | 
97 | api.export(new ImJoyPlugin())  // presumably hands the plugin instance to the ImJoy host — confirm against the ImJoy plugin API
98 | </script>
99 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/evaluation.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | from schema_agents.schema import Message
 3 | from typing import Any, Dict, List, Optional, Union
 4 | from schema_agents.role import Role
 5 | 
 6 | class EvaluationCriteria(BaseModel):
 7 |     relevance_and_accuracy: str = Field(
 8 |         default="Score 0-100: 0 means completely irrelevant, providing no useful information. "
 9 |                 "100 means the answer is comprehensive, accurate, and closely matches the reference answer.",
10 |         description="Assess how relevant and accurate the chatbot's answer is compared to the reference answer."
11 |     )
12 |     coverage_of_key_points: str = Field(
13 |         default="Baseline >60: Answers covering the main points from the reference should score above 60, "
14 |                 "indicating they address the primary aspects of the question.",
15 |         description="Evaluate whether the chatbot's answer includes the main points mentioned in the reference answer."
16 |     )
17 |     additional_information: str = Field(
18 |         default="Variable Impact: Additional helpful information can increase the score. "
19 |                 "Irrelevant or unhelpful information should lead to a reduced score.",
20 |         description="Assess the impact of additional information not present in the reference answer."
21 |     )
22 |     evaluation_guidelines: str = Field(
23 |         default="Apply scoring criteria consistently and impartially. "
24 |                 "Provide justification for scores, especially for significant deviations from the baseline.",
25 |         description="Guidelines for objective and transparent evaluation."
26 |     )
27 |     
28 | class EvalInput(BaseModel):  # payload built by evaluate() and passed to the evaluator role
29 |     """Input for evaluating scores of LLM-based system."""
30 |     question: str = Field(description="The question that was asked.")
31 |     reference_answer: str = Field(description="The answer that was expected.")
32 |     llm_answer: str = Field(description="The answer that was generated by the LLM-based system.")
33 |     
34 | class EvalScores(BaseModel):
35 |     """Scores of evaluating llm answer."""
36 |     criteria: EvaluationCriteria = Field(description="Criteria for evaluating the performance of the LLM-based system.")
37 |     similarity_score: float = Field(description="Following the criteria, access the llm_answer. Float between 0 and 100 representing the similarity score. ")
38 |     
39 | def create_eval_agent():
40 |     async def bot_answer_evaluate(req: EvalInput, role: Role) -> EvalScores:
41 |         """Return the answer to the question."""
42 |         response = await role.aask(req, EvalScores)
43 |         return response
44 |     
45 |     eval_bot = Role(
46 |         name="Thomas",
47 |         profile="Evaluator",
48 |         goal="Evaluate the performance of the LLM-based system.",
49 |         constraints=None,
50 |         actions=[bot_answer_evaluate],
51 |         model="gpt-4-1106-preview"
52 |     )
53 |     return eval_bot
54 | 
55 | async def evaluate(question, reference_answer, llm_answer):
56 |     eval_bot = create_eval_agent()
57 |     eval_input = EvalInput(question=question, reference_answer=reference_answer, llm_answer=llm_answer)
58 |     scores = await eval_bot.handle(Message(content=eval_input.model_dump_json(), data=eval_input, role="User"))
59 |     similarity_score = scores[0].data.similarity_score
60 |     return similarity_score
61 |     


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/web_search_extension/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from schema_agents import schema_tool
 3 | from bioimageio_chatbot.utils import ChatbotExtension
 4 | from pydantic import Field
 5 | from typing import Optional
 6 | 
 7 | import httpx
 8 | from bs4 import BeautifulSoup
 9 | 
10 | from .langchain_websearch import LangchainCompressor
11 | 
12 | default_langchain_compressor = None  # shared LangchainCompressor, created on the first browse_web_pages call
13 | 
14 | @schema_tool
15 | async def search_web(query: str=Field(description="space separated keywords for the duckduckgo search engine"), max_results: int = Field(description="maximum number of results to return")):
16 |     """Search the web for information using duckduckgo."""
17 |     from duckduckgo_search import AsyncDDGS
18 |     query = query.strip("\"'")
19 |     results = await AsyncDDGS(proxy=None).atext(query, region='wt-wt', safesearch='moderate', timelimit=None,
20 |                             max_results=max_results)
21 |     if not results:
22 |         return "No relevant information found."
23 |     docs = []
24 |     for d in results:
25 |         docs.append({"title": d['title'], "body": d['body'], "url": d['href']})
26 |     return docs
27 | 
28 | @schema_tool
29 | async def browse_web_pages(query: str=Field(description="keywords or a sentence describing the information to be retrieved"), urls: list[str]=Field(description="list of web page urls to analyse"), num_results_to_process: Optional[int]=Field(5, description="number of results to process")):
30 |     """Read web pages and return compressed documents with most relevant information."""
31 |     global default_langchain_compressor
32 |     default_langchain_compressor = default_langchain_compressor or LangchainCompressor(device="cpu")
33 | 
34 |     documents = await default_langchain_compressor.faiss_embedding_query_urls(query, urls,
35 |                                                                num_results=num_results_to_process)
36 |     
37 |     if not documents:    # Fall back to old simple search rather than returning nothing
38 |         print("LLM_Web_search | Could not find any page content "
39 |               "similar enough to be extracted, using basic search fallback...")
40 |         return "No relevant information found."
41 |     #return the json serializable documents
42 |     return [doc.page_content + '\nsource: ' + doc.metadata.get('source') for doc in documents]
43 | 
44 | @schema_tool
45 | async def read_webpage(url: str=Field(description="the web url to read")) -> str:
46 |     """Read the full content of a web page converted to plain text."""
47 |     headers = {
48 |         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
49 |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
50 |         "Accept-Language": "en-US,en;q=0.5"
51 |     }
52 | 
53 |     async with httpx.AsyncClient() as client:
54 |         response = await client.get(url, headers=headers)
55 | 
56 |     soup = BeautifulSoup(response.content, features="lxml")
57 |     for script in soup(["script", "style"]):
58 |         script.extract()
59 | 
60 |     strings = soup.stripped_strings
61 |     return '\n'.join([s.strip() for s in strings])
62 | 
63 | 
64 | def get_extension():
65 |     return ChatbotExtension(
66 |         id="web",
67 |         name="Search Web",
68 |         description="Search the web for information using duckduckgo. Search by keywords and returns a list of relevant documents.",
69 |         tools=dict(search=search_web, browse=browse_web_pages)
70 |     )
71 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/__main__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import argparse
 3 | import asyncio
 4 | import subprocess
 5 | import os
 6 | from bioimageio_chatbot.knowledge_base import load_knowledge_base
 7 | 
 8 | def start_server(args):
 9 |     if args.login_required:
10 |         os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true"
11 |     else:
12 |         os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "false"
13 |     # get current file path so we can get the path of apps under the same directory
14 |     current_dir = os.path.dirname(os.path.abspath(__file__))
15 |     command = [
16 |         sys.executable,
17 |         "-m",
18 |         "hypha.server",
19 |         f"--host={args.host}",
20 |         f"--port={args.port}",
21 |         f"--public-base-url={args.public_base_url}",
22 |         f"--static-mounts=/chat:{current_dir}/static",
23 |         "--startup-functions=bioimageio_chatbot.chatbot:register_chat_service"
24 |     ]
25 |     subprocess.run(command)
26 | 
27 | def connect_server(args):
28 |     from bioimageio_chatbot.chatbot import connect_server
29 |     if args.login_required:
30 |         os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true"
31 |     else:
32 |         os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "false"
33 |     server_url = args.server_url
34 |     loop = asyncio.get_event_loop()
35 |     loop.create_task(connect_server(server_url))
36 |     loop.run_forever()
37 | 
38 | def create_knowledge_base(args):  # handler of the "create-knowledge-base" subcommand
39 |     from bioimageio_chatbot.knowledge_base import create_vector_knowledge_base  # imported only when this command runs
40 |     create_vector_knowledge_base(args.output_dir)
41 | 
42 | def init(args):
43 |     knowledge_base_path = os.environ.get("BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base")
44 |     assert knowledge_base_path is not None, "Please set the BIOIMAGEIO_KNOWLEDGE_BASE_PATH environment variable to the path of the knowledge base."
45 |     if not os.path.exists(knowledge_base_path):
46 |         print(f"The knowledge base is not found at {knowledge_base_path}, will download it automatically.")
47 |         os.makedirs(knowledge_base_path, exist_ok=True)
48 |     docs_store_dict = load_knowledge_base(knowledge_base_path)
49 |     
50 |     print("Databases loaded in the knowledge base:")
51 |     for key in docs_store_dict.keys():
52 |         print(f" - {key}")
53 | 
54 | def main():
55 |     parser = argparse.ArgumentParser(description="BioImage.IO Chatbot utility commands.")
56 |     
57 |     subparsers = parser.add_subparsers()
58 | 
59 |     # Init command
60 |     parser_init = subparsers.add_parser("init")
61 |     parser_init.set_defaults(func=init)
62 | 
63 |     # Start server command
64 |     parser_start_server = subparsers.add_parser("start-server")
65 |     parser_start_server.add_argument("--host", type=str, default="0.0.0.0")
66 |     parser_start_server.add_argument("--port", type=int, default=9000)
67 |     parser_start_server.add_argument("--public-base-url", type=str, default="")
68 |     parser_start_server.add_argument("--login-required", action="store_true")
69 |     parser_start_server.set_defaults(func=start_server)
70 |     
71 |     # Connect server command
72 |     parser_connect_server = subparsers.add_parser("connect-server")
73 |     parser_connect_server.add_argument("--server-url", default="https://ai.imjoy.io")
74 |     parser_connect_server.add_argument("--login-required", action="store_true")
75 |     parser_connect_server.set_defaults(func=connect_server)
76 |     
77 |     # Create knowledge base command
78 |     parser_create_kb = subparsers.add_parser("create-knowledge-base")
79 |     parser_create_kb.add_argument("--output-dir", default="./bioimageio-knowledge-base")
80 |     parser_create_kb.set_defaults(func=create_knowledge_base)
81 |     
82 |     args = parser.parse_args()
83 |     if hasattr(args, 'func'):
84 |         args.func(args)
85 |     else:
86 |         parser.print_help()
87 |         
88 | if __name__ == '__main__':  # run the CLI only when this module is executed directly
89 |     main()


--------------------------------------------------------------------------------
/docs/DISCLAIMER.md:
--------------------------------------------------------------------------------
 1 | # Disclaimer for BioImage.IO Chatbot
 2 | 
 3 | ## Research Purpose and Use of Copyrighted Material
 4 | 
 5 | The BioImage.IO Chatbot ("Chatbot") is part of a research project focused on Text and Data Mining (TDM) to support advancements in bioimage analysis. The primary purpose of this Chatbot is to assist users in navigating resources, tools, and workflows related to bioimage analysis for research purposes. In compliance with the European Union's copyright exception on TDM as outlined in [Directive (EU) 2019/790](https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32019L0790), the Chatbot utilizes copyrighted materials to which we have lawful access. These materials are used exclusively for non-commercial research purposes.
 6 | 
 7 | ## Evaluation-Only Use of Source Code and Live Demo
 8 | 
 9 | The BioImage.IO Chatbot's source code and live demo instance are provided solely for evaluation purposes. These services are intended to support ongoing research in Text and Data Mining and are not intended for production use or commercial purposes. Users are encouraged to explore the capabilities of the Chatbot but should refrain from deploying it in any production environment or for commercial gain.
10 | 
11 | ## General Usage
12 | 
13 | While we strive for accuracy, the Chatbot is not a substitute for professional advice, consultation, diagnosis, or any kind of formal scientific interpretation. Users should independently verify the accuracy and completeness of the information provided by the Chatbot.
14 | 
15 | ## No Warranties
16 | 
17 | The Chatbot service is provided "as is" and "as available" without any warranties of any kind, either express or implied, including but not limited to the implied warranties of merchantability, fitness for a particular purpose, or non-infringement. We make no warranty that the service will meet your requirements or be available on an uninterrupted, secure, or error-free basis.
18 | 
19 | ## Liability
20 | 
21 | Under no circumstances will we be liable for any loss or damage incurred as a result of the use of this Chatbot, including but not limited to any errors or omissions in the content, any unauthorized access to or use of our servers, or any loss of data or profits.
22 | 
23 | ## User Responsibility
24 | 
25 | The user assumes all responsibility and risk for the use of this Chatbot. It is the user's responsibility to evaluate the accuracy, completeness, or usefulness of any information, opinion, or content available through the Chatbot service. Users are reminded to carefully check with the original sources and to respect the respective licenses of any copyrighted materials. If you are the author of any material used by the Chatbot and wish to opt-out, please contact us via [this form](https://oeway.typeform.com/to/K3j2tJt7).
26 | 
27 | ## Third-Party Links
28 | 
29 | The Chatbot may provide links to external websites or resources for your convenience. We have no control over these sites and resources, and we are not responsible for their availability, reliability, or the content provided.
30 | 
31 | ## Data Privacy
32 | 
33 | User interactions with the Chatbot may be stored for analysis and improvement of the service. All data will be handled in accordance with our Privacy Policy.
34 | 
35 | ## Privacy Policy
36 | 
37 | The personal data you may provide will be used to disseminate information pertaining to the execution of the Horizon Europe Funded AI4Life project (Grant number: 101057970). In accordance with the Grant Agreement, your data will be retained during the project and deleted when it has ended as soon as the retention period established by the EC is over. If you would like to update or delete your data during the course of the project, please contact us using [this form](https://oeway.typeform.com/to/K3j2tJt7?typeform-source=bioimage.io).
38 | 
39 | ## Modifications
40 | 
41 | We reserve the right to modify this disclaimer at any time, effective upon posting of an updated version on this website. Continued use of the Chatbot after any such changes shall constitute your consent to such changes.
42 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import requests
  3 | import yaml
  4 | import os
  5 | from tqdm import tqdm
  6 | from pydantic import BaseModel, Field
  7 | from typing import Callable, Optional
  8 | import typing
  9 | from inspect import signature
 10 | from typing import Any, Callable, Dict, Optional
 11 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model
 12 | from schema_agents import schema_tool
 13 | 
 14 | def get_manifest():
 15 |     # If no manifest is provided, download from the repo
 16 |     if not os.path.exists("./knowledge-base-manifest.yaml"):
 17 |         print("Downloading the knowledge base manifest...")
 18 |         response = requests.get("https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/knowledge-base-manifest.yaml")
 19 |         assert response.status_code == 200
 20 |         with open("./knowledge-base-manifest.yaml", "wb") as f:
 21 |             f.write(response.content)
 22 |     
 23 |     return yaml.load(open("./knowledge-base-manifest.yaml", "r"), Loader=yaml.FullLoader)
 24 | 
 25 | 
 26 | def download_file(url, filename):
 27 |     response = requests.get(url, stream=True)
 28 |     file_size = int(response.headers.get('content-length', 0))
 29 | 
 30 |     # Initialize the progress bar
 31 |     progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024)
 32 |     
 33 |     with open(filename, 'wb') as f:
 34 |         for data in progress:
 35 |             # Update the progress bar
 36 |             progress.update(len(data))
 37 |             f.write(data)
 38 | 
 39 | 
 40 | def extract_schemas(function):
 41 |     sig = signature(function)
 42 |     positional_annotation = [
 43 |         p.annotation
 44 |         for p in sig.parameters.values()
 45 |         if p.kind == p.POSITIONAL_OR_KEYWORD
 46 |     ][0]
 47 |     output_schemas = (
 48 |         [sig.return_annotation]
 49 |         if not isinstance(sig.return_annotation, typing._UnionGenericAlias)
 50 |         else list(sig.return_annotation.__args__)
 51 |     )
 52 |     input_schemas = (
 53 |         [positional_annotation]
 54 |         if not isinstance(positional_annotation, typing._UnionGenericAlias)
 55 |         else list(positional_annotation.__args__)
 56 |     )
 57 |     return input_schemas, output_schemas
 58 | 
 59 | class ChatbotExtension(BaseModel):
 60 |     """Chatbot extension."""
 61 | 
 62 |     id: str  # e.g. "web"
 63 |     name: str  # e.g. "Search Web"
 64 |     description: str
 65 |     tools: Optional[Dict[str, Any]] = {}  # maps a tool name to the tool callable
 66 |     get_schema: Optional[Callable] = None  # presumably returns the tools' input schemas — confirm with callers
 67 |     get_state: Optional[Callable] = None
 68 |     info: Optional[Dict[str, Any]] = {}
 69 | 
 70 | class LegacyChatbotExtension(BaseModel):
 71 |     """A class that defines the interface for a user extension"""
 72 |     name: str = Field(..., description="The name of the extension")
 73 |     description: str = Field(..., description="A description of the extension")
 74 |     get_schema: Optional[Callable] = Field(None, description="A function that returns the schema for the extension")
 75 |     execute: Callable = Field(..., description="The extension's execution function")
 76 |     schema_class: Optional[BaseModel] = Field(None, description="The schema class for the extension")  # NOTE(review): legacy_extension_to_tool assigns a model class here, not an instance — Optional[Type[BaseModel]] may be intended
 77 | 
 78 | def convert_to_dict(obj):
 79 |     if isinstance(obj, BaseModel):
 80 |         return obj.dict()
 81 |     if isinstance(obj, dict):
 82 |         return {k: convert_to_dict(v) for k, v in obj.items()}
 83 |     if isinstance(obj, list):
 84 |         return [convert_to_dict(v) for v in obj]
 85 |     return obj
 86 | 
 87 | 
 88 | async def legacy_extension_to_tool(extension: LegacyChatbotExtension):
 89 |     if extension.get_schema:
 90 |         schema = await extension.get_schema()
 91 |         extension.schema_class = json_schema_to_pydantic_model(schema)
 92 |     else:
 93 |         input_schemas, _ = extract_schemas(extension.execute)
 94 |         extension.schema_class = input_schemas[0]
 95 | 
 96 |     assert extension.schema_class, f"Extension {extension.name} has no valid schema class."
 97 | 
 98 |     # NOTE: Right now, the first arguments has to be req
 99 |     async def execute(req: extension.schema_class):
100 |         print("Executing extension:", extension.name, req)
101 |         # req = extension.schema_class.parse_obj(req)
102 |         result = await extension.execute(req)
103 |         return convert_to_dict(result)
104 | 
105 |     execute.__name__ = extension.name
106 | 
107 |     if extension.get_schema:
108 |         execute.__doc__ = schema['description']
109 |     
110 |     if not execute.__doc__:
111 |         # if extension.execute is partial
112 |         if hasattr(extension.execute, "func"):
113 |             execute.__doc__ = extension.execute.func.__doc__ or extension.description
114 |         else:
115 |             execute.__doc__ = extension.execute.__doc__ or extension.description
116 |     return schema_tool(execute)


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |   <meta charset="utf-8">
 6 |   <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
 7 |   <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
 8 |   <meta name="apple-mobile-web-app-capable" content="yes">
 9 |   <meta name="mobile-web-app-capable" content="yes">
10 |   <meta name="description" content="BioImage.IO Chatbot">
11 |   <meta name="author" content="BioImage.IO Team">
12 |   <link rel="apple-touch-icon" sizes="180x180" href="/static/icons/apple-touch-icon.png">
13 |   <link rel="icon" type="image/png" sizes="32x32" href="/static/icons/favicon-32x32.png">
14 |   <link rel="icon" type="image/png" sizes="16x16" href="/static/icons/favicon-16x16.png">
15 |   <link rel="mask-icon" href="/static/icons/safari-pinned-tab.svg" color="#5bbad5">
16 |   <link rel="shortcut icon" href="/static/icons/favicon.ico" type="image/x-icon">
17 |   <link rel="icon" href="/static/icons/favicon.ico" type="image/x-icon">
18 |   <meta name="theme-color" content="#ffffff">
19 |   <link rel="stylesheet" href="https://imjoy-team.github.io/imjoy-docs/static/vue.css" />
20 |   <link rel="stylesheet" href="https://imjoy-team.github.io/imjoy-docs/static/style.css" />
21 | </head>
22 | 
23 | <body>
24 |   <div id="app"></div>
25 |   <script>
26 |     //---------------------Configurations------------------------------
27 |     const githubUser = 'bioimage-io' // your github user name
28 |     const githubRepo = 'bioimageio-chatbot' // your github repo name
29 |     const homepage = '/README.md' // the home page markdown file (relative path to the current folder)
30 |     const name = 'BioImage.IO Chatbot' // name of this documentation
31 |     const logo = 'https://bioimage.io/static/img/bioimage-io-logo.png' // a logo show on the upper-left corner
32 |     //--------------------------End------------------------------------
33 |     const repo = `https://github.com/${githubUser}/${githubRepo}`;
34 |     const basePath = window.location.pathname;
35 |     document.title = `${githubUser}|${name}`;
36 |     window.$docsify = window.$docsify || {
37 |       name,
38 |       nameLink: '/',
39 |       logo,
40 |       basePath: basePath,
41 |       loadSidebar: true,
42 |       coverpage: false,
43 |       auto2top: true,
44 |       autoHeader: false,
45 |       subMaxLevel: 4,
46 |       maxLevel: 5,
47 |       repo: repo,
48 |       homepage: homepage,
49 |       alias: {
50 |         '/overview': 'https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/README.md'
51 |       },
52 |       search: {
53 |         paths: 'auto',
54 |         placeholder: 'Search',
55 |         noData: 'No Results.',
56 |         depth: 2
57 |       },
58 |       tabs: {
59 |         persist: true,
60 |         sync: true,
61 |         theme: 'classic',
62 |         tabComments: true,
63 |         tabHeadings: true
64 |       },
65 |       markdown: {
66 |         renderer: {
67 |           image(src, title, alt) {
68 |             // resolve src to basePath
69 |             // e.g. http://localhost:8080/docs#/user_guide/a.jpg -> docs/user_guide/a.jpg
70 |             if(!src.startsWith('http') && !src.startsWith('/')){
71 |               const url = new URL(src, window.location.href.replace(/#\//, '/')); // remove hash
72 |               src = url.pathname;
73 |             }
74 |             const [width, height] = (title && title.startsWith('=')) ? title.slice(1).split('x').map(v => v.trim()).filter(Boolean) : [];
75 |             return `<img src="${src}" alt="${alt}"${width ? ` width="${width}"` : ''}${height ? ` height="${height}"` : ''}>`;
76 |           }
77 |         }
78 |       }
79 |     };
80 |   </script>
81 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify-run-code.js"></script>
82 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify-copy-code.min.js"></script>
83 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify.min.js"></script>
84 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/prism-python.min.js"></script>
85 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/prism-json.min.js"></script>
86 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/prism-bash.min.js"></script>
87 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/prism-json.min.js"></script>
88 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/search.min.js"></script>
89 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/external-script.min.js"></script>
90 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify-pagination.min.js"></script>
91 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify-tabs@1.js"></script>
92 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/zoom-image.min.js"></script>
93 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/docsify-sidebar-collapse.min.js"></script>
94 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/require.min.js"></script>
95 |   <script src="https://imjoy-team.github.io/imjoy-docs/static/imjoy-app.js"></script>
96 | </body>
97 | 
98 | </html>
99 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/__init__.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import re
  3 | import pkgutil
  4 | import importlib.util
  5 | from pydantic import BaseModel
  6 | from bioimageio_chatbot.utils import ChatbotExtension
  7 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model
  8 | from schema_agents import schema_tool
  9 | 
 10 | def get_builtin_extensions():
 11 |     extensions = []
 12 |     for module in pkgutil.walk_packages(__path__, __name__ + '.'):
 13 |         if module.name.endswith('_extension'):
 14 |             if hasattr(module.module_finder, 'find_module'):
 15 |                 ext_module = module.module_finder.find_module(module.name).load_module(module.name)
 16 |             else:
 17 |                 # for newer python versions, find_spec is used instead of find_module
 18 |                 module_spec = importlib.util.find_spec(module.name)
 19 |                 ext_module = importlib.util.module_from_spec(module_spec)
 20 |                 module_spec.loader.exec_module(ext_module)
 21 |             exts = ext_module.get_extension() or []
 22 |             if isinstance(exts, ChatbotExtension):
 23 |                 exts = [exts]
 24 |             for ext in exts:
 25 |                 if not isinstance(ext, ChatbotExtension):
 26 |                     print(f"Failed to load chatbot extension: {module.name}.")
 27 |                     continue
 28 |                 if ext.id in [e.id for e in extensions]:
 29 |                     raise ValueError(f"Extension name {ext.id} already exists.")
 30 |                 extensions.append(ext)
 31 |             
 32 |     return extensions
 33 | 
 34 | def convert_to_dict(obj):
 35 |     if isinstance(obj, BaseModel):
 36 |         return obj.model_dump()
 37 |     if isinstance(obj, dict):
 38 |         return {k: convert_to_dict(v) for k, v in obj.items()}
 39 |     if isinstance(obj, list):
 40 |         return [convert_to_dict(v) for v in obj]
 41 |     return obj
 42 | 
 43 | def create_tool_name(ext_id, tool_id=""):
 44 |     text = f"{ext_id}_{tool_id}"
 45 |     text = text.replace("-", " ").replace("_", " ").replace(".", " ")
 46 |     words = re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)|\d+', text)
 47 |     return ''.join(word if word.istitle() else word.capitalize() for word in words)
 48 | 
 49 | def tool_factory(ext_id, tool_id, ext_tool, schema):
 50 |     input_model = json_schema_to_pydantic_model(schema)
 51 |     ext_tool.__name__ = create_tool_name(ext_id, tool_id)
 52 |     ext_tool.__doc__ = input_model.__doc__
 53 |     return schema_tool(ext_tool, input_model=input_model)
 54 | 
 55 | async def extension_to_tools(extension: ChatbotExtension):
 56 | 
 57 |     if extension.get_schema:
 58 |         schemas = await extension.get_schema()
 59 |         tools = []
 60 |         for k in schemas:
 61 |             assert k in extension.tools, f"Tool `{k}` not found in extension `{extension.id}`."
 62 |             ext_tool = extension.tools[k]
 63 |             tool = tool_factory(extension.id, k, ext_tool, schemas[k])
 64 |             tool.__tool_id__ = k
 65 |             tools.append(tool)
 66 |     else:
 67 |         tools = []
 68 |         for k in extension.tools:
 69 |             ext_tool = extension.tools[k]
 70 |             ext_tool.__name__ = create_tool_name(extension.id, k)
 71 |             ext_tool.__tool_id__ = k
 72 |             tools.append(ext_tool)
 73 |     
 74 |     return tools
 75 | 
 76 | async def main():
 77 |     extensions = get_builtin_extensions()
 78 |     tools = []
 79 |     for svc in extensions:
 80 |         tool = await extension_to_tools(svc)
 81 |         tools.append(tool)
 82 |     print(tools)
 83 | 
 84 | if __name__ == "__main__":
 85 |     import json
 86 | 
 87 |     schema = {
 88 |         "type": "object",
 89 |         "title": "RunScript",
 90 |         "description": "description",
 91 |         "properties": {
 92 |             "script": {
 93 |                 "type": "string",
 94 |                 "description": "Python script to execute",
 95 |             },
 96 |             "inputs": {
 97 |                 "type": "array",
 98 |                 "description": "Input objects to be restored into the script",
 99 |                 "items": {
100 |                     "type": "string",
101 |                     "properties": {
102 |                         "key": {
103 |                             "type": "string",
104 |                             "description": "Key of the object from the store to be restored",
105 |                         },
106 |                         "name": {
107 |                             "type": "string",
108 |                             "description": "Variable name of the object",
109 |                         }
110 |                     }
111 |                 }
112 |             },
113 |             "outputs": {
114 |                 "type": "array",
115 |                 "description": "Objects produced by the script as outputs or for further use",
116 |                 "items": {
117 |                     "type": "string"
118 |                 }
119 |             }
120 |         },
121 |         "required": ["script", "outputs"],
122 |         "allow_additional_properties": False,
123 |     }
124 | 
125 |     model = json_schema_to_pydantic_model(schema)
126 |     print(model)
127 |     asyncio.run(main())


--------------------------------------------------------------------------------
/tests/test_chatbot_answer.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from bioimageio_chatbot.chatbot import create_customer_service, get_builtin_extensions, QuestionWithHistory, UserProfile
 3 | from bioimageio_chatbot.evaluation import evaluate
 4 | from schema_agents.schema import Message
 5 | import json
 6 | import pandas as pd
 7 | import asyncio
 8 | import pytest
 9 | 
# Module-level setup shared by every test below; it runs once at import time.

# Not referenced anywhere else in this module.
KNOWLEDGE_BASE_PATH = "./bioimageio-knowledge-base"
builtin_extensions = get_builtin_extensions()
# Reduced view of each extension (only "name" and "description"), passed along
# with every question as `chatbot_extensions`.
extensions = [{key:value for key, value in ext.model_dump().items() if key in ["name", "description"]} for ext in builtin_extensions]
# Agent under test, built from the full extension objects.
customer_service = create_customer_service(builtin_extensions)

# Directory containing this test file; used to locate the local evaluation CSV.
dir_path = os.path.dirname(os.path.realpath(__file__))
16 | 
@pytest.fixture
def eval_questions():
    """Provide the evaluation questions, reference answers and channel ids.

    The table is read from the CSV next to this file when it exists, otherwise
    from the published spreadsheet. Only the rows at positions 1 through 9 are
    used (the row at position 0 is skipped).

    Returns:
        tuple: ``(questions, reference_answers, channel_ids)`` as three lists.
    """
    local_csv = os.path.join(dir_path, "Minimal-Eval-Test-20240111.csv")
    if os.path.exists(local_csv):
        source = local_csv
    else:
        source = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTVgE2_eqBiAktHmg13jLrFrQJhbANkByY40f9vxptC6pShcjLzEuHzx93ATo0c0XcSYs9W1RRbaDdu/pub?gid=1280822572&single=true&output=csv"
    table = pd.read_csv(source).iloc[range(1, 10)]

    questions = list(table["Question"])
    reference_answers = list(table["GPT-4-turbo Answer (With Context)- GT"])
    channel_ids = list(table["GT: Retrieved channel id"])
    return questions, reference_answers, channel_ids
37 | 
38 | 
async def validate_chatbot_answer(question, reference_answer, use_tools_gt, channel_id_gt, relevance_gt, similary_score_gt):
    """Ask the chatbot one question and check the answer against a reference.

    The question is sent with an empty chat history and an empty user profile.
    The response text of the last step is scored with ``evaluate`` against
    ``reference_answer`` and must reach ``similary_score_gt``.

    ``use_tools_gt``, ``channel_id_gt`` and ``relevance_gt`` are accepted but
    currently not checked.
    """
    empty_profile = UserProfile(name="", occupation="", background="")
    request = QuestionWithHistory(
        question=question,
        chat_history=[],
        user_profile=UserProfile.model_validate(empty_profile),
        chatbot_extensions=extensions,
    )
    message = Message(content=request.model_dump_json(), data=request, role="User")
    replies = await customer_service.handle(message)

    # The answer text sits in the first detail entry of the last step.
    final_step = replies[0].data.steps[-1]
    chatbot_answer = final_step.details['details'][0]['response']

    score = await evaluate(question, reference_answer, chatbot_answer)
    assert score >= similary_score_gt
58 | 
59 |    
@pytest.mark.asyncio
async def test_chatbot1(eval_questions):
    """Validate the chatbot answer for every question from the fixture.

    Questions are processed one after another; each answer must reach a
    similarity score of at least 80.
    """
    questions, reference_answers, channel_id_list_gt = eval_questions
    cases = zip(questions, reference_answers, channel_id_list_gt)
    for question, reference_answer, channel_id_gt in cases:
        await validate_chatbot_answer(
            question=question,
            reference_answer=reference_answer,
            use_tools_gt=True,
            channel_id_gt=channel_id_gt,
            relevance_gt=True,
            similary_score_gt=80,
        )


--------------------------------------------------------------------------------
/bioimageio_chatbot/quota.py:
--------------------------------------------------------------------------------
  1 | import sqlite3
  2 | import time
  3 | 
  4 | class QuotaManager:
  5 |     def __init__(self, db_file=':memory:', vip_list=None, default_quota=1.0, default_reset_period='daily'):
  6 |         self.db_file = db_file
  7 |         self.conn = sqlite3.connect(self.db_file)
  8 |         self.vip_list = vip_list or []
  9 |         self.default_quota = default_quota
 10 |         self.default_reset_period = self.period_to_seconds(default_reset_period)
 11 |         self._setup_database()
 12 | 
 13 |     def _setup_database(self):
 14 |         cursor = self.conn.cursor()
 15 |         cursor.execute('''
 16 |             CREATE TABLE IF NOT EXISTS quotas (
 17 |                 user_id TEXT PRIMARY KEY, 
 18 |                 quota REAL, 
 19 |                 last_reset REAL,
 20 |                 reset_period INTEGER,  -- Reset period in seconds
 21 |                 max_quota REAL
 22 |             )
 23 |         ''')
 24 |         self.conn.commit()
 25 | 
 26 |     def set_user_quota(self, user_id, max_quota, reset_period):
 27 |         reset_seconds = self.period_to_seconds(reset_period)
 28 |         cursor = self.conn.cursor()
 29 |         cursor.execute('''
 30 |             INSERT OR REPLACE INTO quotas (user_id, quota, last_reset, reset_period, max_quota)
 31 |             VALUES (?, ?, ?, ?, ?)
 32 |         ''', (user_id, max_quota, time.time(), reset_seconds, max_quota))
 33 |         self.conn.commit()
 34 | 
 35 |     def check_quota(self, user_id):
 36 |         if user_id in self.vip_list:
 37 |             return float('inf')
 38 | 
 39 |         cursor = self.conn.cursor()
 40 |         cursor.execute("SELECT quota, last_reset, reset_period, max_quota FROM quotas WHERE user_id = ?", (user_id,))
 41 |         row = cursor.fetchone()
 42 |         if row:
 43 |             quota, last_reset, reset_period, max_quota = row
 44 |             if time.time() - last_reset >= reset_period:
 45 |                 quota = max_quota
 46 |                 last_reset = time.time()
 47 |                 cursor.execute("UPDATE quotas SET quota = ?, last_reset = ? WHERE user_id = ?",
 48 |                                (quota, last_reset, user_id))
 49 |                 self.conn.commit()
 50 |         else:
 51 |             quota, last_reset, reset_period, max_quota = self.default_quota, time.time(), self.default_reset_period, self.default_quota
 52 |             cursor.execute("INSERT INTO quotas (user_id, quota, last_reset, reset_period, max_quota) VALUES (?, ?, ?, ?, ?)",
 53 |                            (user_id, quota, last_reset, reset_period, max_quota))
 54 |             self.conn.commit()
 55 |         return quota
 56 | 
 57 |     def use_quota(self, user_id, amount):
 58 |         if self.check_quota(user_id) >= amount:
 59 |             cursor = self.conn.cursor()
 60 |             cursor.execute("UPDATE quotas SET quota = quota - ? WHERE user_id = ?",
 61 |                            (amount, user_id))
 62 |             self.conn.commit()
 63 |             return True
 64 |         else:
 65 |             return False
 66 | 
 67 |     def reset_quota(self, user_id):
 68 |         cursor = self.conn.cursor()
 69 |         cursor.execute("UPDATE quotas SET quota = max_quota, last_reset = ? WHERE user_id = ?",
 70 |                        (time.time(), user_id))
 71 |         self.conn.commit()
 72 | 
 73 |     def period_to_seconds(self, period):
 74 |         if period == 'monthly':
 75 |             return 30 * 86400
 76 |         elif period == 'weekly':
 77 |             return 7 * 86400
 78 |         elif period == 'daily':
 79 |             return 86400
 80 |         elif period == 'hourly':
 81 |             return 3600
 82 |         else:
 83 |             raise ValueError("Invalid period. Choose from 'daily', 'weekly', 'monthly'.")
 84 | 
 85 | if __name__ == '__main__':
 86 |     # Testing the functionality with asserts
 87 |     quota_manager = QuotaManager(vip_list=['userVIP'], default_quota=2.0, default_reset_period='daily')
 88 |     # quota_manager.set_user_quota('user123', 1.0, 'daily')
 89 |     quota_manager.set_user_quota('user234', 10.0, 'weekly')
 90 |     quota_manager.set_user_quota('user345', 30.0, 'monthly')
 91 | 
 92 |     # Assert initial quotas are set correctly
 93 |     assert quota_manager.check_quota('userVIP') == float('inf'), "VIP user quota should be infinite"
 94 |     assert quota_manager.check_quota('user123') == 2.0, "user123 initial quota should be 2.0"
 95 |     assert quota_manager.check_quota('user234') == 10.0, "user234 initial quota should be 10.0"
 96 |     assert quota_manager.check_quota('user345') == 30.0, "user345 initial quota should be 30.0"
 97 | 
 98 |     # Assert usage and quota management
 99 |     assert quota_manager.use_quota('user123', 0.5) == True, "Should allow using 0.5 quota"
100 |     assert quota_manager.check_quota('user123') == 2-0.5, "user123 quota after use should be 2-0.5"
101 |     assert quota_manager.use_quota('user123', 3.6) == False, "Should not allow using 3.6 quota (not enough left)"
102 |     assert quota_manager.use_quota('userVIP', 1000) == True, "VIP should always be allowed to use quota"
103 |     assert quota_manager.check_quota('user234') == 10.0, "user234 should still have full quota"
104 | 
105 |     # Test manual reset
106 |     quota_manager.reset_quota('user123')
107 |     assert quota_manager.check_quota('user123') == 2.0, "user123 should have full quota after reset"
108 | 
109 |     # Display final test results
110 |     print("Tests completed successfully.")
111 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/web_search_extension/langchain_websearch.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import asyncio
  3 | from typing import Union
  4 | 
  5 | import httpx
  6 | from bs4 import BeautifulSoup
  7 | from langchain_community.document_transformers import EmbeddingsRedundantFilter
  8 | from langchain.retrievers.document_compressors import DocumentCompressorPipeline
  9 | from langchain.retrievers.ensemble import EnsembleRetriever
 10 | from langchain_openai import OpenAIEmbeddings # HuggingFaceEmbeddings
 11 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 12 | from langchain_community.vectorstores import FAISS
 13 | from langchain.retrievers.document_compressors.embeddings_filter import EmbeddingsFilter
 14 | from langchain.retrievers import ContextualCompressionRetriever
 15 | from langchain.schema import Document
 16 | try:
 17 |     from langchain_community.retrievers import BM25Retriever
 18 | except ImportError:
 19 |     BM25Retriever = None
 20 | 
 21 | 
 22 | class LangchainCompressor:
 23 | 
 24 |     def __init__(self, device="cuda"):
 25 |         self.embeddings = OpenAIEmbeddings() # HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={"device": device})
 26 |         self.spaces_regex = re.compile(r" {3,}")
 27 | 
 28 |     def preprocess_text(self, text: str) -> str:
 29 |         text = text.replace("\n", " \n")
 30 |         text = self.spaces_regex.sub(" ", text)
 31 |         text = text.strip()
 32 |         return text
 33 | 
 34 |     async def faiss_embedding_query_urls(self, query: str, url_list: list[str], num_results: int = 5,
 35 |                                    similarity_threshold: float = 0.5, chunk_size: int = 500) -> list[Document]:
 36 |         html_url_tuples = []
 37 | 
 38 |         # Creating a list of tasks for each URL
 39 |         tasks = [download_html(url) for url in url_list]
 40 | 
 41 |         # Using asyncio.gather to run all tasks concurrently
 42 |         results = await asyncio.gather(*tasks, return_exceptions=True)
 43 | 
 44 |         # Processing results and exceptions
 45 |         for result, url in zip(results, url_list):
 46 |             if isinstance(result, Exception):
 47 |                 print(f'LLM_Web_search | An exception occurred for {url}: {result}')
 48 |             else:
 49 |                 html_url_tuples.append((result, url))
 50 | 
 51 |         if not html_url_tuples:
 52 |             return []
 53 | 
 54 |         documents = [html_to_plaintext_doc(html, url) for html, url in html_url_tuples]
 55 |         
 56 |         text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=10,
 57 |                                                        separators=["\n\n", "\n", ".", ", ", " ", ""])
 58 |         split_docs = text_splitter.split_documents(documents)
 59 |         # filtered_docs = pipeline_compressor.compress_documents(documents, query)
 60 |         faiss_retriever = FAISS.from_documents(split_docs, self.embeddings).as_retriever(
 61 |             search_kwargs={"k": num_results}
 62 |         )
 63 |         if not BM25Retriever:
 64 |             raise ImportError("Could not import BM25Retriever. Please ensure that you have installed "
 65 |                               "langchain==0.0.352")
 66 | 
 67 |         #  This sparse retriever is good at finding relevant documents based on keywords,
 68 |         #  while the dense retriever is good at finding relevant documents based on semantic similarity.
 69 |         bm25_retriever = BM25Retriever.from_documents(split_docs, preprocess_func=self.preprocess_text)
 70 |         bm25_retriever.k = num_results
 71 | 
 72 |         redundant_filter = EmbeddingsRedundantFilter(embeddings=self.embeddings)
 73 |         embeddings_filter = EmbeddingsFilter(embeddings=self.embeddings, k=None,
 74 |                                              similarity_threshold=similarity_threshold)
 75 |         pipeline_compressor = DocumentCompressorPipeline(
 76 |             transformers=[redundant_filter, embeddings_filter]
 77 |         )
 78 | 
 79 |         compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
 80 |                                                                base_retriever=faiss_retriever)
 81 | 
 82 |         ensemble_retriever = EnsembleRetriever(
 83 |             retrievers=[bm25_retriever, compression_retriever], weights=[0.4, 0.5]
 84 |         )
 85 | 
 86 |         compressed_docs = await ensemble_retriever.aget_relevant_documents(query)
 87 | 
 88 |         # Ensemble may return more than "num_results" results, so cut off excess ones
 89 |         return compressed_docs[:num_results]
 90 | 
 91 | 
 92 | async def download_html(url: str) -> bytes:
 93 |     headers = {
 94 |         "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
 95 |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
 96 |         "Accept-Language": "en-US,en;q=0.5"
 97 |     }
 98 | 
 99 |     async with httpx.AsyncClient() as client:
100 |         response = await client.get(url, headers=headers, timeout=8)
101 |         response.raise_for_status()
102 | 
103 |     content_type = response.headers.get("Content-Type", "")
104 |     if not content_type.startswith("text/html"):
105 |         raise ValueError(f"Expected content type text/html. Got {content_type}.")
106 |     return response.content
107 | 
def html_to_plaintext_doc(html_text: Union[str, bytes], url: str) -> Document:
    """Convert an HTML page into a plain-text ``Document``.

    ``<script>`` and ``<style>`` elements are removed; the remaining text
    fragments are stripped and joined with newlines. ``url`` is stored in the
    document metadata under ``"source"``.
    """
    parsed = BeautifulSoup(html_text, features="lxml")
    for tag in parsed(["script", "style"]):
        tag.extract()

    fragments = [fragment.strip() for fragment in parsed.stripped_strings]
    return Document(page_content='\n'.join(fragments), metadata={"source": url})
116 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/vision_extension.py:
--------------------------------------------------------------------------------
  1 | from bioimageio_chatbot.utils import ChatbotExtension
  2 | from openai import AsyncOpenAI
  3 | from schema_agents import schema_tool
  4 | import base64
  5 | from pydantic import Field, BaseModel
  6 | from typing import Optional, List
  7 | import httpx
  8 | from PIL import Image
  9 | from io import BytesIO
 10 | import matplotlib.pyplot as plt
 11 | # make sure matplotlib is operating headless (no GUI)
 12 | plt.switch_backend("agg")
 13 | 
 14 | # Function to encode the image
 15 | def encode_image(image_path):
 16 |   with open(image_path, "rb") as image_file:
 17 |     return base64.b64encode(image_file.read()).decode('utf-8')
 18 | 
 19 | async def aask(images, messages, max_tokens=1024):
 20 |     aclient = AsyncOpenAI()
 21 |     user_message = []
 22 |     # download the images and save it into a list of PIL image objects
 23 |     img_objs = []
 24 |     for image in images:
 25 |         async with httpx.AsyncClient() as client:
 26 |             response = await client.get(image.url)
 27 |             response.raise_for_status()
 28 |         try:
 29 |             img = Image.open(BytesIO(response.content))
 30 |         except Exception as e:
 31 |             raise ValueError(f"Failed to read image {image.title or ''} from {image.url}. Error: {e}")
 32 |         img_objs.append(img)
 33 |     
 34 |     if len(img_objs) == 1:
 35 |         # plot the image with matplotlib
 36 |         plt.imshow(img_objs[0])
 37 |         if images[0].title:
 38 |             plt.title(images[0].title)
 39 |         fig = plt.gcf()
 40 |     else:
 41 |         # plot them in subplots with matplotlib in a row
 42 |         fig, ax = plt.subplots(1, len(img_objs), figsize=(15, 5))
 43 |         for i, img in enumerate(img_objs):
 44 |             ax[i].imshow(img)
 45 |             if images[0].title:
 46 |                 ax[i].set_title(images[i].title)
 47 |     # save the plot to a buffer as png format and convert to base64
 48 |     buffer = BytesIO()
 49 |     fig.tight_layout()
 50 |     # if the image size (width or height) is smaller than 512, use the original size and aspect ratio
 51 |     # otherwise set the maximun width of the image to n*512 pixels, where n is the number of images; the maximum total width is 1024 pixels
 52 |     fig_width = min(1024, len(img_objs)*512, fig.get_figwidth()*fig.dpi)
 53 |     # make sure the pixel size (not inches)
 54 |     fig.set_size_inches(fig_width/fig.dpi, fig.get_figheight(), forward=True)
 55 |     
 56 |     # save fig
 57 |     fig.savefig(buffer, format="png")
 58 |     buffer.seek(0)
 59 |     base64_image = base64.b64encode(buffer.read()).decode("utf-8")
 60 |     # append the image to the user message
 61 |     user_message.append({
 62 |         "type": "image_url",
 63 |         "image_url": {
 64 |             "url": f"data:image/png;base64,{base64_image}"
 65 |         }
 66 |     })
 67 |     
 68 |     
 69 |     for message in messages:
 70 |         assert isinstance(message, str), "Message must be a string."
 71 |         user_message.append({"type": "text", "text": message})
 72 | 
 73 |     response = await aclient.chat.completions.create(
 74 |         model="gpt-4o",
 75 |         messages=[
 76 |             {
 77 |                 "role": "system",
 78 |                 "content": "You are a helpful AI assistant that help user to inspect the provided images visually based on the context, make insightful comments and answer questions about the provided images."
 79 |             },
 80 |             {
 81 |                 "role": "user",
 82 |                 "content": user_message
 83 |             }
 84 |         ],
 85 |         max_tokens=max_tokens,
 86 |     )
 87 |     return response.choices[0].message.content
 88 | 
 89 | class ImageInfo(BaseModel):  # one entry of the `images` list accepted by inspect_tool
 90 |     """Image information."""
 91 |     url: str=Field(..., description="The URL of the image.")  # required; inspect_tool asserts that it starts with "http"
 92 |     title: Optional[str]=Field(None, description="The title of the image.")  # optional; the plotting code above uses it as the plot title and in its decode-error message
 93 | 
 94 | @schema_tool
 95 | async def inspect_tool(images: List[ImageInfo]=Field(..., description="A list of images to be inspected, each with a http url and title"), query: str=Field(..., description="user query about the image"),  context_description: str=Field(..., description="describe the context for the visual inspection task")) -> str:
 96 |     """Inspect an image using GPT4-Vision."""
 97 |     # Check every URL before handing the list to aask; NOTE(review): the completion code above requests model "gpt-4o", not "GPT4-Vision" as the docstring says.
 98 |     for image in images:
 99 |         assert image.url.startswith("http"), "Image URL must start with http."  # NOTE(review): assert statements are stripped under `python -O`, which would disable this check
100 |     
101 |     response = await aask(images, [context_description, query])  # context first, then the query; presumably aask is the function above, which appends each string as a text part after the plotted image
102 |     return response  # returned unchanged to the caller
103 | 
104 | def get_extension():  # builds the ChatbotExtension that exposes the vision inspection tool
105 |     return ChatbotExtension(
106 |         id="vision",
107 |         name="Vision Inspector",
108 |         description="Perform visual inspection on images using GPT4-Vision model, used for describing images and answer image related questions. The images will be plotted using matplotlib and then sent to the GPT4-Vision model for inspection.",
109 |         tools=dict(
110 |             inspect=inspect_tool  # registered under the tool name "inspect"
111 |         )
112 |     )
113 | 
114 | if __name__ == "__main__":  # manual smoke test; runs only when this file is executed as a script
115 |     import asyncio
116 |     async def main():
117 |         extension = get_extension()
118 |         print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon"), ImageInfo(url="https://bioimage.io/static/img/bioimage-io-logo.png", title="BioImage.io Logo")], query="What are these?", context_description="Inspect the BioImage.io icon and logo."))  # exercises the multi-image path (two images)
119 |         # test only one image
120 |         # print(await extension.tools["inspect"](images=[ImageInfo(url="https://bioimage.io/static/img/bioimage-io-icon.png", title="BioImage.io Icon")], query="What is this?", context_description="Inspect the BioImage.io icon."))
121 |     # Run the async function
122 |     asyncio.run(main())


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/bia_extension.py:
--------------------------------------------------------------------------------
  1 | import httpx
  2 | from pydantic import BaseModel, Field
  3 | from typing import Dict, Any, Optional
  4 | from bioimageio_chatbot.utils import ChatbotExtension
  5 | from schema_agents import schema_tool
  6 | 
  7 | class BioImageArchiveClient:  # async client for the two BioStudies API endpoints used by this extension (search and study lookup)
  8 |     def __init__(self):
  9 |         self._base_url = "https://www.ebi.ac.uk/biostudies/api/v1"  # prefix shared by the search and study URLs built below
 10 | 
 11 |     async def search_bioimage_archive(self, 
 12 |         query: str = Field(..., description="The search query string."),
 13 |         pageSize: int = Field(10, gt=0, description="Number of search results per page."),  # NOTE(review): these defaults are pydantic Field objects; a direct call that omits an argument receives the Field object itself — presumably schema_tool resolves them, confirm
 14 |         page: int = Field(1, description="Page number of the search results."),
 15 |         sortOrder: Optional[str] = Field("descending", description="Sort order: ascending or descending.")
 16 |     ) -> Dict[str, Any]:
 17 |         """Search the BioImage Archive for studies and image datasets, returning a list of studies.  The link format to each study in the results is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
 18 |         url = f"{self._base_url}/bioimages/search"
 19 |         params = {  # sent as the query string of the GET request
 20 |             "query": query,
 21 |             "pageSize": pageSize,
 22 |             "page": page,
 23 |             "sortOrder": sortOrder
 24 |         }
 25 |         async with httpx.AsyncClient() as client:  # a new client is opened and closed for every call
 26 |             response = await client.get(url, params=params)
 27 |         response.raise_for_status()  # raises on 4xx/5xx responses; nothing is caught here, so the error reaches the caller
 28 |         return self._simplify_search_results(response.json())  # only a subset of each hit is returned
 29 | 
 30 |     def _simplify_search_results(self, results: Dict[str, Any]) -> Dict[str, Any]:  # reduces the raw search JSON to four fields per hit plus paging info
 31 |         simplified_results = {
 32 |             "hits": [
 33 |                 {
 34 |                     "title": hit["title"],  # direct indexing: a hit lacking any of these four keys raises KeyError
 35 |                     "author": hit["author"],
 36 |                     "content": hit["content"],
 37 |                     "accession": hit["accession"]
 38 |                 } for hit in results.get("hits", [])  # a response without "hits" yields an empty list
 39 |             ],
 40 |             "totalHits": results.get("totalHits"),  # the three paging values are None when the key is absent
 41 |             "page": results.get("page"),
 42 |             "pageSize": results.get("pageSize")
 43 |         }
 44 |         return simplified_results
 45 | 
 46 |     async def read_bioimage_archive_study(self, accession: str = Field(..., description="Accession number of the study.")) -> Dict[str, Any]:
 47 |         """Read detailed information about a specific study from the BioImage Archive, returning a simplified dictionary. The link format to the study is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
 48 |         url = f"{self._base_url}/studies/{accession}"  # accession is interpolated into the path as given (no escaping)
 49 |         async with httpx.AsyncClient() as client:
 50 |             response = await client.get(url)
 51 |         response.raise_for_status()  # raises on 4xx/5xx responses
 52 |         return self._simplify_study_details(response.json())
 53 | 
 54 |     def _simplify_study_details(self, study_details: Dict[str, Any]) -> Dict[str, Any]:  # extracts title, description, accession, link and author names from the study JSON
 55 |         # Initialize simplified details with placeholders for title and description
 56 |         simplified_details = {
 57 |             "title": "",
 58 |             "description": "",
 59 |             "accession": study_details.get("accno", ""),  # the study JSON stores the accession under "accno"
 60 |             "link": f"https://www.ebi.ac.uk/biostudies/bioimages/studies/{study_details.get('accno', '')}",
 61 |             "authors": []
 62 |         }
 63 | 
 64 |         # Extract title and description from the attributes array by name
 65 |         for attribute in study_details.get("section", {}).get("attributes", []):
 66 |             if attribute.get("name") == "Title":
 67 |                 simplified_details["title"] = attribute.get("value", "")  # if several "Title" attributes exist, the last one wins
 68 |             elif attribute.get("name") == "Description":
 69 |                 simplified_details["description"] = attribute.get("value", "")
 70 | 
 71 |         # Extracting author information
 72 |         author_subsections = [sub for sub in study_details.get("section", {}).get("subsections", []) if sub.get("type") == "Author"]  # NOTE(review): assumes every subsection is a dict with .get — confirm against the API response
 73 |         for author in author_subsections:
 74 |             author_attributes = {attr["name"]: attr["value"] for attr in author.get("attributes", [])}  # direct indexing: an attribute lacking "name" or "value" raises KeyError
 75 |             simplified_details["authors"].append(author_attributes.get("Name", ""))  # "" is appended when the author has no "Name" attribute
 76 | 
 77 |         return simplified_details
 78 | 
 79 | 
 80 | 
 81 | def get_extension():  # builds the ChatbotExtension exposing the archive "search" and "read" tools
 82 |     bioimage_archive_client = BioImageArchiveClient()
 83 |     search_tool = schema_tool(bioimage_archive_client.search_bioimage_archive)  # bound methods are wrapped, so both tools share this one client instance
 84 |     read_tool = schema_tool(bioimage_archive_client.read_bioimage_archive_study)
 85 | 
 86 |     async def get_schema():  # returns each tool's input-model schema, keyed by tool name
 87 |         return {
 88 |             "search": search_tool.input_model.schema(),
 89 |             "read": read_tool.input_model.schema(),
 90 |         }
 91 | 
 92 |     return ChatbotExtension(
 93 |         id="bioimage_archive",
 94 |         name="Search BioImage Archive",
 95 |         description="Search for biological images related studies in the BioImage Archive, it provide studies and image datasets related to microscopy images and other imaging modalities.",
 96 |         get_schema=get_schema, # This is optional, exists only for testing purposes
 97 |         tools=dict(
 98 |             search=search_tool,  # tool names here match the keys returned by get_schema
 99 |             read=read_tool
100 |         )
101 |     )
102 | 
103 | if __name__ == "__main__":  # manual smoke test; runs only when this file is executed as a script
104 |     import asyncio
105 |     async def main():
106 |         bioimage_archive_client = BioImageArchiveClient()
107 |         # Example to search in BioImage Archive with simplified results
108 |         search_results = await bioimage_archive_client.search_bioimage_archive(query="cells", pageSize=1)  # NOTE(review): direct call without schema_tool, so `page` and `sortOrder` keep their Field(...) default objects — confirm the request is still valid
109 |         print(search_results)
110 | 
111 |         # Example to read a specific study from BioImage Archive with simplified details
112 |         study_details = await bioimage_archive_client.read_bioimage_archive_study(accession="S-BSST314")
113 |         print(study_details)
114 | 
115 |     # Run the async function
116 |     asyncio.run(main())
117 | 


--------------------------------------------------------------------------------
/docs/technical-overview.md:
--------------------------------------------------------------------------------
 1 | # Design and Functionality of BioImage.IO Chatbot: A User Guide and Technical Overview
 2 | 
 3 | ## Chatbot Interface
 4 | 
 5 | After following the installation guidelines from the [README](/README.md), the chat interface will resemble Figure 1.
 6 | 
 7 | ![BioImage.IO-Chatbot](./screenshots/chat-interface.png)
 8 | *Figure 1. The chat interface of the BioImage.IO Chatbot.*
 9 | 
10 | Users can input their profiles as depicted in Figure 2. 
11 | ![user-profile](./screenshots/user-profile.png)
12 | *Figure 2. Users can personalize responses by clicking `Edit Profile` and save their settings for future conversations by clicking `Save`.*
13 | 
14 | As of today, our chatbot integrates 6 extensions including document search in the bioimage.io knowledge base, tools search on the Bioimage Informatics Index (biii.eu), bioimage topics search in the BioImage Archive and the Image.sc Forum, web search, and information search in the BioImage Model Zoo. The document search utilizes knowledge bases from the following pivotal communities: bioimage.io [2], Imjoy [3], deepimageJ [4], ImageJ [5], bio.tools [6], and scikit-image [7]. We also allow users to specify a preferred extension for information retrieval, as shown in Figure 3. If an extension is designated, the chatbot sources information using the specific extension and its corresponding source. Otherwise, it uses an intelligent selection process driven by a schema-based agent to choose the most relevant extension based on the user's query. 
15 | 
16 | ![channels](./screenshots/extensions.png)
17 | *Figure 3. Users can personalize the conversation by selecting a specific channel from the ‘Knowledge Base Channel’.*
18 | 
19 | ### Building the Knowledge Base
20 | 
21 | The knowledge base is efficiently and collaboratively constructed by downloading documentation from given URLs. These can be repositories, PDFs, or other forms of documentation. We use a regular expression splitter to segment the documentation into manageable chunks for efficient and accurate retrieval. These chunks are then embedded and stored as vectors in a FAISS [1]-based vector database.
22 | 
23 | ## Schema-Based Agent Design
24 | 
25 | The chatbot's ability to understand and respond to user queries is substantially improved by employing a schema-based agent design. Unlike traditional context-based models, our approach utilizes predefined schemas to guide the conversation and information retrieval process. 
26 | 
27 | The schema-based agent operates on the function-call LLM [8], and uses input and output schemas to generate text output. Within this implementation, we construct a customer service chatbot by defining a role class, as shown in Figure 4.
28 | 
29 | ![role_create](./screenshots/role_create.png)
30 | *Figure 4. Creation of a chatbot role class named ‘CustomerServiceRole’ by defining fields of the role class.*
31 | 
32 | ## Extensions
33 | The BioImage.IO Chatbot employs diverse methods to generate responses, currently encompassing six distinct response modes. The response mode is chosen by the schema-based agent based on the user's query and the selected channel.
34 | 
35 | ### Search BioImage Docs
36 | This extension allows the chatbot to search information in a community-driven bioimage related knowledge base. With a specific query, the chatbot extracts essential elements from the user's question to fetch information from the relevant documentation. 
37 |         ![direct-response](./screenshots/search-bioimage-docs.png)
38 |     *Figure 6. Search in the BioImage knowledge base documentation.*
39 | 
40 | ### Search BioImage Information Index (biii.eu)
41 | This extension allows the chatbot to search for online software tools on biii.eu.
42 |     ![search-biii](./screenshots/search-biii.png)
43 |     *Figure 7. Search in biii.eu.*
44 | 
45 | The process begins with an initial response based on the user's query (`request`), which serves as a foundation for generating a new `query` for targeted information retrieval. This is combined with user profile data (`user_info`) and the query to produce a comprehensive final response.
46 | 
47 | ### Search Bioimage Archive
48 | This extension allows the chatbot to search the dataset index of the BioImage Archive.
49 |     ![search-bioimage-archive](./screenshots/search-bioimage-archive.png)
50 |     *Figure 8. Search in bioimage archive.*
51 | 
52 | ### Search image.sc Forum
53 | This extension allows the chatbot to search bioimage related topics and software issues in the image.sc forum.
54 |     ![search-image-sc](./screenshots/search-image-forum.png)
55 |     *Figure 9. Search in image.sc forum.*
56 | 
57 | ### Search Web
58 | This extension allows the chatbot to search for information from the web. It is triggered when the chatbot determines that it cannot find relevant information in the knowledge base.
59 |     ![search-web](./screenshots/web-search.png)
60 |     *Figure 10. Search in the web.*
61 | 
62 | 
63 | ### BioImage Model Zoo
64 | This mode is designed for queries requiring detailed model information or specific actions, generating and executing Python scripts for tailored solutions.
65 |     ![script-gen-exe-retrieval](./screenshots/search-model-zoo.png)
66 |     *Figure 11. Scripting retrieval for complex queries.*
67 | 
68 | It involves creating a `ModelZooInfoScript` schema with fields like `request`, `user info`, and `script`, where `script` is Python code for API interactions or data manipulation. The final response is formulated by integrating the script's output with the `request` and `user info`.
69 | 
70 | ## References
71 | 
72 | 1. [FAISS](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
73 | 2. [Bioimage.io](https://bioimage.io/docs/#/)
74 | 3. [Imjoy](https://imjoy.io/docs/#/)
75 | 4. [DeepImageJ](https://deepimagej.github.io/)
76 | 5. [ImageJ](https://imagej.net)
77 | 6. [bio.tools](https://bio.tools)
78 | 7. [scikit-image](https://scikit-image.org/docs/stable/)
79 | 8. [Function-Calling API](https://openai.com/blog/function-calling-and-other-api-updates)
80 | 9. [CellPose](https://www.cellpose.org)
81 | 


--------------------------------------------------------------------------------
/tests/test_chatbot.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from bioimageio_chatbot.chatbot import create_assistants, get_builtin_extensions, QuestionWithHistory, UserProfile
  3 | from schema_agents.schema import Message
  4 | import pytest
  5 | 
  6 | KNOWLEDGE_BASE_PATH = "./bioimageio-knowledge-base"  # NOTE(review): not referenced anywhere else in this test module
  7 | 
  8 | @pytest.fixture
  9 | def builtin_extensions():  # fixture: the extensions returned by get_builtin_extensions(), rebuilt for every test (default function scope)
 10 |     return get_builtin_extensions()
 11 | 
 12 | @pytest.fixture
 13 | def melman(builtin_extensions):  # fixture: the agent of the assistant named "Melman"
 14 |     assistants = create_assistants(builtin_extensions)
 15 |     # find an assistant name Melman
 16 |     m = [assistant for assistant in assistants if assistant['name'] == "Melman"][0]  # IndexError here means no assistant is named "Melman"
 17 |     return m['agent']
 18 | 
 19 | @pytest.mark.asyncio
 20 | async def test_chatbot(builtin_extensions, melman):  # for each enabled extension, asserts that the stringified response elements mention the expected tool name
 21 |     select_extensions = [  # only the BioImage Archive extension is enabled for the next four questions
 22 |         {"id": "bioimage_archive"}
 23 |     ]
 24 |     chat_history=[]  # the same empty history list is passed with every question below
 25 |     question = "Which tool can I use to analyse western blot image?"
 26 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 27 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 28 |     resp = await melman.handle(Message(content="", data=m , role="User"))  # the question travels in `data`; `content` is left empty
 29 |     assert resp
 30 |     str_resp = [str(element) for element in resp]  # stringify so the tool name can be found by substring search
 31 |     assert any(["BioimageArchiveSearch" in element for element in str_resp])
 32 | 
 33 |     question = "Which tool can I use to segment an cell image?"
 34 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 35 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 36 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 37 |     assert resp
 38 |     str_resp = [str(element) for element in resp]
 39 |     assert any(["BioimageArchiveSearch" in element for element in str_resp])
 40 |     
 41 |     question = "How can I test the models?"
 42 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 43 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 44 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 45 |     assert resp
 46 |     str_resp = [str(element) for element in resp]
 47 |     assert any(["BioimageArchiveSearch" in element for element in str_resp])
 48 | 
 49 |     question = "What are Model Contribution Guidelines?"  # reuses the `profile` created for the previous question
 50 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 51 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 52 |     assert resp
 53 |     str_resp = [str(element) for element in resp]
 54 |     assert any(["BioimageArchiveSearch" in element for element in str_resp])
 55 | 
 56 |     
 57 |     # test biii extension
 58 |     select_extensions = [
 59 |         {"id": "biii"}
 60 |     ]
 61 |     question = "What bioimage analysis tools are available for quantifying cell migration?"
 62 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 63 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 64 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 65 |     assert resp
 66 |     str_resp = [str(element) for element in resp]
 67 |     assert any(["BiiiSearch" in element for element in str_resp])
 68 |     
 69 |     question = "Are there any workflows on biii.eu for 3D reconstruction of neuronal networks from electron microscopy images?"
 70 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 71 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 72 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 73 |     assert resp
 74 |     str_resp = [str(element) for element in resp]
 75 |     assert any(["BiiiSearch" in element for element in str_resp])
 76 |     
 77 |     
 78 |     # test image_sc extension
 79 |     select_extensions = [
 80 |         {"id": "image_sc_forum"}
 81 |     ]
 82 |     question = "I got a problem, StarDist stops working! help me find it in image.sc forum."
 83 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 84 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
 85 |     resp = await melman.handle(Message(content="", data=m , role="User"))
 86 |     assert resp
 87 |     # make resp a string
 88 |     resp = [str(element) for element in resp]  # unlike the other sections, this rebinds `resp` instead of using `str_resp`
 89 |     assert any(["ImageScForumSearch" in element for element in resp])
 90 |     assert any(['''posts":''' in element for element in resp])  # additionally requires the text `posts":` in some element
 91 | 
 92 | 
 93 |     # test web extension
 94 |     select_extensions = [
 95 |         {"id": "web"}
 96 |     ]
 97 |     question = "I want to know more about the BioImage Archive"
 98 |     profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
 99 |     m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
100 |     resp = await melman.handle(Message(content="", data=m , role="User"))
101 |     assert resp
102 |     str_resp = [str(element) for element in resp]
103 |     assert any(["WebSearch" in element for element in str_resp])
104 |     assert any(['''"content": ''' in element for element in str_resp])  # additionally requires the text `"content": ` in some element
105 |     


--------------------------------------------------------------------------------
/docs/figure-2-use-cases.md:
--------------------------------------------------------------------------------
 1 | # Reproducing Example Usage Scenarios of the BioImage.IO Chatbot Figure 2
 2 | 
 3 | This section provides detailed instructions for reproducing the example usage scenarios of the BioImage.IO Chatbot illustrated in Figure 2 of the main text:
 4 | 
 5 | <img src="https://docs.google.com/drawings/d/e/2PACX-1vTIRwRldQBnTFqz0hvS01znGOEdoeDMJmZC-PlBM-O59u_xo7DfJlUEE9SlRsy6xO1hT2HuSOBrLmUz/pub?w=1324&amp;h=1063">
 6 | 
 7 | 
 8 | These steps will guide users through querying documents, utilizing online services, executing AI models, and developing extensions.
 9 | 
10 | ## Access the BioImage.IO Chatbot Interface
11 | Launch the chatbot through the BioImage.IO website [here](https://bioimage.io/chat/) or use the dedicated user interface.
12 | 
13 | ## Video for Reproducing the Scenarios
14 |  * **[A video showcasing information retrieval (as described in scenarios a-c)](https://zenodo.org/records/10967840/files/Supplementary-Video-1-bioimageio-chatbot-information-retrieval.mp4?download=1)**
15 |  * **[A video showcasing AI model execution (as described in scenario d)](https://zenodo.org/records/10967840/files/Supplementary-Video-2-bioimageio-chatbot-ai-image-analysis.mp4?download=1)**
16 | 
17 | 
18 | ### Scenario (a): Querying Bioimage Analysis Documentation
19 | 
20 | - **Initiate a Query**: Type a question related to bioimage analysis, e.g., "What are the best practices for optimizing model performance on bioimage.io?"
21 | - **Review the Chatbot's Response**: The chatbot will provide an answer that includes information extracted from the BioImage Model Zoo documentation.
22 | 
23 | ### Scenario (b): Exploring the Human Protein Atlas
24 | 
25 | - **Initiate a Query**: Ask the chatbot to find protein information in the Human Protein Atlas by typing "Tell me about PML protein and show me the cell images"
26 | - **Interpret the Results**: The chatbot will respond by constructing an API call to the Protein Atlas database and displaying the relevant information about the PML protein, including cell images.
27 | 
28 | ### Scenario (c): Querying the BioImage Archive
29 | 
30 | - **Initiate a Query**: Ask the chatbot to find cell images at the G1 phase by typing "Please, find datasets of cell images at G1 phase."
31 | - **Interpret the Results**: The chatbot will initiate an API call to the BioImage Archive server, and return results such as a study titled "DeepCycle: Deep learning reconstruction of the closed cell cycle trajectory from single-cell unsegmented microscopy images."
32 | 
33 | ### Scenario (d): Running AI Models for Image Analysis
34 | 
35 | - **Prerequisites**: Ensure you have Chrome or a Chromium-based browser installed on your computer.
36 | - **Download Image Data**: Begin by creating a new folder on your computer named `test-images`. Download the image data file from [this link](https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/example-data/nuclei.tif) and save it into the `test-images` folder.
37 | - **Initiate Bioimage Analyst**: Navigate to the BioImage.IO chatbot interface at https://bioimage.io/chat/. Note that only Chrome or Chromium-based browser is supported at the moment. Select "Bioimage Analyst(Bridget)" located in the upper right corner of the chatbot interface.
38 | - **Mount your Data Folder**: Within the chat interface, click on the "Mount Files" button located below the dialog window. This action will allow you to mount the `test-images` folder that contains your downloaded image data. The chatbot will confirm the successful mounting of the folder; you can then ask it to list the files contained within to ensure that your data is ready for analysis.
39 | - **Perform segmentation using Cellpose model**: Type "Segment the image `/mnt/nuclei.tif` using Cellpose" to run the Cellpose model on the image data. Upon successful execution of the model, the chatbot will notify you that the segmentation process is complete and will display the analyzed results. Optionally, you can ask it to "count the number of nuclei in the image" if successfully segmented, "plot the size distribution of nuclei", or you can tell it to "use the visual inspection tool to analyze the figure and create a report about the size distribution".
40 | 
41 | ### Scenario (e): Developing New Extensions
42 | 
43 | Follow the steps below to develop a new extension for microscope stage control and image capture. For a detailed tutorial, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/bioimage-chatbot-extension-tutorial.ipynb) or access the Jupyter Notebook directly through ImJoy [here](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1).
44 | 
45 | ### Scenario (f): Controlling a Microscope Stage and Capturing Images
46 | 
47 | - **Pre-requisites**: You will need a microscope and the squid control software
48 | 
49 | - **Create microscope extension**: Following the example in the above [chatbot extension example notebook](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1), create a hypha service extension for controlling the microscope:
50 |     1. **Setup the Developer Environment**: Open a Jupyter Notebook. Install and import the `imjoy_rpc`, `hypha_rpc` and `pydantic` packages.
51 |     2. **Define Input Schemas**: Create classes for `MoveStageInput` and `SnapImageInput` to structure the user input. (Note: To help the chatbot understand the "center", you will need to tell the chatbot about the boundaries of the stage via the docstring of the `MoveStageInput` class)
52 |     3. **Implement Control Functions**: Write asynchronous functions `move_stage` and `snap_image`.
53 |     4. **Setup Extension Interface**: Develop the extension interface and define a schema getter function.
54 |     5. **Register the Extension**: Register the extension as a hypha service and connect it to the chatbot.
55 | - **Initiate a Query**: Ask the chatbot to "Please move to the center and snap an image".
56 | - **Interpret the Results**: The chatbot will execute the `move_stage` function to move the microscope stage to the center and then capture an image using the `snap_image` function. The chatbot will confirm the successful completion of the tasks.
57 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/hpa_extension.py:
--------------------------------------------------------------------------------
  1 | from bioimageio_chatbot.utils import ChatbotExtension
  2 | from schema_agents import schema_tool
  3 | from pydantic import Field, BaseModel
  4 | from typing import Optional, List, Dict, Any
  5 | import pandas as pd
  6 | from pathlib import Path
  7 | import requests
  8 | import re
  9 | import os
 10 | from bioimageio_chatbot.utils import download_file
 11 | 
 12 | class HPAClient:
 13 |     def __init__(self):  # downloads the Human Protein Atlas table if it is not on disk yet, then loads it; performs file and network I/O
 14 |         self._base_url = 'https://www.proteinatlas.org/download/proteinatlas.tsv.zip'  # despite the name, this is the full download URL of the TSV archive
 15 |         folder = Path('./data')  # relative path, so it is resolved against the current working directory
 16 |         file_path = os.path.join(folder, 'proteinatlas.tsv.zip')
 17 |         # first check whether the data is already downloaded in the ./data folder
 18 |         if not os.path.exists(file_path):
 19 |             os.makedirs(folder, exist_ok=True)
 20 |             # download the data
 21 |             download_file(self._base_url, file_path)
 22 |         # Load and preprocess data at startup
 23 |         self.data = pd.read_csv(file_path, delimiter='\t')  # the .zip path is passed directly; pandas infers the compression from the file extension
 24 |         # Convert all textual data to lowercase strings for faster case-insensitive searching
 25 |         self.preprocessed_data = self.data.apply(lambda x: x.astype(str).str.lower())  # every column is cast to str and lowercased; self.data keeps the original values
 26 |     
 27 |     async def search_hpa(self,
 28 |         query: str = Field(..., description="Enter gene names, functions, or disease terms to search in the Human Protein Atlas."),
 29 |         limitSize: int = Field(10, gt=0, description="Number of returned items per search.")  # NOTE(review): the default is a pydantic Field object; presumably resolved by schema_tool — a direct call must pass limitSize explicitly
 30 |     ) -> Dict[str, Any]:  # NOTE(review): the body returns a list of dicts, not a dict
 31 |         """Search the Human Protein Atlas for proteins based on a query string, return the top search results."""
 32 |         query = query.lower()  # preprocessed_data is lowercased, so the query is lowercased to match
 33 |         
 34 |         # Search for the query in the preprocessed data
 35 |         query_results = self.preprocessed_data.apply(lambda x: x.str.contains(query)).sum(axis=1)  # per row: number of columns containing the query; NOTE(review): str.contains treats the query as a regular expression by default
 36 |         query_results = query_results.sort_values(ascending=False)  # rows matching in the most columns come first
 37 |         query_results = query_results.head(limitSize)  # NOTE(review): rows with a count of 0 are not filtered out, so non-matching rows can fill the result
 38 | 
 39 |         selected_columns = ['Gene', 'Gene synonym', 'Ensembl', 
 40 |                             'Gene description', 'Subcellular location', 'Subcellular main location', 'Subcellular additional location', 
 41 |                             'Biological process', 'Molecular function', 'Uniprot', 'Antibody',
 42 |                             'Disease involvement', 'Secretome function', 'CCD Protein', 'CCD Transcript',
 43 |                             'Evidence', 'Protein class']
 44 |         
 45 |         info_list = []
 46 |         for index in query_results.index:
 47 |             items = self.data.loc[index, selected_columns]  # values are read from the original (not lowercased) table
 48 |             info_list.append(items.to_dict())
 49 |         return info_list  # one dict per selected row, keyed by the column names above
 50 | 
 51 |     async def read_protein_info(self,
 52 |         ensembl: str = Field(..., description="Ensembl ID of the protein.")
 53 |     )-> Dict[str, Any]:
 54 |         """Get detailed information about a protein from the Human Protein Atlas."""
 55 |         json_link = f"https://www.proteinatlas.org/{ensembl}.json"  # ensembl is interpolated into the URL as given
 56 |         response = requests.get(json_link)  # NOTE(review): synchronous call inside an async def, so the event loop is blocked while it waits; no timeout is set
 57 |         # check if the request was successful
 58 |         response.raise_for_status()
 59 |         # return the content
 60 |         return response.json()  # the parsed JSON body is returned without filtering
 61 | 
 62 | 
 63 | 
 64 |     async def get_cell_image(self,
 65 |         gene: str = Field(..., description="Gene name of the protein."),
 66 |         ensembl: str = Field(..., description="Ensembl ID of the protein."),
 67 |         section: str = Field("subcellular", description="Section of the Human Protein Atlas to search for the protein. Valid options are 'subcellular', 'tissue',")
 68 |         ) -> List[str]:
 69 |         """Retrieve a list of cell image links from the Human Protein Atlas, where a specific protein is tagged in the green channel. 
 70 |         ALWAYS render the result thumbnail images as a horizatal table and create link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
 71 |         link_name = f"{ensembl}-{gene}"
 72 |         http_link = f"https://www.proteinatlas.org/{link_name}/{section}"
 73 |         # read the source code of the page
 74 |         response = requests.get(http_link)
 75 |         if '<p>Not available</p>' in response.text:
 76 |             return 'No cell image available.'
 77 |         # Search for image links, capturing the part after 'src="'
 78 |         pattern = r'src="(?P<url>//images\.proteinatlas\.org/.*?_red_green_thumb\.jpg)"'
 79 |         image_links = re.findall(pattern, response.text)
 80 |         # replace the 'red_green' with 'blue_red_green_yellow' if 'blue' not in the link, otherwise replace 'blue_red_green' with 'blue_red_green_yellow'
 81 |         image_links = [link.replace('red_green', 'blue_red_green_yellow') if 'blue' not in link else link.replace('blue_red_green', 'blue_red_green_yellow') for link in image_links]
 82 |         # Remove '_thumb' from each link and print or process them
 83 |         final_image_links = []
 84 |         for link in image_links:
 85 |             final_image_links.append(f"https:{link}")
 86 |         return final_image_links
 87 | 
 88 | 
 89 | def get_extension():
 90 |     hpa_client = HPAClient()
 91 |     search_tool = schema_tool(hpa_client.search_hpa)
 92 |     read_tool = schema_tool(hpa_client.read_protein_info)
 93 |     get_cell_image_tool = schema_tool(hpa_client.get_cell_image)
 94 | 
 95 |     return ChatbotExtension(
 96 |         id="hpa",
 97 |         name="Human Protein Atlas",
 98 |         description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc. When searching for cell images, always search for the gene name and Ensembl ID of the protein.",
 99 |         tools=dict(
100 |             search=search_tool,
101 |             read=read_tool,
102 |             get_cell_image=get_cell_image_tool
103 |         )
104 |     )
105 | 
if __name__ == "__main__":
    import asyncio

    async def main():
        # Manual smoke test: run one search through the extension's tool and print the hits.
        hpa_extension = get_extension()
        hits = await hpa_extension.tools["search"](query="brain", limitSize=2)
        print(hits)

    # Run the async function
    asyncio.run(main())
117 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/biii_extension.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import requests
  3 | from bs4 import BeautifulSoup
  4 | import pandas as pd
  5 | from typing import List, Optional
  6 | from pydantic import BaseModel, Field
  7 | from bioimageio_chatbot.utils import ChatbotExtension
  8 | from schema_agents import schema_tool
  9 | 
 10 | class BiiiQuery(BaseModel):
 11 |     """Queries parameters for biii.eu search"""
 12 | 
 13 |     queries: List[str] = Field(description="A list of keywords to search for")
 14 | 
 15 | 
 16 | class BiiiRow(BaseModel):
 17 |     """Search result row from biii.eu"""
 18 | 
 19 |     name: str = Field(description="Name")
 20 |     relevance: str = Field(description="Relevance score")
 21 |     image_dimension: Optional[str] = Field(
 22 |         None, description="Supported image dimension"
 23 |     )
 24 |     requires: Optional[str] = Field(description="Dependent software")
 25 |     excerpt: str = Field(description="Description")
 26 | 
 27 | 
 28 | def extract_table_with_links(table, base_url) -> pd.DataFrame:
 29 |     """
 30 |     Extracts a table from HTML and includes hyperlinks in the cells if available.
 31 | 
 32 |     Args:
 33 |     table (bs4.element.Tag): A BeautifulSoup Tag object representing a table.
 34 | 
 35 |     Returns:
 36 |     pd.DataFrame: A DataFrame representation of the table with text and hyperlinks.
 37 |     """
 38 |     rows = table.find_all("tr")
 39 |     data = []
 40 | 
 41 |     for index, row in enumerate(rows):
 42 |         columns = row.find_all(["td", "th"])
 43 |         row_data = []
 44 | 
 45 |         for column in columns:
 46 |             cell_text = column.get_text(strip=True)
 47 | 
 48 |             # Check for a hyperlink in the cell
 49 |             link = column.find("a", href=True)
 50 |             if index != 0 and link and cell_text:
 51 |                 cell_text += f"({link['href'] if link['href'].startswith('http') else base_url + link['href']})"
 52 | 
 53 |             row_data.append(cell_text)
 54 | 
 55 |         data.append(row_data)
 56 | 
 57 |     if data:
 58 |         columns = data[0]
 59 |         columns[0] = "Name"
 60 |         if columns[3] == "Supported Image Dimension":
 61 |             columns[2] = "Logo"
 62 |     df = pd.DataFrame(data[1:], columns=columns) if data and columns else pd.DataFrame()
 63 |     # remove column named "Content type" if exists
 64 |     if "Content type" in df.columns:
 65 |         df = df.drop(columns=["Content type"])
 66 | 
 67 |     # convert to list of BiiiRow
 68 |     df = df.to_dict(orient="records")
 69 |     return [
 70 |         BiiiRow(
 71 |             name=row["Name"],
 72 |             relevance=row["Relevance"],
 73 |             image_dimension=row.get("Supported Image Dimension"),
 74 |             requires=row.get("Requires"),
 75 |             excerpt=row["Excerpt"],
 76 |         )
 77 |         for row in df
 78 |     ]
 79 | 
 80 | 
 81 | def search_biii_with_links(
 82 |     queries: List[str], content_type="software", base_url="https://biii.eu"
 83 | ) -> dict:
 84 |     """
 85 |     Modified search function to include hyperlinks in the extracted tables.
 86 | 
 87 |     Args:
 88 |     queries (List[str]): A list of search queries.
 89 | 
 90 |     Returns:
 91 |     dict: A dictionary where each key is a "Content type" and value is a pandas dataframe of the table with links.
 92 |     """
 93 |     search_base_url = "https://biii.eu/search?search_api_fulltext="
 94 | 
 95 |     for query in queries:
 96 |         url = search_base_url + ",".join(query.split())
 97 |         response = requests.get(url)
 98 |         soup = BeautifulSoup(response.text, "html.parser")
 99 |         tables = soup.find_all("table")
100 | 
101 |         for table in tables:
102 |             caption = table.find("caption")
103 |             if caption:
104 |                 caption = (
105 |                     caption.get_text().strip().replace("Content type: ", "").lower()
106 |                 )
107 |             else:
108 |                 continue  # Skip tables without a caption
109 | 
110 |             if caption != content_type:
111 |                 continue
112 | 
113 |             df = extract_table_with_links(table, base_url)
114 |             return df
115 | 
116 | 
class BiiiSearchResult(BaseModel):
    """Search results from biii.eu"""

    # Rows extracted from the biii.eu result table.
    results: List[BiiiRow] = Field(
        description="Search results from biii.eu",
    )
    # Site root against which relative links in `results` are resolved.
    base_url: str = Field(
        description="The based URL of the search results, e.g. ImageJ (/imagej) will become <base_url>/imagej",
    )
123 | 
124 | 
class BiiiResponse(BaseModel):
    """Summarize the search results from biii.eu"""

    # Final answer text; the description below doubles as the instruction
    # given to whatever fills in this schema.
    response: str = Field(
        description="The answer to the user's question based on the search results. Can be either a detailed response in markdown format if the search results are relevant to the user's question or 'I don't know'. It should resolve relative URLs in the search results using the base_url.",
    )
131 | 
@schema_tool
async def search_biii(
    keywords: List[str] = Field(
        description="A list of search keywords, no space allowed in each keyword."
    ),
    top_k: int = Field(
        10,
        description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
    )):
    """Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools."""
    # Clamp the requested number of results to the range 1..15.
    top_k = min(max(top_k, 1), 15)
    print(f"Searching biii.eu with keywords: {keywords}, top_k: {top_k}")

    # The scraper is synchronous, so it runs in the default executor. The
    # empty base_url leaves relative hrefs untouched; the site root is handed
    # over separately in the result object below.
    rows = await asyncio.get_running_loop().run_in_executor(
        None, search_biii_with_links, keywords, "software", ""
    )
    if not rows:
        return f"Sorry I didn't find relevant information in biii.eu about {keywords}"
    return BiiiSearchResult(
        results=rows[:top_k],
        base_url="https://biii.eu",
    )
158 | 
def get_extension():
    # Expose the biii.eu search as a chatbot extension with a single "search" tool.
    tools = {"search": search_biii}
    return ChatbotExtension(
        id="biii",
        name="Search BioImage Informatics Index (biii.eu)",
        description="Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools. Provide a list of keywords to search for software tools on biii.eu. Returns a list of relevant documents.",
        tools=tools,
    )
166 | 
167 | 
if __name__ == "__main__":
    # Manual check against the live site. Table header noted when this was written:
    # Index(['Name', 'Relevance', 'Logo', 'Supported Image Dimension', 'requires',
    #   'Content type', 'Excerpt'],
    #  dtype='object')
    print(search_biii_with_links(["image segmentation"]))
174 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/image_sc_extension.py:
--------------------------------------------------------------------------------
  1 | import httpx
  2 | import os
  3 | import os
  4 | import urllib.parse
  5 | import asyncio
  6 | import html2text
  7 | import logging
  8 | from pydantic import Field
  9 | from bioimageio_chatbot.utils import ChatbotExtension
 10 | from typing import List, Dict, Any, Optional
 11 | from schema_agents import schema_tool
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | class DiscourseClient:
 16 |     def __init__(self, base_url: str, username: str, api_key: str):
 17 |         self._base_url = base_url
 18 |         self._username = username
 19 |         self._api_key = api_key
 20 | 
 21 |     def _build_query_string(self, query: str, order: str, status: str) -> str:
 22 |         # Construct the query string with the provided parameters.
 23 |         # Note: `urllib.parse.quote` is used to ensure the query is URL encoded.
 24 |         query_components = [
 25 |             f"{query}",
 26 |             f"order:{order}",
 27 |         ]
 28 |         if status:
 29 |             query_components.append(f"status:{status}")
 30 |         return "q=" + urllib.parse.quote(" ".join(query_components))
 31 | 
 32 |     def _get_headers(self) -> Dict[str, str]:
 33 |         return {
 34 |             "Content-Type": "application/json",
 35 |             "Api-Username": self._username,
 36 |             "Api-Key": self._api_key,
 37 |         }
 38 | 
 39 |     def _cleanup_search_results(self, results: Dict[str, Any], top_k: int=10) -> Dict[str, Any]:
 40 |         cleaned_results = {
 41 |             "posts": [
 42 |                 {"id": post["id"], "topic_id": post["topic_id"], "blurb": post["blurb"]}
 43 |                 for post in results.get("posts", [])
 44 |                 if "id" in post and "topic_id" in post and "blurb" in post
 45 |             ],
 46 |             "topics": [
 47 |                 {"title": topic["title"], "slug": topic["slug"]}
 48 |                 for topic in results.get("topics", [])
 49 |                 if "title" in topic and "slug" in topic
 50 |             ]
 51 |         }
 52 |         cleaned_results["posts"] = cleaned_results["posts"][:top_k]
 53 |         cleaned_results["topics"] = cleaned_results["topics"][:top_k]
 54 |         return cleaned_results
 55 | 
 56 |     async def search_image_sc(self, query: str = Field(..., description="The search query string."),
 57 |             top_k: int = Field(..., gt=0, description="Maximum number of search results to return."),
 58 |             order: Optional[str] = Field("latest", description="Order of the search results, options: latest, likes, views, latest_topic."),
 59 |             status: Optional[str] = Field(None, description="The status filter for the search results, options: solved, unsolved, open, closed."),
 60 |         ):
 61 |         """Search the Image.sc Forum(a forum for scientific image software) for posts and topics."""
 62 |         # Prepare headers for authentication
 63 |         headers = self._get_headers()
 64 | 
 65 |         # Build the query string
 66 |         query_string = self._build_query_string(query, order, status)
 67 |         
 68 |         # Construct the full URL
 69 |         url = f"{self._base_url}/search.json?{query_string}"
 70 |         logger.info(f"Searching Image.sc Forum for: {query}")
 71 | 
 72 |         # Perform the asynchronous HTTP GET request
 73 |         async with httpx.AsyncClient() as client:
 74 |             response = await client.get(url, headers=headers)
 75 | 
 76 |         # Check if the request was successful
 77 |         if response.status_code == 200:
 78 |             return self._cleanup_search_results(response.json(), top_k)  # Return the JSON response
 79 |         else:
 80 |             response.raise_for_status()  # Raise an error for bad responses
 81 | 
 82 |     async def read_image_sc_posts(self,
 83 |             type: str = Field(..., description="type: `post` or `topic`"),
 84 |             id: int = Field(..., description="topic id")
 85 |         ):
 86 |         """Read a single or all the posts in a topic from the Image.sc Forum (a discussion forum for scientific image software)."""
 87 |         if type == "post":
 88 |             return await self.get_post_content(id)
 89 |         elif type == "topic":
 90 |             return await self.get_topic_content(id)
 91 |     
 92 |     async def get_topic_content(self, topic_id: int) -> Dict[str, Any]:
 93 |         url = f"{self._base_url}/t/{topic_id}.json"
 94 |         headers = self._get_headers()
 95 |         async with httpx.AsyncClient() as client:
 96 |             response = await client.get(url, headers=headers)
 97 |         response.raise_for_status()
 98 |         topic_data = response.json()
 99 | 
100 |         post_ids = [post['id'] for post in topic_data['post_stream']['posts']]
101 |         messages = await asyncio.gather(*[self.get_post_content(post_id) for post_id in post_ids])
102 |         posts = []
103 |         for msg in messages:
104 |             posts.append(f"{msg['username']}: {html2text.html2text(msg['content'])}")
105 |         return {"posts": posts, "url": f"{self._base_url}/t/{topic_data['slug']}"}
106 | 
107 |     async def get_post_content(self, post_id: int) -> str:
108 |         url = f"{self._base_url}/posts/{post_id}.json"
109 |         headers = self._get_headers()
110 |         async with httpx.AsyncClient() as client:
111 |             response = await client.get(url, headers=headers)
112 |         response.raise_for_status()
113 |         post_data = response.json()
114 |         return {"username": post_data["username"], "content": post_data["cooked"], "url": f"{self._base_url}/t/{post_data['topic_slug']}"}
115 |     
def get_extension():
    # Build the Image.sc forum extension, or return None (after printing a
    # warning) when the Discourse credentials are not set in the environment.
    username = os.environ.get("DISCOURSE_USERNAME")
    api_key = os.environ.get("DISCOURSE_API_KEY")
    if not username or not api_key:
        print("WARNING: Image.sc Forum extensions require DISCOURSE_USERNAME and DISCOURSE_API_KEY environment variables to be set, disabling it for now.")
        return None

    # No trailing slash: the client joins paths as f"{base_url}/...", so
    # "https://forum.image.sc/" used to yield URLs containing "//".
    discourse_client = DiscourseClient(base_url="https://forum.image.sc", username=username, api_key=api_key)
    return ChatbotExtension(
        id="image_sc_forum",
        name="Search image.sc Forum",
        description="Search the Image.sc Forum for posts and topics. Provide a search query to search the Image.sc Forum for posts or post, and read a specific topic",
        tools=dict(
            search=schema_tool(discourse_client.search_image_sc),
            read=schema_tool(discourse_client.read_image_sc_posts)
        )
    )
133 | 
if __name__ == "__main__":
    import json

    async def main():
        # Credentials are read from the environment, as get_extension() does.
        # An API key must never be committed to source control; the key that
        # used to be hard-coded here should be treated as leaked and revoked.
        username = os.environ.get("DISCOURSE_USERNAME")
        api_key = os.environ.get("DISCOURSE_API_KEY")
        if not username or not api_key:
            raise SystemExit("Set DISCOURSE_USERNAME and DISCOURSE_API_KEY to run this demo.")

        discourse_client = DiscourseClient(base_url="https://forum.image.sc", username=username, api_key=api_key)
        # `status` is passed explicitly: a direct call bypasses schema_tool, so
        # an omitted argument would be the raw `Field(...)` default object.
        results = await discourse_client.search_image_sc("python", 5, "latest", None)
        print(json.dumps(results))
        results = await discourse_client.read_image_sc_posts('topic', 44826)
        print(results)

    # Run the async function
    asyncio.run(main())
145 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/jsonschema_pydantic.py:
--------------------------------------------------------------------------------
  1 | """Jsonschema to pydantic schema from https://github.com/c32168/dyntamic"""
  2 | from typing import Annotated, Union, Any, Optional
  3 | 
  4 | import typing
  5 | from pydantic import create_model
  6 | from pydantic.fields import Field, PydanticUndefined
  7 | 
  8 | Model = typing.TypeVar('Model', bound='BaseModel')
  9 | 
 10 | 
 11 | class DyntamicFactory:
 12 | 
 13 |     TYPES = {
 14 |         'string': str,
 15 |         'array': list,
 16 |         'boolean': bool,
 17 |         'integer': int,
 18 |         'float': float,
 19 |         'number': float,
 20 |         'null': None,
 21 |     }
 22 | 
 23 |     def __init__(self,
 24 |                  json_schema: dict,
 25 |                  base_model: Union[type[Model], tuple[type[Model], ...], None] = None,
 26 |                  ref_template: str = "definitions"
 27 |                  ) -> None:
 28 |         """
 29 |         Creates a dynamic pydantic model from a JSONSchema, dumped from and existing Pydantic model elsewhere.
 30 |             JSONSchema dump must be called with ref_template='{model}' like:
 31 | 
 32 |             SomeSampleModel.model_json_schema(ref_template='{model}')
 33 |             Use:
 34 |             >> _factory = DyntamicFactory(schema)
 35 |             >> _factory.make()
 36 |             >> _model = create_model(_factory.class_name, **_factory.model_fields)
 37 |             >> _instance = dynamic_model.model_validate(json_with_data)
 38 |             >> validated_data = model_instance.model_dump()
 39 |         """
 40 |         self.class_name = json_schema.get('title')
 41 |         self.description = json_schema.get('description')
 42 |         self.class_type = json_schema.get('type')
 43 |         self.required = json_schema.get('required', [])
 44 |         self.default = json_schema.get('default')
 45 |         self.raw_fields = json_schema.get('properties')
 46 |         self.ref_template = ref_template
 47 |         self.definitions = json_schema.get(ref_template)
 48 |         self.fields = {}
 49 |         self.model_fields = {}
 50 |         self._base_model = base_model
 51 |         
 52 |     def get_factory(self, field_name, field) -> Any:
 53 |         """Get the factory for a given type"""
 54 |         f_type = field.get('type')
 55 |         if f_type is None and 'anyOf' in field:
 56 |             factory = tuple([self.get_factory(None, t) for t in field.get('anyOf')])
 57 |             if None in factory and len(factory) == 2:
 58 |                 if field_name and field_name not in self.required:
 59 |                     factory = [f for f in factory if f is not None][0]
 60 |                 else:
 61 |                     factory = Optional[[f for f in factory if f is not None][0]]
 62 |             else:
 63 |                 factory = Union[factory]
 64 |         else:
 65 |             factory = self.TYPES.get(f_type)
 66 |         return factory
 67 | 
 68 |     def make(self) -> Model:
 69 |         """Factory method, dynamically creates a pydantic model from JSON Schema"""
 70 |         for field in self.raw_fields:
 71 |             if field not in self.required:
 72 |                 default = self.raw_fields[field].get('default')
 73 |             else:
 74 |                 default = PydanticUndefined
 75 |             if '$ref' in self.raw_fields[field]:
 76 |                 model_name = self.raw_fields[field].get('$ref')
 77 |                 # resolve $ref
 78 |                 # consider all the cases in standard json schema
 79 |                 
 80 |                 if model_name.startswith('#/'):
 81 |                     model_name = model_name.replace('#/', '')
 82 |                 elif model_name.startswith('#'):
 83 |                     model_name = model_name.replace('#', '')
 84 |                 
 85 |                 if model_name.startswith(self.ref_template+"/"):
 86 |                     model_name = model_name.replace(self.ref_template+"/", '')
 87 | 
 88 |                 self._make_nested(model_name, field, default)
 89 |             else:
 90 |                 factory = self.get_factory(field, self.raw_fields[field])
 91 |                 if factory is None:
 92 |                     factory = Any
 93 |                 if factory == list:
 94 |                     items = self.raw_fields[field].get('items')
 95 |                     if self.ref_template in items:
 96 |                         self._make_nested(items.get(self.ref_template), field)
 97 |                 
 98 |                 self._make_field(factory, field, self.raw_fields.get('title'), self.raw_fields.get(field).get('description'), default=default)
 99 |         model = create_model(self.class_name, __base__=self._base_model, **self.model_fields)
100 |         model.__doc__ = self.description
101 |         return model
102 | 
103 |     def _make_nested(self, model_name: str, field, default) -> None:
104 |         """Create a nested model"""
105 |         level = DyntamicFactory({self.ref_template: self.definitions} | self.definitions.get(model_name),
106 |                                 ref_template=self.ref_template)
107 |         level.make()
108 |         model = create_model(model_name, **level.model_fields)
109 |         model.__doc__ = level.description
110 |         self._make_field(model, field, field, level.description, default)
111 | 
112 |     def _make_field(self, factory, field, alias, description, default) -> None:
113 |         """Create an annotated field"""
114 |         # if field not in self.required:
115 |         #     factory_annotation = Annotated[Union[factory, None], factory]
116 |         # else:
117 |         factory_annotation = factory
118 |         self.model_fields[field] = (
119 |             Annotated[factory_annotation, Field(default_factory=None, alias=alias, description=description)], default)
120 | 
def json_schema_to_pydantic_model(schema):
    # Convenience wrapper: build the factory with default settings and return the model class.
    return DyntamicFactory(schema).make()
124 | 
if __name__ == "__main__":
    # Self-check: convert a sample schema (with a nested definition) into a
    # model and compare the regenerated JSON schema with the input.
    args_definition = {
        "title": "Args",
        "type": "object",
        "description": "Arguments for the macro",
        "properties": {
            "arg1": {"type": "string", "description": "arg1"},
        },
    }
    input_schema = {
        "title": "RunMacro",
        "description": "Run a macro",
        "type": "object",
        "properties": {
            "macro": {"type": "string", "description": "The macro to run"},
            "args": {"$ref": "#/definitions/Args"},
            "query": {
                "description": "The search query string.",
                "title": "Query",
                "type": "string",
            },
            "pageSize": {
                "default": 10,
                "description": "Number of search results per page.",
                "exclusiveMinimum": 0,
                "title": "Pagesize",
                "type": "integer",
            },
            "page": {
                "default": 1,
                "description": "Page number of the search results.",
                "title": "Page",
                "type": "integer",
            },
            "sortOrder": {
                "anyOf": [{"type": "string"}, {"type": "null"}],
                "default": "descending",
                "description": "Sort order: ascending or descending.",
                "title": "Sortorder",
            },
        },
        "required": ["macro", "query"],
        "definitions": {"Args": args_definition},
    }

    RunMacroClass = json_schema_to_pydantic_model(input_schema)
    assert RunMacroClass.__name__ == input_schema["title"]
    # assert RunMacroClass.__doc__ == input_schema["description"]
    instance = RunMacroClass(macro="test", args={"test": "test"}, query="test")
    generated_schema = RunMacroClass.model_json_schema()
    print(generated_schema)

    assert generated_schema['title'] == input_schema['title']
    assert generated_schema['description'] == input_schema['description']
    expected_macro_description = input_schema['properties']['macro']["description"]
    assert generated_schema['properties']['macro']["description"] == expected_macro_description
    assert generated_schema['properties']['args']['allOf'][0]['$ref'] == "#/$defs/Args"
    assert instance.macro == "test"
167 |     


--------------------------------------------------------------------------------
/bioimageio_chatbot/chatbot_extensions/docs_extension.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import asyncio
  3 | from functools import partial
  4 | from pydantic import BaseModel, Field
  5 | from typing import Any, Dict, Optional
  6 | from bioimageio_chatbot.knowledge_base import load_knowledge_base
  7 | from bioimageio_chatbot.utils import get_manifest
  8 | from bioimageio_chatbot.utils import ChatbotExtension
  9 | from schema_agents import schema_tool
 10 | 
 11 | class DocWithScore(BaseModel):
 12 |     """A document with an associated relevance score."""
 13 | 
 14 |     doc: str = Field(description="The document retrieved.")
 15 |     score: float = Field(description="The relevance score of the retrieved document.")
 16 | 
 17 | 
 18 | async def run_extension(
 19 |     docs_store_dict,
 20 |     channel_id,
 21 |     query: str = Field(
 22 |         description="The query used to retrieve documents related to the user's request. It should be a sentence which will be used to match descriptions using the OpenAI text embedding to match document chunks in a vector database."
 23 |     ),
 24 |     top_k: int = Field(
 25 |         3,
 26 |         description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
 27 |     ),
 28 | ):
 29 |     channel_results = []
 30 |     # channel_urls = []
 31 |     # limit top_k from 1 to 15
 32 |     top_k = max(1, min(top_k, 15))
 33 |     docs_store = docs_store_dict[channel_id]
 34 | 
 35 |     print(f"Retrieving documents from database {channel_id} with query: {query}")
 36 |     channel_results.append(
 37 |         await docs_store.asimilarity_search_with_relevance_scores(
 38 |             query, k=top_k
 39 |         )
 40 |     )
 41 | 
 42 |     docs_with_score = [
 43 |         DocWithScore(
 44 |             doc=doc.page_content,
 45 |             score=round(score, 2),
 46 |             metadata=doc.metadata,  # , base_url=base_url
 47 |         )
 48 |         for results_with_scores in channel_results
 49 |         for doc, score in results_with_scores
 50 |     ]
 51 |     # sort by relevance score
 52 |     docs_with_score = sorted(docs_with_score, key=lambda x: x.score, reverse=True)[
 53 |         : top_k
 54 |     ]
 55 | 
 56 |     if len(docs_with_score) > 2:
 57 |         print(
 58 |             f"Retrieved documents:\n{docs_with_score[0].doc[:20] + '...'} (score: {docs_with_score[0].score})\n{docs_with_score[1].doc[:20] + '...'} (score: {docs_with_score[1].score})\n{docs_with_score[2].doc[:20] + '...'} (score: {docs_with_score[2].score})"
 59 |         )
 60 |     else:
 61 |         print(f"Retrieved documents:\n{docs_with_score}")
 62 |     return docs_with_score
 63 | 
 64 | 
 65 | def title_case(s):
 66 |     return s.replace(".", " ").replace("-", " ").title().replace(" ", "")
 67 | 
 68 | def create_tool(docs_store_dict, collection):
 69 |     async def run_extension(
 70 |         query: str = Field(
 71 |             description="The query used to retrieve documents related to the user's request. It should be a sentence which will be used to match descriptions using the OpenAI text embedding to match document chunks in a vector database."
 72 |         ),
 73 |         top_k: int = Field(
 74 |             3,
 75 |             description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
 76 |         ),
 77 |     ):
 78 |         channel_results = []
 79 |         # channel_urls = []
 80 |         # limit top_k from 1 to 15
 81 |         top_k = max(1, min(top_k, 15))
 82 |         docs_store = docs_store_dict[collection["id"]]
 83 | 
 84 |         print(f"Retrieving documents from database {collection['id']} with query: {query}")
 85 |         channel_results.append(
 86 |             await docs_store.asimilarity_search_with_relevance_scores(
 87 |                 query, k=top_k
 88 |             )
 89 |         )
 90 | 
 91 |         docs_with_score = [
 92 |             DocWithScore(
 93 |                 doc=doc.page_content,
 94 |                 score=round(score, 2),
 95 |                 metadata=doc.metadata,  # , base_url=base_url
 96 |             )
 97 |             for results_with_scores in channel_results
 98 |             for doc, score in results_with_scores
 99 |         ]
100 |         # sort by relevance score
101 |         docs_with_score = sorted(docs_with_score, key=lambda x: x.score, reverse=True)[
102 |             : top_k
103 |         ]
104 | 
105 |         if len(docs_with_score) > 2:
106 |             print(
107 |                 f"Retrieved documents:\n{docs_with_score[0].doc[:20] + '...'} (score: {docs_with_score[0].score})\n{docs_with_score[1].doc[:20] + '...'} (score: {docs_with_score[1].score})\n{docs_with_score[2].doc[:20] + '...'} (score: {docs_with_score[2].score})"
108 |             )
109 |         else:
110 |             print(f"Retrieved documents:\n{docs_with_score}")
111 |         return docs_with_score
112 | 
113 |     channel_id = collection["id"]
114 |     base_url = collection.get("base_url")
115 |     reference = collection.get("reference")
116 |     if base_url:
117 |         base_url_prompt = f" The documentation is available at {base_url}."
118 |     else:
119 |         base_url_prompt = ""
120 |     
121 |     if reference:
122 |         reference_prompt = f" The reference is available at {reference}."
123 |     else:
124 |         reference_prompt = ""
125 |     run_extension.__name__ = "Search" + title_case(channel_id)
126 |     run_extension.__doc__ = f"""Searching documentation for {channel_id}: {collection['description']}.{base_url_prompt}. {reference_prompt}"""
127 |     return schema_tool(run_extension)
128 | 
# Collection fields that are copied into the per-tool info entries when present.
INFO_KEYS = [
    "name",
    "description",
    "authors",
    "license",
    "reference",
]
130 | 
def get_extension():
    """Create the documentation and book search extensions from the manifest.

    The knowledge base is loaded from the directory named by the
    ``BIOIMAGEIO_KNOWLEDGE_BASE_PATH`` environment variable (default
    ``./bioimageio-knowledge-base``); the directory is created when missing.
    Collections whose id contains ``"book"`` are grouped into the books
    extension, all others into the docs extension.

    Returns:
        A ``(docs_extension, books_extension)`` tuple. An element is ``None``
        when the manifest contains no collection for that group.
    """
    collections = get_manifest()["collections"]
    knowledge_base_path = os.environ.get(
        "BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base"
    )
    if not os.path.exists(knowledge_base_path):
        print(
            f"The knowledge base is not found at {knowledge_base_path}, will download it automatically."
        )
        os.makedirs(knowledge_base_path, exist_ok=True)

    docs_store_dict = load_knowledge_base(knowledge_base_path)

    docs_tools = {}
    docs_info = {}
    books_tools = {}
    books_info = {}
    for col in collections:
        info = {k: col[k] for k in INFO_KEYS if k in col}
        tool_key = "search_" + col["id"]
        if "book" in col["id"]:
            tools, infos = books_tools, books_info
        else:
            tools, infos = docs_tools, docs_info
        tools[tool_key] = create_tool(docs_store_dict, col)
        if info:
            infos[tool_key] = info

    # Start from None so that a manifest without docs (or without books)
    # yields a well-defined result instead of an UnboundLocalError on return.
    sinfo1 = None
    sinfo2 = None
    if docs_tools:
        sinfo1 = ChatbotExtension(
            id="docs",
            name="Search BioImage Docs",
            description="Search information in the documents of the bioimage.io knowledge base. Provide a list of keywords to search information in the documents. Returns a list of relevant documents. Ensure that the reference to the document is ALWAYS included!",
            tools=docs_tools,
            info=docs_info
        )
    if books_tools:
        sinfo2 = ChatbotExtension(
            id="books",
            name="Search BioImage Books",
            description="Search information in BioImage books. Provide a list of keywords to search information in the books. Returns a list of relevant documents. Ensure that the reference to the book is ALWAYS included!",
            tools=books_tools,
            info=books_info
        )

    return sinfo1, sinfo2
184 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
  1 | # Installation Guide
  2 | 
  3 | ## Setup the Chatbot locally
  4 | 
  5 | If you want to run the chatbot server locally, you need to have an OpenAI API key. You can get one by signing up at [OpenAI](https://beta.openai.com/). Once you have your API key, you can install the chatbot package via pip and set the environment variables:
  6 | 
  7 | ```bash
  8 | pip install bioimageio-chatbot
  9 | ```
 10 | 
 11 | ```bash
 12 | export OPENAI_API_KEY=sk-xxxxxxxx # Required
 13 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=/path/to/bioimageio-knowledge-base  # Optional, default to ./bioimageio-knowledge-base 
 14 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs # Optional, default to ./chat-logs
 15 | ```
 16 | 
 17 | The chatbot server backend has been tested on Ubuntu and macOS; it should work on Windows as well.
 18 | 
 19 | ## Command-line Interface
 20 | 
 21 | BioImage.IO Chatbot comes with a command-line interface to facilitate server management, connection to external servers, and knowledge base creation.
 22 | 
 23 | You can access the command-line interface by running `python -m bioimageio_chatbot` or the `bioimageio-chatbot` command.
 24 | 
 25 | Below are the available commands and options:
 26 | 
 27 | ### Initialize Knowledge Base
 28 | 
 29 | To initialize the knowledge base, use the `init` command:
 30 | 
 31 | ```bash
 32 | python -m bioimageio_chatbot init
 33 | ```
 34 | 
 35 | This will load the knowledge base from the location specified by the `BIOIMAGEIO_KNOWLEDGE_BASE_PATH` environment variable, or use the default path `./bioimageio-knowledge-base`. If the knowledge base is not found, it will be downloaded from the predefined URL (by default, it uses https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimageio-knowledge-base. It can be overridden with `BIOIMAGEIO_KNOWLEDGE_BASE_URL`).
 36 | 
 37 | NOTE: It may take some time to download the knowledge base depending on your internet connection. 
 38 | **Example:**
 39 | 
 40 | ```bash
 41 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH="./my_knowledge_base"
 42 | python -m bioimageio_chatbot init
 43 | ```
 44 | 
 45 | After running the `init` command, it will list the databases loaded into the knowledge base.
 46 | 
 47 | #### Start Server
 48 | 
 49 | To start your own server entirely, use the `start-server` command:
 50 | 
 51 | ```bash
 52 | python -m bioimageio_chatbot start-server [--host HOST] [--port PORT] [--public-base-url PUBLIC_BASE_URL]
 53 | ```
 54 | 
 55 | **Options:**
 56 | 
 57 | - `--host`: The host address to run the server on (default: `0.0.0.0`)
 58 | - `--port`: The port number to run the server on (default: `9000`)
 59 | - `--public-base-url`: The public base URL of the server (default: `http://127.0.0.1:9000`)
 60 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login)
 61 | 
 62 | **Example:**
 63 | 
 64 | ```bash
 65 | export OPENAI_API_KEY=sk-xxxxxxxx
 66 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
 67 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs
 68 | python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000
 69 | ```
 70 | This will create a local server, and the BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=http://127.0.0.1:9000
 71 | 
 72 | Open the link in a browser, and you will see the chat interface.
 73 | 
 74 | Please note that the chatbot server may not be accessible to users outside your local network.
 75 | 
 76 | A user guide and technical overview can be found [here](./technical-overview.md).
 77 | 
 78 | To be able to share your chatbot service over the internet (especially for users outside your local network), you will need to expose your server publicly. Please see [Connect to Server](#connect-to-server).
 79 | 
 80 | 
 81 | #### Connect to Server
 82 | 
 83 | To help you share your chatbot with users external to your local network, you can use our public [BioEngine](https://aicell.io/project/bioengine/) server as a proxy.
 84 | 
 85 | To connect to an external BioEngine server, use the `connect-server` command:
 86 | 
 87 | ```bash
 88 | python -m bioimageio_chatbot connect-server [--server-url SERVER_URL]
 89 | ```
 90 | 
 91 | **Options:**
 92 | 
 93 | - `--server-url`: The URL of the external BioEngine server to connect to (default: `https://ai.imjoy.io`)
 94 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login)
 95 | 
 96 | **Example:**
 97 | 
 98 | ```bash
 99 | export OPENAI_API_KEY=sk-xxxxxxxx
100 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
101 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs
102 | python -m bioimageio_chatbot connect-server --server-url=https://ai.imjoy.io
103 | ```
104 | 
105 | First, you will be asked to log in with a hypha account. Either your GitHub or Google account can be reused. Then, the following message containing a link to the chatbot will be displayed: 'The BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=https://ai.imjoy.io'
106 | 
107 | Leave your chatbot running to enable users inside or outside your network to access it from this URL.
108 | 
109 | #### User Management
110 | 
111 | If you set `--login-required` when running `start-server` or `connect-server`, users will be required to log in before accessing the chatbot. The chatbot will then collect the user's GitHub or Google account information and store it in its logs for future analysis.
112 | 
113 | You can also provide an optional environment variable `BIOIMAGEIO_AUTHORIZED_USERS_PATH` for the chatbot to load a list of authorized users. The file should be a JSON file with a `users` list in which each entry gives the email address of an authorized GitHub or Google account. For example:
114 | 
115 | ```json
116 | {
117 |     "users": [
118 |         {"email": "user1@email.org"}
119 |     ]
120 | }
121 | ```
122 | 
123 | #### Create Knowledge Base
124 | 
125 | To create a new knowledge base, use the `create-knowledge-base` command:
126 | 
127 | ```bash
128 | python -m bioimageio_chatbot create-knowledge-base [--output-dir OUTPUT_DIR]
129 | ```
130 | 
131 | **Options:**
132 | 
133 | - `--output-dir`: The directory where the knowledge base will be created (default: `./bioimageio-knowledge-base`)
134 | 
135 | **Example:**
136 | 
137 | ```bash
138 | export OPENAI_API_KEY=sk-xxxxxxxx
139 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base
140 | python -m bioimageio_chatbot create-knowledge-base --output-dir=./bioimageio-knowledge-base
141 | ```
142 | 
143 | 
144 | ### Running the BioImage.IO Chatbot in a Docker Container
145 | 
146 | #### Step 1: Build the Docker Image
147 | 
148 | To run the BioImage.IO Chatbot using a Docker container, follow these steps. First, build the Docker image by running the following command in your terminal:
149 | 
150 | ```bash
151 | docker build -t bioimageio-chatbot:latest .
152 | ```
153 | 
154 | If you prefer to use a pre-built Docker image from Docker Hub, you can pull the image using the following command:
155 | 
156 | ```bash
157 | docker pull alalulu/bioimageio-chatbot:latest
158 | ```
159 | 
160 | 
161 | #### Step 2: Start the Chatbot Server
162 | 
163 | After building the Docker image, you can start the chatbot server with the following command:
164 | 
165 | ```bash
166 | docker run -e OPENAI_API_KEY=sk-xxxxxxxxxxxxx -e BIOIMAGEIO_KNOWLEDGE_BASE_PATH=/knowledge-base -p 3000:9000 -v /path/to/local/knowledge-base:/knowledge-base bioimageio-chatbot:latest python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 --public-base-url=http://localhost:3000
167 | ```
168 | 
169 | Replace the placeholders in the command with the following values:
170 | 
171 | - `sk-xxxxxxxxxxxxx`: Your OpenAI API key.
172 | - `/path/to/local/knowledge-base`: The local path to your knowledge base folder.
173 | 
174 | Optionally, for improved reproducibility, you can change `latest` to a version tag such as `v0.1.18`.
175 | 
176 | #### Step 3: Access the Chatbot
177 | 
178 | The BioImage.IO Chatbot is now running in the Docker container. You can access it locally in your web browser by visiting:
179 | 
180 | ```
181 | https://bioimage.io/chat?server=http://localhost:3000
182 | ```
183 | 
184 | Make sure to replace `3000` with the host port you specified in the `docker run` command.
185 | 
186 | 
187 | Enjoy using the BioImage.IO Chatbot!
188 | 
189 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/static/bioimage-model-zoo-extension.imjoy.html:
--------------------------------------------------------------------------------
  1 | <docs lang="markdown">
  2 | [TODO: write documentation for this plugin.]
  3 | </docs>
  4 | 
  5 | <config lang="json">
  6 | {
  7 |     "name": "SearchBioImageModelZoo",
  8 |     "type": "web-python",
  9 |     "version": "0.1.0",
 10 |     "description": "BioImage.IO Chatbot Extension for getting information about models, applications, datasets, etc. in the BioImage Model Zoo.",
 11 |     "tags": [],
 12 |     "ui": "",
 13 |     "cover": "",
 14 |     "inputs": null,
 15 |     "outputs": null,
 16 |     "flags": [],
 17 |     "icon": "extension",
 18 |     "api_version": "0.1.8",
 19 |     "env": "",
 20 |     "permissions": [],
 21 |     "requirements": ["pydantic"],
 22 |     "dependencies": []
 23 | }
 24 | </config>
 25 | 
 26 | <script lang="python">
 27 | import sys
 28 | import io
 29 | from imjoy import api
 30 | from js import fetch
 31 | from pydantic import BaseModel, Field
 32 | from typing import Callable, Type, Optional
 33 | from typing import List, Optional, Dict, Any
 34 | 
 35 | class ResourceType(str):
 36 |     MODEL = "model"
 37 |     DATASET = "dataset"
 38 |     APPLICATION = "application"
 39 | 
 40 | def normalize_text(text: str) -> str:
 41 |     return text.replace('_', ' ').lower()
 42 | 
 43 | def matches_keywords(text: str, keywords: List[str]) -> bool:
 44 |     normalized_text = normalize_text(text)
 45 |     return any(keyword in normalized_text for keyword in keywords)
 46 | 
 47 | def search_item(item: Dict[str, Any], keywords: List[str]) -> bool:
 48 |     search_fields = [item.get('id', ''), item.get('nickname', ''), item.get('name', ''),
 49 |         item.get('nickname_icon', ''), item.get('license', ''), item.get('description', ''), item.get('created')
 50 |     ] + [tag for tag in item.get('tags', [])]
 51 |     search_fields += [author['name'] for author in item.get('authors', [])]
 52 |     return any(matches_keywords(field, keywords) for field in search_fields)
 53 | 
 54 | def search(keywords, type, top_k, resource_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 55 |     keywords = [normalize_text(keyword) for keyword in keywords]
 56 |     filtered_items = []
 57 |     for item in resource_items:
 58 |         if type and item.get('type') != type:
 59 |             continue
 60 |         if search_item(item, keywords):
 61 |             filtered_items.append(item)
 62 |         if len(filtered_items) == top_k:
 63 |             break
 64 |     return filtered_items
 65 | 
 66 | async def load_model_info():
 67 |     response = await fetch("https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/collection.json")
 68 |     model_info = await response.json()
 69 |     model_info = model_info.to_py()
 70 |     resource_items = model_info['collection']
 71 |     return resource_items
 72 | 
 73 | def execute_code(script, context=None):
 74 |     if context is None:
 75 |         context = {}
 76 | 
 77 |     # Redirect stdout and stderr to capture their output
 78 |     original_stdout = sys.stdout
 79 |     original_stderr = sys.stderr
 80 |     sys.stdout = io.StringIO()
 81 |     sys.stderr = io.StringIO()
 82 | 
 83 |     try:
 84 |         # Create a copy of the context to avoid modifying the original
 85 |         local_vars = context.copy()
 86 | 
 87 |         # Execute the provided Python script with access to context variables
 88 |         exec(script, local_vars)
 89 | 
 90 |         # Capture the output from stdout and stderr
 91 |         stdout_output = sys.stdout.getvalue()
 92 |         stderr_output = sys.stderr.getvalue()
 93 | 
 94 |         return {
 95 |             "stdout": stdout_output,
 96 |             "stderr": stderr_output,
 97 |             # "context": local_vars  # Include context variables in the result
 98 |         }
 99 |     except Exception as e:
100 |         return {
101 |             "stdout": "",
102 |             "stderr": str(e),
103 |             # "context": context  # Include context variables in the result even if an error occurs
104 |         }
105 |     finally:
106 |         # Restore the original stdout and stderr
107 |         sys.stdout = original_stdout
108 |         sys.stderr = original_stderr
109 | 
async def register_chatbot_extension(register):
    """Load the model zoo collection and register the chatbot extension.

    Two tools are registered: ``run_script`` executes a Python script with
    the loaded resource items available as ``resources``, and ``search``
    performs a keyword search over the same items.

    Args:
        register: Awaitable callable that receives the extension definition.
    """
    resource_items = await load_model_info()
    types = set()
    tags = set()
    for resource in resource_items:
        # Entries are not guaranteed to carry both fields; skip what is missing
        # instead of failing the whole registration.
        if resource.get('type'):
            types.add(resource['type'])
        tags.update(resource.get('tags') or [])
    # Sort so that the generated tool description is the same on every load.
    types = sorted(types, key=str)
    tags = sorted(tags, key=str)[:5]
    keys = list(resource_items[0].keys()) if resource_items else []
    resource_item_stats = f"""- keys: {keys}\n- resource types: {types}\n- Example tags: {tags}\n"""

    class ModelZooInfoScript(BaseModel):
        script: str = Field(..., description="""Executable python script (Python runtime: Pyodide) for querying information""")

    ModelZooInfoScript.__doc__ = (
        "Search the BioImage Model Zoo for statistical information by executing Python3 scripts on the resource items. "
        "For example, counting models, applications, and datasets filtered by tags in the BioImage Model Zoo (bioimage.io). "
        "The generated scripts will be executed in the browser Pyodide environment; the script can access data through the 'resources' local variable, containing zoo resources as dictionaries. "
        "Handle any missing fields in zoo items, and ensure outputs are directed to stdout. "
        "Filter resources by the 'type' key without making remote server requests. 'resources' variable details:\n"
    ) + resource_item_stats


    class ModelZooSearchInput(BaseModel):
        """Search the BioImage Model Zoo (bioimage.io) resource items such as models, applications, datasets, etc. in the model zoo and return detailed information. The link format to the models etc. is: https://bioimage.io/#/?id=[ResourceID]"""
        keywords: List[str] = Field(..., description="List of keywords to search for in the model zoo.")
        top_k: int = Field(3, description="The maximum number of search results to return. Default is 3. Please be aware each item may contain a large amount of data.")
        type: Optional[ResourceType] = Field(None, description="The type of resource to search for. Options include 'model', 'dataset', 'application'.")


    def get_schema():
        # Maps each tool name to the schema of its input model.
        return {
            "run_script": ModelZooInfoScript.schema(),
            "search": ModelZooSearchInput.schema()
        }

    async def execute_script(kwargs):
        # Validate the arguments, then run the script against the loaded items.
        info_script = ModelZooInfoScript.parse_obj(kwargs)
        result = execute_code(info_script.script, {"resources": resource_items})
        return result

    async def execute_search(kwargs):
        # Validate the arguments, then run the keyword search.
        config = ModelZooSearchInput.parse_obj(kwargs)
        result = search(config.keywords, config.type, config.top_k, resource_items)
        return result

    await register({
        "_rintf": True,
        "id": "bioimage_model_zoo",
        "type": "bioimageio-chatbot-extension",
        "name": "BioImage Model Zoo",
        "description": "Getting information about models, applications, datasets, etc. in the BioImage Model Zoo. It takes a list of keywords or a python script to query the resources in the BioImage Model Zoo.",
        "get_schema": get_schema,
        "tools": {
            "run_script": execute_script,
            "search": execute_search,
        }
    })
168 | 
169 | 
170 | 
class ImJoyPlugin():
    """ImJoy plugin that registers the model zoo extension during setup."""

    async def setup(self):
        """Register with the chatbot API if present, else with the chatbot window."""
        if api.registerChatbotExtension:
            # Loading inside the chatbot
            registrar = api.registerChatbotExtension
        else:
            # Use together with the chatbot
            chatbot = await api.getWindow("BioImage.IO Chatbot")
            if not chatbot:
                # No chatbot window available: nothing to register with.
                await api.log('initialized')
                return
            registrar = chatbot.registerExtension

        await register_chatbot_extension(registrar)
        await api.showMessage("BioImage Model Zoo Chatbot Extension registered")
        await api.log('initialized')
185 | 
186 | api.export(ImJoyPlugin())
187 | </script>
188 | 


--------------------------------------------------------------------------------
/docs/usage-example.md:
--------------------------------------------------------------------------------
 1 | # BioImage.IO Chatbot Usage Example Guide
 2 | 
 3 | ## Introduction
 4 | Welcome to the world of BioImage.IO Chatbot, a revolutionary conversational assistant designed exclusively for the bioimage community. Our chatbot is more than just a virtual conversationalist; it's a sophisticated AI-powered tool that offers personalized, context-aware responses by drawing from a diverse array of databases, tool-specific documentation, and structured data sources.
 5 | 
 6 | The BioImage.IO Chatbot is all about efficiency, knowledge enrichment, and accessibility. In this guide, we will walk you through various aspects of its functionality, so you can harness its capabilities to enhance your computational bioimaging experience. Whether you're a biologist, bioimage analyst, or developer, this guide is your key to unlocking the full potential of the BioImage.IO Chatbot.
 7 | 
 8 | 
 9 | ## Basic Interaction
10 | ### How to Start a Conversation
11 | Firstly, to make the most of your chatbot experience, consider setting your profile information. This helps the chatbot understand your background and tailor responses accordingly.
12 | 
13 | To start a conversation, simply type `Hi`  or `Hello`, and the chatbot will warmly greet you in return.
14 | 
15 | ![Basic Interaction](./screenshots/chatbot-hi.png)
16 | 
17 | 
18 | ## Asking Questions
19 | ### Simple Questions
20 | You can ask the BioImage.IO Chatbot a wide range of general and straightforward questions. The chatbot will promptly retrieve and provide you with the relevant information. For example, you can inquire about a "Community Partner" in the BioImage Model Zoo.
21 | 
22 | ![What is a Community Partner in the BioImage Model Zoo](./screenshots/chatbot-community-partner.png)
23 | 
24 | ### Technical Questions
25 | If you have more technical questions, the chatbot is equipped to assist you effectively. It retrieves the necessary information, summarizes it, and presents it in a simple and understandable manner. For instance, you can ask the chatbot about procedures like "How to segment an image using Fiji" or "How to upload a model to the bioimage.io repository."
26 | 
27 | ![Segment an Image using Fiji](./screenshots/search-bioimage-docs.png)
28 | 
29 | 
30 | ### Personalization: tailored Responses
31 | The BioImage.IO Chatbot is designed to understand and cater to the unique backgrounds and needs of its users. Whether you're a biologist, bioimage analyst, or developer, the chatbot tailors its responses to provide you with the most relevant and context-aware information.
32 | 
33 | To illustrate this personalized approach, we have two screenshots of the chatbot answering the same question: "How can I analyze biological images?" In the first screenshot, we have Abby, a biologist, posing this query. In the second screenshot, it's Abby again, but this time, she identifies as a developer. As expected, the chatbot is able to provide different feedback to Abby based on her background and needs.
34 | 
35 | ![Abby, a biologist, willing to analyze biological images.](./screenshots/chatbot-biologist.png)
36 | ![Abby, a developer, willing to analyze biological images.](./screenshots/chatbot-developer.png)
37 | 
38 | ### How to switch chatbot's assistants
39 | We have three assistants built into the BioImage.IO Chatbot to cater to the different purposes and needs of users. BioImage Seeker is designed for users who seek information about bioimaging; it is equipped with 6+ extensions, including the community-driven knowledge base as well as online sources such as the image.sc forum and web search. BioImage Executor is designed for users who want to execute models, such as CellPose, using BioEngine. BioImage Tutor is designed for users who seek tutoring in bioimaging; it is equipped with the BioImage Books extension combined with web search and BioImage Model Zoo search.
40 | ![BioImage Tutor](./screenshots/bia-tutor.png) 
41 | ![BioImage Executor](./screenshots/bia-executor.png)
42 | 
43 | ## Database Integration
44 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io).
45 | In the chatbot's user interface, you'll find a list of the available knowledge base channels that the chatbot can access. You can choose to select a specific knowledge base channel or opt for the `auto` mode to query information from all integrated databases.
46 | 
47 | ## Extension Selection
48 | 
49 | If you are interested in using specific extensions in the chatbot, you can click 'More Options' and select one or several extensions from the extension list. This way, the information provided will be exclusively by using the specified extensions you have selected.
50 | 
51 | For this example, you can select the `Search Image.sc Forum` extension and ask the chatbot about software issues.
52 | 
53 | ![Select extension](./screenshots/select-extension.png)
54 | ![Image.sc Forum](./screenshots/search-image-forum.png)
55 | 
56 | ### Steps details of Chatbot's working trajectory
57 | To let you inspect how the chatbot arrived at an answer, every step of its working trajectory is included in the response to each query. Click 'More Details' at the bottom of the response to see the details of each step.
58 | 
59 | ![More details](./screenshots/more-details.png)
60 | 
61 | ### Search in Knowledge Base 
62 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io). This comprehensive integration enables our chatbot to not only respond to questions related to each of these individual tools but also tackle complex inquiries that demand information from multiple databases.
63 | 
64 | To illustrate this capability, consider the following scenario: You have a Whole Slide Image (WSI) that you'd like to open with [QuPath](https://qupath.github.io), apply the Chatty-Frog model ([StarDist H&E Nuclei Segmentation](https://bioimage.io/#/?tags=chatty-frog&id=10.5281%2Fzenodo.6338614)) from BioImage.IO using deepImageJ within Fiji. How can this be accomplished? You can present this intricate question to the BioImage.IO Chatbot.
65 | 
66 | The chatbot provides a detailed breakdown of the steps required to complete the requested pipeline. This example demonstrates how our chatbot excels at efficiently retrieving and summarizing information from various sources, offering a valuable solution for your bioimaging needs.
67 | 
68 | ![Cross-Database Query](./screenshots/search-bioimage-docs.png)
69 | 
70 | ### Retrieving Models
71 | The BioImage.IO Chatbot is a versatile tool that can generate and execute code in response to user queries. This means that when a user asks about specific models available in bioimage.io, the chatbot can fetch this information by generating a custom script.
72 | 
73 | For instance, if a user inquires about the total number of models in the BioImage Model Zoo, the chatbot can effortlessly retrieve this information using a straightforward script. However, the questions can become more intricate. Users might want to know which models in bioimage.io are suitable for a particular tag or task (*e.g., segmentation*) or similar criteria. The chatbot is adept at handling these more complex queries, too.
74 | 
75 | ![Model Queries](./screenshots/search-model-zoo.png)
76 | 
77 | 
78 | ### Model execution
79 | TODO: Update the description for model execution and screenshot for cellpose example.
80 | 
81 | The BioImage.IO Chatbot can also execute functions in external APIs, analysis pipelines, plugins, and user code. Cellpose image segmentation has been added as a Chatbot function calling extension and can be used as a template for additional APIs. Users may currently use the Cellpose API function calling by uploading a file and requesting the Chatbot to segment it either from the `auto` or `cellpose` channels.
82 | 
83 | ## Conclusion
84 | The BioImage.IO Chatbot is a powerful tool designed to provide you with accurate and personalized information from a wide range of databases. Whether you're a biologist or a developer, our chatbot is here to assist you. Feel free to explore its capabilities, ask questions, and customize your experience.
85 | 
86 | Explore more, learn more, and enjoy the benefits of BioImage.IO Chatbot!
87 | 
88 | ## Additional Resources
89 | - For documentation, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot).
90 | - Do you have questions or need assistance? Contact us through [GitHub Issues](https://github.com/bioimage-io/bioimageio-chatbot/issues).
91 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/static/worker-manager.js:
--------------------------------------------------------------------------------
  1 | class PyodideWorkerManager {
  2 |   hyphaServices = {}
  3 |   workers = {}
  4 |   workerApps = {}
  5 |   subscribers = []
  6 |   workerRecords = {}
  7 |   // native file system handle
  8 |   constructor(dirHandle, mountPoint) {
  9 |     this.workers = {}
 10 |     this.workerRecords = {}
 11 |     this.dirHandle = dirHandle
 12 |     this.mountPoint = mountPoint || "/mnt"
 13 |   }
 14 | 
 15 |   getDirHandle() {
 16 |     return this.dirHandle
 17 |   }
 18 | 
 19 |   // Subscribe method
 20 |   subscribe(callback) {
 21 |     this.subscribers.push(callback)
 22 | 
 23 |     // Return an unsubscribe function
 24 |     return () => {
 25 |       this.subscribers = this.subscribers.filter(sub => sub !== callback)
 26 |     }
 27 |   }
 28 | 
 29 |   // Call this method whenever the workers list changes
 30 |   notify() {
 31 |     this.subscribers.forEach(callback => callback())
 32 |   }
 33 | 
 34 |   getWorkerApps() {
 35 |     // return appInfo
 36 |     return Object.values(this.workerApps)
 37 |   }
 38 | 
 39 |   async createWorker(info) {
 40 |     const id = Math.random().toString(36).substring(7)
 41 |     console.log("Creating worker:", id)
 42 |     const worker = new Worker("/chat/pyodide-worker.js")
 43 |     await new Promise(resolve => (worker.onmessage = () => resolve()))
 44 |     this.workers[id] = worker
 45 |     worker.kill = () => {
 46 |       worker.terminate()
 47 |       worker.terminated = true;
 48 |     }
 49 |     this.workerRecords[id] = []
 50 |     this.hyphaServices[id] = []
 51 |     const self = this
 52 |     const appService = {
 53 |       id,
 54 |       appInfo: info,
 55 |       worker,
 56 |       async runScript(script, ioContext) {
 57 |         return await self.runScript(id, script, ioContext)
 58 |       },
 59 |       async run_script(script, io_context) {
 60 |         return await self.runScript(id, script, io_context)
 61 |       },
 62 |       async mount(mountPoint, dirHandle) {
 63 |         return await self.mountNativeFs(id, mountPoint, dirHandle)
 64 |       },
 65 |       async render(container) {
 66 |         self.render(id, container)
 67 |       },
 68 |       async renderSummary(container) {
 69 |         return self.renderSummary(id, container)
 70 |       },
 71 |       async close() {
 72 |         await self.closeWorker(id)
 73 |       },
 74 |       getLogs() {
 75 |         return self.workerRecords[id]
 76 |       },
 77 |       get_logs() {
 78 |         return self.workerRecords[id]
 79 |       },
 80 |       async listHyphaServices() {
 81 |         return self.hyphaServices[id]
 82 |       },
 83 |       async list_hypha_services() {
 84 |         return self.hyphaServices[id]
 85 |       }
 86 |     }
 87 |     this.workerApps[id] = appService
 88 |     if (this.dirHandle) {
 89 |       await this.mountNativeFs(id)
 90 |     }
 91 |     this.notify()
 92 |     return appService
 93 |   }
 94 | 
 95 |   async closeWorker(id) {
 96 |     if (this.workers[id]) {
 97 |       this.workers[id].kill();
 98 |       delete this.workers[id]
 99 |       delete this.workerRecords[id]
100 |       delete this.workerApps[id]
101 |       this.notify()
102 |     }
103 |   }
104 | 
105 |   async getWorker(id) {
106 |     if (id && this.workers[id]) {
107 |       return this.workers[id]
108 |     } else {
109 |       throw new Error("No worker found with ID: " + id)
110 |     }
111 |   }
112 | 
113 |   async mountNativeFs(workerId, mountPoint, dirHandle) {
114 |     if (!workerId) {
115 |       throw new Error("No worker ID provided and no current worker available.")
116 |     }
117 |     const worker = await this.getWorker(workerId)
118 |     return new Promise((resolve, reject) => {
119 |       const handler = e => {
120 |         if (e.data.mounted) {
121 |           worker.removeEventListener("message", handler)
122 |           resolve(true)
123 |         } else if (e.data.mountError) {
124 |           worker.removeEventListener("message", handler)
125 |           reject(new Error(e.data.mountError))
126 |         }
127 |       }
128 |       worker.addEventListener("message", handler)
129 |       worker.postMessage({
130 |         mount: {
131 |           mountPoint: mountPoint || this.mountPoint,
132 |           dirHandle: dirHandle || this.dirHandle
133 |         }
134 |       })
135 |     })
136 |   }
137 | 
138 |   addToRecord(workerId, record) {
139 |     if (!this.workerRecords[workerId]) {
140 |       this.workerRecords[workerId] = []
141 |     }
142 |     this.workerRecords[workerId].push(record)
143 |   }
144 | 
145 |   renderOutputSummary(container, record) {
146 |     // return a string preview of the output
147 |     if (record.type === "store") {
148 |       return `Store: ${record.key}`
149 |     }
150 |     else if (record.type === "script") {
151 |       return `Script>>>:\n\`\`\`python\n${record.content}\n\`\`\`\n`
152 |     } else if (record.type === "stdout") {
153 |       if(record.content.trim() === "\n") {
154 |         return "\n"
155 |       }
156 |       return `${record.content}\n`
157 |     } else if (record.type === "stderr") {
158 |       if(record.content.trim() === "\n") {
159 |         return "\n"
160 |       }
161 |       return `${record.content}\n`
162 |     } else if (record.type === "service") {
163 |       return `Service: ${record.content}`
164 |     } else if (record.type === "audio" || record.type === "img") {
165 |       return `Image: <Object>`
166 |     }
167 |   }
168 | 
169 |   renderOutput(container, record) {
170 |     if (record.type === "stdout" || record.type === "stderr") {
171 |       if(record.content.trim() !== "\n" && record.content.trim() !== ""){
172 |         const outputEl = document.createElement("pre")
173 |         if (record.type === "stderr") {
174 |           outputEl.style.color = "red"
175 |         }
176 |         outputEl.textContent = record.content
177 |         container.appendChild(outputEl)
178 |       }
179 |     }
180 |     else if (record.type === "store") {
181 |       const storeEl = document.createElement("pre")
182 |       storeEl.textContent = `Store: ${record.key}`
183 |       container.appendChild(storeEl)
184 |     }
185 |     else if (record.type === "script") {
186 |       const scriptEl = document.createElement("pre")
187 |       scriptEl.textContent = `Script: ${record.content}`
188 |       container.appendChild(scriptEl)
189 |     } else if (record.type === "service") {
190 |       // display service info
191 |       const serviceEl = document.createElement("div")
192 |       serviceEl.textContent = `Service: ${record.content}`
193 |       container.appendChild(serviceEl)
194 |     } else if (record.type === "audio" || record.type === "img") {
195 |       const el = document.createElement(record.type)
196 |       el.src = record.content
197 |       if (record.attrs) {
198 |         record.attrs.forEach(([attr, value]) => {
199 |           el.setAttribute(attr, value)
200 |         })
201 |       }
202 |       if (record.type === "audio") {
203 |         el.controls = true
204 |       }
205 |       container.appendChild(el)
206 |     }
207 |   }
208 | 
209 |   async readStoreItem(workerId, key) {
210 |     const records = this.workerRecords[workerId]
211 |     return records.filter(record => record.type === "store" && (!key || record.key === key))[0]
212 |   }
213 | 
214 |   async runScript(workerId, script, ioContext) {
215 |     const outputContainer = ioContext && ioContext.output_container
216 |     if(outputContainer) {
217 |       delete ioContext.output_container
218 |     }
219 |     const worker = await this.getWorker(workerId)
220 |     if(worker.terminated){
221 |       throw new Error("Worker already terminated")
222 |     }
223 |     return new Promise((resolve, reject) => {
224 |       worker.onerror = e => console.error(e)
225 |       worker.kill = () => {
226 |         worker.terminate()
227 |         worker.terminated = true;
228 |         reject("Python runtime was killed")
229 |       }
230 |       const outputs = []
231 |       const handler = e => {
232 |         if (e.data.type !== undefined) {
233 |           if(!ioContext || !ioContext.skip_record)
234 |           this.addToRecord(workerId, e.data)
235 |           outputs.push(e.data)
236 |           if (outputContainer) {
237 |             this.renderOutput(outputContainer, e.data)
238 |           }
239 |           if (e.data.type === "service") {
240 |             this.hyphaServices[workerId].push(e.data.attrs)
241 |           }
242 |         } else if (e.data.executionDone) {
243 |           worker.removeEventListener("message", handler)
244 |           resolve(outputs)
245 |         } else if (e.data.executionError) {
246 |           console.error("Execution Error", e.data.executionError)
247 |           worker.removeEventListener("message", handler)
248 |           reject(e.data.executionError)
249 |         }
250 |       }
251 |       worker.addEventListener("message", handler)
252 |       if(!ioContext || !ioContext.skip_record)
253 |         this.addToRecord(workerId, { type: 'script', content: script });
254 |       worker.postMessage({ source: script, io_context: ioContext })
255 |     })
256 |   }
257 | 
258 |   render(workerId, container) {
259 |     const records = this.workerRecords[workerId]
260 |     if (!records) {
261 |       console.error("No records found for worker:", workerId)
262 |       return
263 |     }
264 |     records.forEach(record => this.renderOutput(container, record))
265 |   }
266 | 
267 |   renderSummary(workerId, container) {
268 |     const records = this.workerRecords[workerId]
269 |     if (!records) {
270 |       console.error("No records found for worker:", workerId)
271 |       return
272 |     }
273 |     
274 |     let outputSummay = ""
275 |     records.forEach(record => {
276 |       const summary = this.renderOutputSummary(container, record)
277 |       outputSummay += summary
278 |     })
279 |     return outputSummay
280 |   }
281 | }
282 | 
283 | window.PyodideWorkerManager = PyodideWorkerManager;


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
  1 | # 🦒 BioImage.IO Chatbot 🤖
  2 | 
  3 | **📣 Publication on Nature Methods: https://www.nature.com/articles/s41592-024-02370-y. Free access: https://rdcu.be/dQuw7**
  4 | 
  5 | **👇 Want to Try the Chatbot? [Visit here!](https://bioimage.io/chat)**
  6 | 
  7 | **📖 Read the full Documentation [here](https://bioimage-io.github.io/bioimageio-chatbot/#/)**
  8 | ## Your Personal Assistant in Computational Bioimaging
  9 | 
 10 | Welcome to the BioImage.IO Chatbot user guide. This guide will help you get the most out of the chatbot, providing detailed information on how to interact with it and retrieve valuable insights related to computational bioimaging.
 11 | 
 12 | ## Introduction
 13 | 
 14 | The BioImage.IO Chatbot is a versatile conversational agent designed to assist users in accessing information related to computational bioimaging. It leverages the power of Large Language Models (LLMs) and integrates user-specific data to provide contextually accurate and personalized responses. Whether you're a researcher, developer, or scientist, the chatbot is here to make your bioimaging journey smoother and more informative.
 15 | 
 16 | 
 17 | ![screenshot for the chatbot](./screenshots/chatbot-animation.gif)
 18 | 
 19 | You can watch a demonstration of the BioImage.IO Chatbot performing image analysis in this [video](https://zenodo.org/records/10967840/preview/Supplementary-Video-2-bioimageio-chatbot-ai-image-analysis.mp4?include_deleted=0).
 20 | 
 21 | 
 22 | The following diagram shows how the chatbot works:
 23 | 
 24 | <img src="https://docs.google.com/drawings/d/e/2PACX-1vROHmf1aZPMLOMvwjot1laB9wvRsaDkjkYbGNNveqN-Pm_9xWlD48krQMobWT1WrrOrZnwH9gPLsDRw/pub?w=1392&amp;h=1112">
 25 | 
 26 | ## Chatbot Features
 27 | 
 28 | The BioImage.IO Chatbot is equipped with an array of capabilities designed to enhance the bioimaging experience:
 29 | 
 30 | * **Contextual and Personalized Response**: Interprets the context of inquiries to deliver relevant and accurate responses. Adapts interactions based on user-specific background information to provide customized advice.
 31 | 
 32 | * **Comprehensive Data Source Integration**: Accesses a broad range of databases and documentation for bioimaging, including [bio.tools](https://bio.tools), [ImageJ.net](https://imagej.net/), [deepImageJ](https://deepimagej.github.io/deepimagej/), [ImJoy](https://imjoy.io/#/), and [bioimage.io](https://bioimage.io). Details on the supported sources are maintained in the [`knowledge-base-manifest.yaml`](knowledge-base-manifest.yaml) file.
 33 | 
 34 | * **Advanced Query Capabilities**: Generates and executes Python scripts for detailed queries within structured databases such as CSV, JSON files, or SQL databases, facilitating complex data retrievals.
 35 | 
 36 | * **AI-Powered Analysis and Code Interpretation**: Directly runs complex image analysis tasks using advanced AI models like Cellpose, via an embedded code interpreter.
 37 | 
 38 | * **Performance Enhancements with ReAct and RAG**: Utilizes a Retrieval Augmented Generation system with a ReAct loop for dynamic, iterative reasoning and tool engagement, improving response quality.
 39 | 
 40 | * **Extension Mechanism for Developers**: Allows for the development of custom extensions using ImJoy plugins or hypha services within Jupyter notebooks, enhancing flexibility and integration possibilities.
 41 | 
 42 | * **Vision Inspection and Hardware Control**: Features a Vision Inspector extension powered by GPT-4 for visual feedback on image content and analysis outcomes, and demonstrates potential for controlling microscopy hardware in smart microscopy setups.
 43 | 
 44 | * **Interactive User Interface and Documentation**: Offers a user-friendly interface with comprehensive support documents, ensuring easy access to its features and maximizing user engagement.
 45 | 
 46 | ## Using the Chatbot
 47 | 
 48 | We are providing a public chatbot service for you to try out. You can access the chatbot [here](https://chat.bioimage.io/chat).
 49 | 
 50 | Please note that the chatbot is still in beta and is being actively developed. We log the messages you send to the chatbot so that we can investigate issues and support our development. See the [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md). If you want to remove your chat logs, please contact us via [this form](https://oeway.typeform.com/to/K3j2tJt7).
 51 | 
 52 | Here you can find the usage guide and more examples: [Usage guide and example screenshots](./usage-example.md).
 53 | 
 54 | If you encounter any issues, please report them via [Github](https://github.com/bioimage-io/bioimageio-chatbot/issues).
 55 | 
 56 | 
 57 | ### Asking Questions
 58 | 
 59 | To ask the chatbot a question, type your query and send it. The chatbot will analyze your question and provide a relevant response. You can ask questions related to bioimaging, software tools, models, and more.
 60 | 
 61 | ### Personalized Responses
 62 | 
 63 | The chatbot uses your user profile information, such as your name, occupation, and background, to personalize its responses. This ensures that the information you receive is tailored to your specific needs.
 64 | 
 65 | 
 66 | ## Setup Your Own Chatbot
 67 | 
 68 | You can also set up your own chatbot server. Please refer to the [installation guide](./installation.md) for detailed instructions on how to set up the chatbot server on your local machine or server.
 69 | 
 70 | ## Technical Overview
 71 | 
 72 | Please read the [technical overview](./technical-overview.md) for more details about the chatbot's design and implementation.
 73 | 
 74 | ## Develop Chatbot Extensions
 75 | 
 76 | The BioImage.IO Chatbot is designed to be extensible, allowing developers to create custom extensions to add new functionalities to the chatbot. You can create extensions to integrate new tools, databases, and services into the chatbot, making it more powerful and versatile. See the [development guide](./development.md) for more details.
 77 | 
 78 | ## Join Us as a Community Partner
 79 | 
 80 | The BioImage.IO Chatbot is a community-driven project. We welcome contributions from the community to help improve the chatbot's knowledge base and make it more informative and useful to the community.
 81 | 
 82 | For more information, please visit the [contribution guidelines](./CONTRIBUTING.md).
 83 | 
 84 | If you are a tool developer or a database maintainer related to bioimaging, you can join us as a community partner. Please get in touch with us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues).
 85 | 
 86 | ## Contact Us
 87 | 
 88 | If you have any questions, need assistance, or want to contribute to the chatbot's knowledge base, please do not hesitate to contact us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). Our team is here to help you get started and make valuable contributions.
 89 | 
 90 | Thanks for your support and helping make the BioImage.IO Chatbot more informative and useful to the community.
 91 | 
 92 | ## Publication
 93 | 
 94 | For a detailed description of our work, please read our preprint: **[![arXiv](https://img.shields.io/badge/arXiv-2310.18351-red.svg)](https://arxiv.org/abs/2310.18351) <a href="https://zenodo.org/records/10032227" target="_blank"><img id="record-doi-badge" data-target="[data-modal='10.5281/zenodo.10032227']" src="https://zenodo.org/badge/DOI/10.5281/zenodo.10032227.svg" alt="10.5281/zenodo.10032227"></a>**
 95 | 
 96 | 
 97 | To reproduce the use cases described in [Figure 2](https://docs.google.com/drawings/d/e/2PACX-1vTIRwRldQBnTFqz0hvS01znGOEdoeDMJmZC-PlBM-O59u_xo7DfJlUEE9SlRsy6xO1hT2HuSOBrLmUz/pub?w=1324&amp;h=1063) in the manuscript, please refer to the [reproducing example usage scenarios](./figure-2-use-cases.md).
 98 | 
 99 | <img style="width:300px;" src="https://docs.google.com/drawings/d/e/2PACX-1vTIRwRldQBnTFqz0hvS01znGOEdoeDMJmZC-PlBM-O59u_xo7DfJlUEE9SlRsy6xO1hT2HuSOBrLmUz/pub?w=1324&amp;h=1063">
100 | 
101 | 
102 | ## Cite Us
103 | 
104 | If you use the BioImage.IO Chatbot in your research, please cite us:
105 | 
106 | ```
107 | Lei, W., Fuster-Barceló, C., Reder, G. et al. BioImage.IO Chatbot: a community-driven AI assistant for integrative computational bioimaging. Nat Methods 21, 1368–1370 (2024). https://doi.org/10.1038/s41592-024-02370-y
108 | ```
109 | 
110 | ## Acknowledgements
111 | 
112 | The BioImage.IO Chatbot is greatly enriched by the valuable resources contributed by the global bioimage analysis community. We extend our deepest gratitude to all authors and contributors listed in the [knowledge base manifest](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/knowledge-base-manifest.yaml), especially the original authors of the materials integrated into our extensions. Their work has significantly enhanced the capabilities of this chatbot, providing users with high-quality information and tools.
113 | 
114 | We would also like to thank the open-source community for their ongoing support and innovation, which continuously drives the development and improvement of the BioImage.IO platform.
115 | 
116 | We thank [AI4Life consortium](https://ai4life.eurobioimaging.eu/) for its crucial support in the development of the BioImage.IO Chatbot.
117 | 
118 | ![AI4Life](https://ai4life.eurobioimaging.eu/wp-content/uploads/2022/09/AI4Life-logo_giraffe-nodes-2048x946.png)
119 | 
120 | AI4Life has received funding from the European Union’s Horizon Europe research and innovation programme under grant agreement number 101057970. Views and opinions expressed are, however, those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them.
121 | 
122 | Thank you for supporting the BioImage.IO project and contributing to the advancement of bioimage analysis!
123 | 
124 | ## Disclaimer
125 | 
126 | 
127 | These resources are primarily utilized for research purposes in compliance with the European Union's copyright exception on Text and Data Mining (TDM) as outlined in [Directive (EU) 2019/790](https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32019L0790).
128 | 
129 | The full list of sources and resources used by the BioImage.IO Chatbot can be found in our [knowledge base manifest](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/knowledge-base-manifest.yaml). We kindly ask users to carefully verify the accuracy and completeness of information with the original sources. For any work derived from these resources, please ensure that you respect the respective licenses and adhere to the conditions set forth by the original authors.
130 | 
131 | If you are an author of any material used within the BioImage.IO Chatbot and wish to have your content removed, please contact us directly. We are committed to respecting the rights of all contributors and will promptly address your request.
132 | 
133 | See the full [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md).
134 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/knowledge_base.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import requests
  3 | import zipfile
  4 | import shutil
  5 | from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
  6 | from langchain_community.vectorstores import FAISS
  7 | from langchain_community.document_loaders import TextLoader, PyPDFLoader
  8 | from langchain_openai import OpenAIEmbeddings
  9 | from langchain.docstore.document import Document
 10 | import json
 11 | import pickle
 12 | from bioimageio_chatbot.utils import get_manifest, download_file
 13 | 
 14 | 
 15 | def load_docs_store(db_path, collection_name):
 16 |     # Each collection has two files [collection_name].faiss and [collection_name].pkl
 17 |     # Check if it exists, otherwise, download from {KNOWLEDGE_BASE_URL}/[collection].faiss
 18 |     if not os.path.exists(os.path.join(db_path, f"{collection_name}.faiss")):
 19 |         raise Exception(f"Please build the docs store {collection_name} by running create_vector_knowledge_base first.")
 20 |     if not os.path.exists(os.path.join(db_path, f"{collection_name}.pkl")):
 21 |         raise Exception(f"Please build the docs store {collection_name} by running create_vector_knowledge_base first.")
 22 |     # Load from vector store
 23 |     embeddings = OpenAIEmbeddings()
 24 |     docs_store = FAISS.load_local(index_name=collection_name, folder_path=db_path, embeddings=embeddings, allow_dangerous_deserialization=True)
 25 |     return docs_store
 26 | 
 27 | 
 28 | def load_knowledge_base(db_path):
 29 |     collections = get_manifest()['collections']
 30 |     docs_store_dict = {}
 31 |     
 32 |     for collection in collections:
 33 |         channel_id = collection['id']
 34 |         try:
 35 |             docs_store = load_docs_store(db_path, channel_id)
 36 |             length = len(docs_store.docstore._dict.keys())
 37 |             assert length > 0, f"Please make sure the docs store {channel_id} is not empty."
 38 |             print(f"Loaded {length} documents from {channel_id}")
 39 |             docs_store_dict[channel_id] = docs_store
 40 |         except Exception as e:
 41 |             print(f"Failed to load docs store for {channel_id}. Error: {e}")
 42 | 
 43 |     if len(docs_store_dict) == 0:
 44 |         raise Exception("No docs store is loaded, please make sure the docs store is not empty.")
 45 | 
 46 |     return docs_store_dict
 47 | 
 48 | def extract_biotools_information(json_file_path):
 49 |     with open(json_file_path, 'r') as f:
 50 |         data = json.load(f)
 51 |     
 52 |     extracted_info = []
 53 |     data['url'] = f"https://bio.tools/{data['name']}"
 54 |     # Extracting required information
 55 |     if 'name' in data:
 56 |         extracted_info.append(f"Name: {data['name']}")
 57 |     if 'description' in data:
 58 |         extracted_info.append(f"Description: {data['description']}")
 59 |     
 60 |     if 'toolType' in data:
 61 |         extracted_info.append(f"Tags: {', '.join(data['toolType'])}")
 62 |         
 63 |     if 'topic' in data:
 64 |         topics = [item['term'] for item in data['topic']]
 65 |         extracted_info.append(f"Topics: {', '.join(topics)}")
 66 |     
 67 |     if 'publication' in data:
 68 |         for pub in data['publication']:
 69 |             if 'metadata' in pub and 'authors' in pub['metadata']:
 70 |                 authors = [author['name'] for author in pub['metadata']['authors']]
 71 |                 extracted_info.append(f"Publication Authors: {', '.join(authors)}")
 72 |     # Write extracted information to text file
 73 |     return "\n".join(extracted_info), data
 74 | 
 75 | # Read text_files folder to get all txt files including the ones in subfolders
 76 | def parse_docs(root_folder, md_separator=None, pdf_separator=None, chunk_size=1000, chunk_overlap=10):
 77 |     chunk_list = []
 78 |     for foldername, _, filenames in os.walk(root_folder):
 79 |             for filename in filenames:
 80 |                 file_path = os.path.join(foldername, filename)
 81 |                 if filename.endswith(".md"):
 82 |                     print(f"Reading {file_path}...")
 83 |                     documents = TextLoader(file_path).load()
 84 |                     text_splitter = CharacterTextSplitter(separator=md_separator or "\n## ", chunk_size=chunk_size, chunk_overlap=chunk_overlap)
 85 |                     chunks =text_splitter.split_documents(documents)
 86 |                 elif filename.endswith(".pdf"):
 87 |                     print(f"Reading {file_path}...")
 88 |                     documents = PyPDFLoader(file_path).load()
 89 |                     text_splitter = RecursiveCharacterTextSplitter(separators=pdf_separator or ["\n\n", "\n", " ", ""], chunk_size=chunk_size, chunk_overlap=chunk_overlap)
 90 |                     chunks = text_splitter.split_documents(documents)    
 91 |                 elif filename.endswith(".txt"):
 92 |                     print(f"Reading {file_path}...")
 93 |                     documents = TextLoader(file_path).load()
 94 |                     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=chunk_size, chunk_overlap=chunk_overlap)
 95 |                     chunks = text_splitter.split_documents(documents)
 96 |                 elif filename.endswith(".biotools.json"):
 97 |                     # convert json to yaml
 98 |                     print(f"Reading {file_path}...")
 99 |                     content, metadata = extract_biotools_information(file_path)
100 |                     chunks = [Document(page_content=content, metadata=metadata)]         
101 |                 else:
102 |                     print(f"Skipping {file_path}")
103 |                     continue
104 |                 chunk_list.extend(chunks)
105 |                     
106 |     return chunk_list
107 | 
def download_docs(root_dir, url):
    """Download a ZIP archive or PDF from ``url`` into a freshly emptied ``root_dir``.

    The URL is validated first, so an unsupported URL no longer wipes an
    existing ``root_dir``. For supported URLs any existing ``root_dir`` is
    removed and recreated before downloading.

    Args:
        root_dir: target directory; its previous content is deleted.
        url: URL whose last path segment ends with ``.zip`` or ``.pdf``.

    Returns:
        Path of the folder holding the documents. When that folder contains
        exactly one entry and it is a directory (for example the top-level
        folder of an unzipped repository), that inner directory is returned.

    Raises:
        Exception: for unsupported file formats or an empty download folder.
    """
    # extract filename from url, remove query string and fragment
    filename = url.split("/")[-1].split("?")[0].split("#")[0]
    if not filename.endswith((".zip", ".pdf")):
        raise Exception("Unsupported file format")

    target_directory = root_dir
    # if the target directory exists, remove it anyway and create a new one
    if os.path.exists(target_directory):
        shutil.rmtree(target_directory)
    os.makedirs(target_directory)

    if filename.endswith(".zip"):
        zip_file_path = os.path.join(target_directory, filename)
        print(f"Downloading {url} to {zip_file_path}")
        download_file(url, zip_file_path)

        result_folder = os.path.join(target_directory, filename + "-unzipped")
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(result_folder)

        # Clean up - remove the downloaded ZIP file
        os.remove(zip_file_path)
        print(f"Downloaded and unzipped {url} to {result_folder}")
    else:
        # PDF: stored in a folder named after the file without its extension
        result_folder = os.path.join(target_directory, ".".join(filename.split(".")[:-1]))
        os.makedirs(result_folder, exist_ok=True)
        print(f"Downloading {url} to {result_folder}")
        pdf_file_path = os.path.join(result_folder, filename)
        download_file(url, pdf_file_path)
        print(f"Downloaded {url} to {result_folder}")

    entries = os.listdir(result_folder)
    if len(entries) == 0:
        raise Exception("Downloaded folder is empty")
    if len(entries) == 1:
        # strip the folder name of the unzipped repo
        r_dir = os.path.join(result_folder, entries[0])
        if os.path.isdir(r_dir):
            return r_dir
    return result_folder
152 | 
153 | 
def create_vector_knowledge_base(output_dir=None, collections=None):
    """Create a vector knowledge base from the downloaded documents.

    For every non-custom collection the documents are loaded (from a cached
    ``<id>-docs.pickle`` file in ``output_dir`` if one exists, otherwise by
    downloading and parsing ``collection['source']``), embedded in batches and
    saved as a FAISS index named after the collection id.

    Args:
        output_dir: folder for the vector stores. Defaults to the
            ``BIOIMAGEIO_KNOWLEDGE_BASE_PATH`` environment variable, falling
            back to ``./bioimageio-knowledge-base``.
        collections: list of collection dicts; defaults to the manifest's
            ``collections``.
    """
    if output_dir is None:
        output_dir = os.environ.get("BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base")
    os.makedirs(output_dir, exist_ok=True)

    if not collections:
        collections = get_manifest()['collections']

    embeddings = OpenAIEmbeddings()
    # Number of documents sent to the embedding endpoint per request
    batch_size = 1000

    for collection in collections:
        if collection.get("format") and collection.get("format").startswith("custom:"):
            print(f"Skipping {collection['id']} because it is a custom collection.")
            continue
        url = collection['source']
        cached_docs_file = os.path.join(output_dir, collection['id'] + "-docs.pickle")
        if os.path.exists(cached_docs_file):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # the cache must come from a trusted source. This function only
            # reads the cache, it never writes it.
            with open(cached_docs_file, "rb") as f:
                documents = pickle.load(f)
        else:
            docs_dir = download_docs("./data", url)
            documents = parse_docs(
                os.path.join(docs_dir, collection.get('directory', '')),
                md_separator=collection.get('md_separator', None),
                pdf_separator=collection.get('pdf_separator', None),
                chunk_size=collection.get('chunk_size', 1000),
                chunk_overlap=collection.get('chunk_overlap', 10),
            )

        total_length = len(documents)
        if total_length == 0:
            # An index cannot be built from zero embeddings; skip this
            # collection instead of aborting the whole run.
            print(f"Warning: no documents found in {url}, skipping {collection['id']}.")
            continue
        if total_length > 10000:
            print(f"Warning: {total_length} documents found in {url}.")
        print(f"Creating embeddings (#documents={total_length})")

        all_embedding_pairs = []
        all_metadata = []

        # Loop over the documents in batches
        for batch_start in range(0, total_length, batch_size):
            batch_end = min(batch_start + batch_size, total_length)
            batch_docs = documents[batch_start:batch_end]
            batch_texts = [doc.page_content for doc in batch_docs]

            # Generate embeddings for the batch of texts
            batch_embeddings = embeddings.embed_documents(batch_texts)

            all_embedding_pairs.extend(zip(batch_texts, batch_embeddings))
            all_metadata.extend(doc.metadata for doc in batch_docs)

            print(f"Processed {batch_end}/{total_length} documents")

        # Create the FAISS index from all the embeddings
        vectordb = FAISS.from_embeddings(all_embedding_pairs, embeddings, metadatas=all_metadata)
        print("Saving the vector database...")
        vectordb.save_local(output_dir, index_name=collection['id'])
        print("Created a vector database from the downloaded documents.")


if __name__ == "__main__":
    create_vector_knowledge_base()


--------------------------------------------------------------------------------
/docs/development.md:
--------------------------------------------------------------------------------
  1 | # Developing Chatbot Extensions
  2 | 
  3 | ## Introduction
  4 | The BioImage.IO Chatbot offers a framework designed for easy extensibility, allowing developers to enrich its capabilities with custom extensions. This guide walks you through the process of developing and integrating new extensions into the chatbot, emphasizing the minimal requirements and the steps involved in using ImJoy to interact with the chatbot.
  5 | 
  6 | Extensions must expose a callable function that adheres to a specific interface: it should accept a dictionary of parameters as its single argument and return a dictionary containing the results of its operations. This design facilitates seamless integration and communication between the chatbot and its extensions.
  7 | 
  8 | A chatbot extension object is a dictionary with the following keys:
  9 |  - `id`: a unique identifier for the extension;
 10 |  - `name`: the name of the extension;
 11 |  - `description`: a short description of the extension;
 12 |  - `type`: it must be `bioimageio-chatbot-extension`;
 13 |  - `tools`: a dictionary of tool functions; it represents the set of functions your extension offers, each accepting configuration parameters as input. These functions should carry out specific tasks and return their results in a dictionary;
 14 |  - `get_schema`: a function that returns a JSON schema for each tool function, specifying the structure and types of the expected parameters. This schema is crucial for instructing the chatbot to generate the correct input parameters and for validating that the inputs adhere to the expected format. Importantly, the chatbot uses the title and description of each field to understand what the tool expects when generating a function call to run the tool (also see the detailed instructions below). To produce the schema, you can either write it manually or generate it automatically using libraries such as `pydantic`.
 15 | 
 16 | The following is a chatbot extension object defined in Python:
 17 | ```python
 18 | def my_tool(config):
 19 |     print(config["my_param"])
 20 |     return {"result": "success"}
 21 | 
 22 | def get_schema():
 23 |     return {
 24 |         "my_tool": {
 25 |             "type": "object",
 26 |             "title": "my_tool",
 27 |             "description": "my tool",
 28 |             "properties": {
 29 |                 "my_param": {
 30 |                     "type": "number",
 31 |                     "description": "This is my parameter"
 32 |                 }
 33 |             }
 34 |         }
 35 |     }
 36 | 
 37 | chatbot_extension = {
 38 |     "id": "my-extension",
 39 |     "type": "bioimageio-chatbot-extension",
 40 |     "name": "My Extension",
 41 |     "description": "This is my extension",
 42 |     "get_schema": get_schema,
 43 |     "tools": {
 44 |         "my_tool": my_tool
 45 |     }
 46 | }
 47 | ```
 48 | 
 49 | 
 50 | Instead of writing the schema manually, it is recommended to use libraries such as `pydantic` to generate the schema:
 51 | ```python
 52 | class MyTool(BaseModel):
 53 |     """my tool description"""
 54 |     my_param: float = Field(..., description="This is my parameter doc")
 55 | 
 56 | def my_tool(config):
 57 |     print(config["my_param"])
 58 |     return {"result": "success"}
 59 | 
 60 | def get_schema():
 61 |     return {
 62 |         "my_tool": MyTool.schema()
 63 |     }
 64 | 
 65 | chatbot_extension = {
 66 |     "id": "my-extension",
 67 |     "type": "bioimageio-chatbot-extension",
 68 |     "name": "My Extension",
 69 |     "description": "This is my extension",
 70 |     "get_schema": get_schema,
 71 |     "tools": {
 72 |         "my_tool": my_tool
 73 |     }
 74 | }
 75 | ```
 76 | 
 77 | In JavaScript, you can create the extension similarly:
 78 | ```javascript
 79 | 
 80 | const chatbotExtension = {
 81 |     id: "my-extension",
 82 |     type: "bioimageio-chatbot-extension",
 83 |     name: "My Extension",
 84 |     description: "This is my extension",
 85 |     get_schema() {
 86 |         return {
 87 |             my_tool: {
 88 |                 type: "object",
 89 |                 title: "my_tool",
 90 |                 description: "my tool description",
 91 |                 properties: {
 92 |                     my_param: {
 93 |                         type: "number",
 94 |                         description: "This is my parameter doc"
 95 |                     }
 96 |                 }
 97 |             }
 98 |         };
 99 |     },
100 |     tools: {
101 |         my_tool(config) {
102 |             console.log(config.my_param);
103 |             return {result: "success"};
104 |         }
105 |     }
106 | }
107 | 
108 | ```
109 | 
110 | After creating the extension object, there are two ways to serve it: one is to use the [ImJoy](https://imjoy.io) plugin framework to run the extension in the browser; the other is to use the [Hypha](https://ha.amun.ai) framework to serve the extension remotely, either in another browser tab or in a native Python process running on your local machine or a remote server.
111 | 
112 | ## Option 1: Register Extension with ImJoy
113 | 
114 | Below are examples demonstrating how to register an extension with the chatbot using both JavaScript and Python in ImJoy:
115 | 
116 | You can try them here: https://imjoy.io/lite?plugin=https://if.imjoy.io
117 | 
118 | ### Register Chatbot Extension with ImJoy in JavaScript
119 | 
120 | ```javascript
121 | const chatbot = await api.createWindow({
122 |     src: "https://bioimage.io/chat",
123 |     name:"BioImage.IO Chatbot",
124 | });
125 | chatbotExtension._rintf = true; // make the chatbot extension as an interface
126 | chatbot.registerExtension(chatbotExtension);
127 | ```
128 | 
129 | ### Register Chatbot Extension with ImJoy in Python
130 | 
131 | ```python
132 | from imjoy_rpc import api
133 | 
134 | chatbot = await api.createWindow(
135 |     src="https://bioimage.io/chat",
136 |     name="BioImage.IO Chatbot",
137 | )
138 | chatbotExtension._rintf = True # make the chatbot extension as an interface
139 | await chatbot.registerExtension(chatbot_extension)
140 | ```
141 | 
142 | ## Option 2: Serve Extension with Hypha
143 | 
144 | With Hypha, you can serve your extension remotely, enabling seamless integration with the chatbot.
145 | 
146 | Below are examples demonstrating how to serve an extension with Hypha using both JavaScript and Python:
147 | 
148 | ### Serve Chatbot Extension with Hypha in JavaScript
149 | 
150 | ```javascript
// Keep the server URL in one place: it is used for login, for the connection,
// and for building the link printed at the end.
const server_url = "https://chat.bioimage.io";
const token = await login({server_url});
const server = await connectToServer({server_url, token});
const svc = await server.registerService(chatbotExtension);
console.log(`Extension service registered with id: ${svc.id}, you can visit the service at: https://bioimage.io/chat?server=${server_url}&extension=${svc.id}`);
155 | ```
156 | 
157 | **IMPORTANT: The above Hypha service can only be accessed by the same user who registered the service. See the section "Making Chatbot Extension Public" below to make it public.**
158 | 
159 | ### Serve Chatbot Extension with Hypha in Python
160 | 
161 | ```python
162 | from hypha_rpc import connect_to_server, login
163 | 
164 | server_url = "https://chat.bioimage.io"
165 | token = await login({"server_url": server_url})
166 | server = await connect_to_server({"server_url": server_url, "token": token})
167 | svc = await server.register_service(chatbot_extension)
168 | print(f"Extension service registered with id: {svc.id}, you can visit the service at: https://bioimage.io/chat?server={server_url}&extension={svc.id}")
169 | ```
170 | 
171 | After registering the extension with Hypha, you can access the chatbot with the extension by visiting the following URL: `https://bioimage.io/chat?server=https://chat.bioimage.io&extension=<extension_id>`, where `<extension_id>` is the ID of the registered extension service.
172 | 
173 | **IMPORTANT: The above Hypha service can only be accessed by the same user who registered the service. See the section "Making Chatbot Extension Public" below to make it public.**
174 | 
175 | ### Making Chatbot Extension Public
176 | 
177 | To make it public, you need to set the visibility of the chatbot extension service to `public`.
178 | 
179 | See the example below:
180 | 
181 | ```python
182 | from hypha_rpc import connect_to_server, login
183 | 
184 | server_url = "https://chat.bioimage.io"
185 | token = await login({"server_url": server_url})
186 | server = await connect_to_server({"server_url": server_url, "token": token})
187 | # Below, we set the visibility to public
188 | chatbot_extension['config'] = {"visibility": "public"}
189 | svc = await server.register_service(chatbot_extension)
190 | print(f"Extension service registered with id: {svc.id}, you can visit the service at: https://bioimage.io/chat?server={server_url}&extension={svc.id}")
191 | ```
192 | 
193 | You can also implement authorization logic in the tool function, see [hypha service authorization](https://ha.amun.ai/#/?id=service-authorization).
194 | 
195 | ## Tutorial
196 | 
197 | For an in-depth understanding, refer to [our detailed tutorial](./bioimage-chatbot-extension-tutorial.ipynb), accessible directly through the ImJoy Jupyter Notebook in your browser without installation. [Click here to launch the notebook](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1).
198 | 
199 | ## Extension Development Details
200 | 
201 | ### `tools` and `get_schema`
202 | When developing extensions, it's essential to define the `tools` and `get_schema` functionalities carefully:
203 | - **`tools`**: Represents the set of functions your extension offers, each accepting configuration parameters as input. These functions should carry out specific tasks and return their results in a dictionary.
204 | - **`get_schema`**: Returns a JSON schema for each tool function, specifying the structure and types of the expected parameters. This schema is crucial for instructing the chatbot to generate the correct input parameters and for validating that the inputs adhere to the expected format. Importantly, the chatbot uses the title and description of each field to understand what the tool expects when generating a function call to run the tool (also see the detailed instructions below).
205 | 
206 | ### Notes on Function Input/Output
207 | The input and output of tool functions are restricted to primitive types (e.g., numbers, strings) that can be encoded in JSON. This limitation ensures compatibility and facilitates the exchange of data between the chatbot and extensions.
208 | 
209 | ### Importance of Detailed Descriptions
210 | Providing a detailed description for your extension and its arguments is vital. These descriptions assist the chatbot in correctly invoking the tools and help the chatbot understand the functionality and purpose of your extension. Ensure that each argument is accompanied by a clear title and a comprehensive description to improve usability and interaction quality of the chatbot.
211 | 
212 | By adhering to these guidelines, you will enhance the clarity, utility, and ease of integration of your chatbot extensions, contributing to a richer ecosystem of tools within the BioImage.IO community.
213 | 


--------------------------------------------------------------------------------
/bioimageio_chatbot/static/pyodide-worker.js:
--------------------------------------------------------------------------------
// Base URL of the Pyodide distribution. It is used twice: to import the loader
// script below and as the indexURL option passed to loadPyodide.
const indexURL = 'https://cdn.jsdelivr.net/pyodide/v0.25.0/full/'
importScripts(`${indexURL}pyodide.js`);

// Worker bootstrap: load Pyodide, install the required Python packages through
// micropip, run the Python setup code, then notify the main thread.
// NOTE(review): the promise returned by this IIFE is neither awaited nor caught,
// so a failure in any step is not reported through postMessage. Confirm that
// this is intended.
(async () => {
    // Stored on the worker global so the message handler can reach it later.
    self.pyodide = await loadPyodide({ indexURL })
    await self.pyodide.loadPackage("micropip");
    const micropip = self.pyodide.pyimport("micropip");
    await micropip.install(['numpy', 'imjoy-rpc', 'pyodide-http']);
    // NOTE: We intentionally avoid runPythonAsync here because we don't want this to pre-load extra modules like matplotlib.
    self.pyodide.runPython(setupCode)
    self.postMessage({loading: true})  // Inform the main thread that we finished loading.
})()
 13 | 
 14 | let outputs = []
 15 | 
 16 | function write(type, content) {
 17 |     self.postMessage({ type, content })
 18 |     outputs.push({ type, content })
 19 |     return content.length
 20 | }
 21 | 
 22 | function logService(type, url, attrs) {
 23 |     outputs.push({type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries})})
 24 |     self.postMessage({ type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries}) })
 25 | }
 26 | 
 27 | function show(type, url, attrs) {
 28 |     const turl = url.length > 32 ? url.slice(0, 32) + "..." : url
 29 |     outputs.push({type, content: turl, attrs: attrs?.toJs({dict_converter : Object.fromEntries})})
 30 |     self.postMessage({ type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries}) })
 31 | }
 32 | 
 33 | function store_put(key, value) {
 34 |     self.postMessage({ type: "store", key, content: `${value}` })
 35 | }
 36 | 
 37 | // Stand-in for `time.sleep`, which does not actually sleep.
 38 | // To avoid a busy loop, instead import asyncio and await asyncio.sleep().
 39 | function spin(seconds) {
 40 |     const time = performance.now() + seconds * 1000
 41 |     while (performance.now() < time);
 42 | }
 43 | 
 44 | // NOTE: eval(compile(source, "<string>", "exec", ast.PyCF_ALLOW_TOP_LEVEL_AWAIT))
 45 | // returns a coroutine if `source` contains a top-level await, and None otherwise.
 46 | 
 47 | const setupCode = `
 48 | import array
 49 | import ast
 50 | import base64
 51 | import contextlib
 52 | import io
 53 | import js
 54 | import pyodide
 55 | import sys
 56 | import time
 57 | import traceback
 58 | import wave
 59 | import pyodide_http
 60 | 
# Major.minor version of the running interpreter. It is printed once here and
# interpolated into the help banner below.
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"; print(python_version)

# NOTE(review): presumably this makes HTTP client libraries usable inside
# Pyodide - confirm against the pyodide_http documentation.
pyodide_http.patch_all()  # Patch all libraries
# Banner text printed by the replacement help() defined after it.
help_string = f"""
Welcome to BioImage.IO Chatbot Debug console!
Python {python_version} on Pyodide {pyodide.__version__}

In this console, you can run Python code and interact with the code interpreter used by the chatbot.
You can inspect variables, run functions, and more.

If this is your first time using Python, you should definitely check out
the tutorial on the internet at https://docs.python.org/{python_version}/tutorial/.
Enter the name of any module, keyword, or topic to get help on writing
Python programs and using Python modules.  To quit this help utility and
return to the interpreter, just type "quit".
To get a list of available modules, keywords, symbols, or topics, type
"modules", "keywords", "symbols", or "topics".  Each module also comes
with a one-line summary of what it does; to list the modules whose name
or summary contain a given string such as "spam", type "modules spam".
"""

# Replace the built-in help() with a function that ignores its arguments and
# prints the banner above.
__builtins__.help = lambda *args, **kwargs: print(help_string)
 83 | 
 84 | # patch hypha services
 85 | import imjoy_rpc.hypha
 86 | _connect_to_server = imjoy_rpc.hypha.connect_to_server
 87 | 
 88 | async def patched_connect_to_server(*args, **kwargs):
 89 |     server = await _connect_to_server(*args, **kwargs)
 90 |     _register_service = server.register_service
 91 |     async def patched_register_service(*args, **kwargs):
 92 |         svc_info = await _register_service(*args, **kwargs)
 93 |         service_id = svc_info['id'].split(':')[1]
 94 |         service_url = f"{server.config['public_base_url']}/{server.config['workspace']}/services/{service_id}"
 95 |         js.logService("service", service_url, svc_info)
 96 |         return svc_info
 97 |     server.register_service = patched_register_service
 98 |     server.registerService = patched_register_service
 99 |     return server
100 | 
101 | imjoy_rpc.hypha.connect_to_server = patched_connect_to_server
102 | 
103 | # For redirecting stdout and stderr later.
class JSOutWriter(io.TextIOBase):
    """Text stream that hands everything written to it to js.write as "stdout"."""

    def write(self, s):
        # js.write reports the length of the text, which is returned as the
        # number of characters written.
        written = js.write("stdout", s)
        return written
107 | 
class JSErrWriter(io.TextIOBase):
    """Text stream that hands everything written to it to js.write as "stderr"."""

    def write(self, s):
        # js.write reports the length of the text, which is returned as the
        # number of characters written.
        written = js.write("stderr", s)
        return written
111 | 
def setup_matplotlib():
    """Select the Agg backend and replace plt.show with a version that sends the figure to JS.

    The replacement saves the current figure as PNG into memory, passes it to
    js.show as a base64 data URL, and then clears the figure.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    def show():
        png = io.BytesIO()
        plt.savefig(png, format='png')
        encoded = base64.b64encode(png.getvalue()).decode('utf-8')
        js.show("img", 'data:image/png;base64,' + encoded)
        # Clear the figure so the next plot does not draw on top of this one.
        plt.clf()

    plt.show = show
125 | 
def show_image(image, **attrs):
    """Send an image to JS as a PNG data URL.

    Args:
        image: a PIL image, or any object accepted by PIL Image.fromarray.
        **attrs: extra attributes passed through to js.show as a dict.
    """
    from PIL import Image
    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
    stream = io.BytesIO()
    pil_image.save(stream, format='png')
    encoded = base64.b64encode(stream.getvalue()).decode('utf-8')
    js.show("img", 'data:image/png;base64,' + encoded, attrs)
134 | 
135 | _store = {}
136 | def store_put(key, value):
137 |     _store[key] = value
138 |     js.store_put(key, value)
139 | 
140 | def store_get(key):
141 |     return _store.get(key)
142 | 
def show_animation(frames, duration=100, format="apng", loop=0, **attrs):
    """Send a sequence of frames to JS as an animated image data URL.

    Args:
        frames: iterable of PIL images or objects accepted by PIL Image.fromarray.
            At least one frame is required; unpacking an empty sequence raises ValueError.
        duration: passed to the PIL save call as the duration option.
        format: image format name; "apng" is saved through the png writer, any
            other value is passed to PIL unchanged. It is also used in the MIME
            type of the data URL.
        loop: passed to the PIL save call as the loop option.
        **attrs: extra attributes passed through to js.show as a dict.
    """
    from PIL import Image
    first, *rest = [frame if isinstance(frame, Image.Image) else Image.fromarray(frame) for frame in frames]
    buf = io.BytesIO()
    # Forward the caller's loop value; it was previously ignored in favour of a hard-coded 0.
    first.save(buf, format='png' if format == "apng" else format, save_all=True, append_images=rest, duration=duration, loop=loop)
    data_url = f'data:image/{format};base64,' + base64.b64encode(buf.getvalue()).decode('utf-8')
    js.show("img", data_url, attrs)
150 | 
def convert_audio(data):
    """Convert audio samples to 16-bit PCM bytes.

    Args:
        data: a 1-D numpy array (one channel), a 2-D numpy array with one row
            per channel, or any other iterable of numbers (treated as one
            channel). Samples are scaled by 2**15 - 1, so they are presumably
            expected in the range [-1.0, 1.0] - confirm with callers.

    Returns:
        A (pcm_bytes, channel_count) tuple; samples are little-endian 16-bit integers.

    Raises:
        ValueError: if a numpy array has a number of dimensions other than 1 or 2.
    """
    try:
        import numpy as np
        is_numpy = isinstance(data, np.ndarray)
    except ImportError:
        is_numpy = False
    if is_numpy:
        if data.ndim == 1:
            channels = 1
        # This must be elif: with a plain if, every 1-D array fell through to
        # the else branch below and was rejected.
        elif data.ndim == 2:
            channels = data.shape[0]
            # Transpose before flattening so consecutive samples alternate between channels.
            data = data.T.ravel()
        else:
            raise ValueError("Too many dimensions (expected 1 or 2).")
        return ((data * (2**15 - 1)).astype("<h").tobytes(), channels)
    else:
        data = array.array('h', (int(x * (2**15 - 1)) for x in data))
        # array uses native byte order; swap on big-endian hosts to get little-endian output.
        if sys.byteorder == 'big':
            data.byteswap()
        return (data.tobytes(), 1)
171 | 
def show_audio(samples, rate):
    """Encode samples as a WAV file and send it to JS as a data URL.

    Args:
        samples: audio samples in any form accepted by convert_audio.
        rate: frame rate written into the WAV header.
    """
    pcm, channel_count = convert_audio(samples)
    stream = io.BytesIO()
    with wave.open(stream, mode='wb') as wav:
        wav.setnchannels(channel_count)
        wav.setframerate(rate)
        # Two bytes per sample, matching the 16-bit output of convert_audio.
        wav.setsampwidth(2)
        wav.setcomptype('NONE', 'NONE')
        wav.writeframes(pcm)
    encoded = base64.b64encode(stream.getvalue()).decode('utf-8')
    js.show("audio", 'data:audio/wav;base64,' + encoded)
183 | 
# NOTE(review): the comment originally placed here read
# "HACK: Prevent 'wave' import from failing because audioop is not included with pyodide."
# Nothing in the statements below touches wave or audioop - confirm whether that note is stale.
#
# What the statements below do: create an in-memory module named "embed",
# register it in sys.modules so that "import embed" resolves to it, and expose
# the display helpers on it as embed.image, embed.animation and embed.audio.
import types
import ast
embed = types.ModuleType('embed')
sys.modules['embed'] = embed
embed.image = show_image
embed.animation = show_animation
embed.audio = show_audio
192 | 
def preprocess_code(source):
    """Split source into the code to execute and an optional trailing expression.

    Returns:
        A (main_code, last_expr) tuple. When the final statement of source is a
        bare expression, main_code is the source regenerated from all preceding
        statements and last_expr is the ast.Expr node of that final statement.
        Otherwise main_code is the original source text and last_expr is None.
    """
    tree = ast.parse(source)
    statements = tree.body

    # Nothing to split off: empty source, or the final statement is not an expression.
    if not statements or not isinstance(statements[-1], ast.Expr):
        return source, None

    *leading, trailing_expr = statements
    # Rebuild a module from everything before the final expression and turn it
    # back into source text so the caller can compile it for exec.
    leading_module = ast.Module(body=leading, type_ignores=tree.type_ignores)
    return ast.unparse(leading_module), trailing_expr
211 |     
212 | 
# Globals dictionary used for every execution. Because the same dictionary is
# passed to each eval call, names defined by one run stay visible to later runs.
context = {"store_put": store_put, "store_get": store_get}

async def run(source, io_context):
    """Execute user source code inside the shared context.

    Args:
        source: Python source text to execute; top-level await is allowed.
        io_context: optional mapping. Its "inputs" entry lists store keys that
            are copied into the context before the code runs; its "outputs"
            entry lists context variable names that are saved into the store
            after the code runs.

    Raises:
        Exception: if a requested input is missing from the store, or a
            requested output name is missing from the context after execution.
            Any exception raised while executing the code is printed with
            traceback.print_exc and then re-raised.
    """
    out = JSOutWriter()
    err = JSErrWriter()
    io_context = io_context or {}
    inputs = io_context.get("inputs") or []
    outputs = io_context.get("outputs") or []
    # Copy the requested inputs from the store into the execution context.
    # This happens before stdout/stderr are redirected and outside the try block.
    for ip in inputs:
        if ip not in _store:
            raise Exception("Error: Input not found in store:", ip)
        context[ip] = store_get(ip)
    with contextlib.redirect_stdout(out), contextlib.redirect_stderr(err):
        try:
            # Load the packages the code imports before compiling it.
            imports = pyodide.code.find_imports(source)
            await js.pyodide.loadPackagesFromImports(source)
            if "matplotlib" in imports or "skimage" in imports:
                setup_matplotlib()
            # The embed helpers need numpy and PIL, so load them when embed is imported.
            if "embed" in imports:
                await js.pyodide.loadPackagesFromImports("import numpy, PIL")
            
            # Split off a trailing expression so its value can be printed afterwards.
            source, last_expression = preprocess_code(source)
            code = compile(source, "<string>", "exec", ast.PyCF_ALLOW_TOP_LEVEL_AWAIT)

            # With the top-level-await flag, eval returns a coroutine when the
            # code contains a top-level await and None otherwise.
            result = eval(code, context)
            if result is not None:
                result = await result
            if last_expression:
                if isinstance(last_expression.value, ast.Await):
                    # If last expression is an await, compile and execute it as async
                    last_expr_code = compile(ast.Expression(last_expression.value), "<string>", "eval", flags=ast.PyCF_ALLOW_TOP_LEVEL_AWAIT)
                    result = await eval(last_expr_code, context)
                else:
                    # If last expression is not an await, compile and evaluate it normally
                    last_expr_code = compile(ast.Expression(last_expression.value), "<string>", "eval")
                    result = eval(last_expr_code, context)
                # Print the value of the trailing expression unless it is None.
                if result is not None:
                    print(result)
            # Save the requested output variables from the context into the store.
            for op in outputs:
                if op not in context:
                    raise Exception("Error: The script did not produce an variable named: " +  op)
                store_put(op, context[op])
        except:
            # Print the traceback while stderr is still redirected, then let the error propagate.
            traceback.print_exc()
            raise
258 | `
// Values returned by mountNativeFS, keyed by mount point, so that an existing
// mount can be removed before the same mount point is mounted again.
const mountedFs = {};

/**
 * Promise wrapper around the callback-style `FS.syncfs`.
 * The promise resolves with whatever `syncfs` passes to its callback, so a
 * reported sync error does not become a rejection.
 *
 * @param {boolean} populate - Forwarded as the first argument of `FS.syncfs`.
 */
function syncFs(populate) {
    return new Promise((resolve) => self.pyodide.FS.syncfs(populate, resolve));
}

/**
 * Worker message handler. A message may carry:
 *  - `source` (with optional `io_context`): run the Python code and reply with
 *    `{ executionDone, outputs }` or `{ executionError }`;
 *  - `mount` (`{ mountPoint, dirHandle }`): mount a native directory and reply
 *    with `{ mounted }` or `{ mountError }`.
 */
self.onmessage = async (event) => {
    if (event.data.source) {
        try {
            const { source, io_context } = event.data;
            // Expose the inputs as Python globals for the `run(source, io_context)` call below.
            self.pyodide.globals.set("source", source);
            self.pyodide.globals.set("io_context", io_context && self.pyodide.toPy(io_context));
            outputs = [];
            // see https://github.com/pyodide/pyodide/blob/b177dba277350751f1890279f5d1a9096a87ed13/src/js/api.ts#L546
            // sync native ==> browser
            await syncFs(true);
            await self.pyodide.runPythonAsync("await run(source, io_context)");
            // sync browser ==> native
            await syncFs(false);
            console.log("Execution done", outputs);
            self.postMessage({ executionDone: true, outputs });
            outputs = [];
        }
        catch (e) {
            console.error("Execution Error", e);
            self.postMessage({ executionError: e.message });
        }
    }
    if (event.data.mount) {
        try {
            const { mountPoint, dirHandle } = event.data.mount;
            // Remove an existing mount at this mount point before mounting again.
            if (mountedFs[mountPoint]) {
                console.log("Unmounting native FS:", mountPoint);
                await self.pyodide.FS.unmount(mountPoint);
                delete mountedFs[mountPoint];
            }
            const nativefs = await self.pyodide.mountNativeFS(mountPoint, dirHandle);
            mountedFs[mountPoint] = nativefs;
            console.log("Native FS mounted:", mountPoint, nativefs);
            self.postMessage({ mounted: mountPoint });
        }
        catch (e) {
            // Log mount failures the same way execution failures are logged.
            console.error("Mount Error", e);
            self.postMessage({ mountError: e.message });
        }
    }
};
302 | 


--------------------------------------------------------------------------------