├── docs ├── .nojekyll ├── Elixir-Presentation.pdf ├── example-data │ └── nuclei.tif ├── screenshots │ ├── bia-tutor.png │ ├── channels.png │ ├── chatbot-hi.png │ ├── extensions.png │ ├── gpts_json.png │ ├── web-search.png │ ├── bia-executor.png │ ├── more-details.png │ ├── role_create.png │ ├── search-biii.png │ ├── user-profile.png │ ├── SimilarityScore.png │ ├── chat-interface.png │ ├── configure_gpt.png │ ├── direct-response.png │ ├── respond_to_user.png │ ├── retrieval-text.png │ ├── screenshot-hi.png │ ├── chatbot-animation.gif │ ├── chatbot-biologist.png │ ├── chatbot-developer.png │ ├── gpts_landing_page.png │ ├── search-model-zoo.png │ ├── select-extension.png │ ├── chatbot-model-upload.png │ ├── chatbot-wsi-pipeline.png │ ├── search-bioimage-docs.png │ ├── search-image-forum.png │ ├── customization1_student.png │ ├── customization_biotool.png │ ├── chatbot-community-partner.png │ ├── customization2_developer.png │ ├── script-gen-exe-retrieval.png │ ├── search-bioimage-archive.png │ ├── similarity-score-results.png │ ├── chatbot-channel-bioimageio.png │ ├── chatbot-technical-question.png │ ├── chatbot-channel-scikit-image.png │ └── chatgpt-vs-bioimageiochatbot.png ├── _sidebar.md ├── beta-testing-guidelines.md ├── creating_GPTs.md ├── CONTRIBUTING.md ├── DISCLAIMER.md ├── index.html ├── technical-overview.md ├── figure-2-use-cases.md ├── installation.md ├── usage-example.md ├── README.md └── development.md ├── bioimageio_chatbot ├── __init__.py ├── tools.py ├── gpts_action.py ├── static │ ├── imagej-js-extension.imjoy.html │ ├── bioimage-model-zoo-extension.imjoy.html │ ├── worker-manager.js │ └── pyodide-worker.js ├── evaluation.py ├── chatbot_extensions │ ├── web_search_extension │ │ ├── __init__.py │ │ └── langchain_websearch.py │ ├── __init__.py │ ├── vision_extension.py │ ├── bia_extension.py │ ├── hpa_extension.py │ ├── biii_extension.py │ ├── image_sc_extension.py │ └── docs_extension.py ├── __main__.py ├── utils.py ├── quota.py ├── 
jsonschema_pydantic.py └── knowledge_base.py ├── MANIFEST.in ├── requirements_test.txt ├── Dockerfile ├── tests ├── __pycache__ │ ├── test_chatbot.cpython-39-pytest-7.2.1.pyc │ └── test_chatbot.cpython-310-pytest-7.4.2.pyc ├── test_knowledge_base.py ├── test_chatbot_answer.py └── test_chatbot.py ├── scripts └── publish.sh ├── requirements.txt ├── pyproject.toml ├── .github └── workflows │ ├── publish.yml │ └── build.yml ├── LICENSE └── .gitignore /docs/.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bioimageio_chatbot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bioimageio_chatbot/static/* -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pytest 3 | pytest-asyncio 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | RUN pip install bioimageio-chatbot 3 | 4 | EXPOSE 9000 -------------------------------------------------------------------------------- /docs/Elixir-Presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/Elixir-Presentation.pdf -------------------------------------------------------------------------------- /docs/example-data/nuclei.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/example-data/nuclei.tif -------------------------------------------------------------------------------- /docs/screenshots/bia-tutor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-tutor.png -------------------------------------------------------------------------------- /docs/screenshots/channels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/channels.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-hi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-hi.png -------------------------------------------------------------------------------- /docs/screenshots/extensions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/extensions.png -------------------------------------------------------------------------------- /docs/screenshots/gpts_json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_json.png -------------------------------------------------------------------------------- /docs/screenshots/web-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/web-search.png -------------------------------------------------------------------------------- /docs/screenshots/bia-executor.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/bia-executor.png -------------------------------------------------------------------------------- /docs/screenshots/more-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/more-details.png -------------------------------------------------------------------------------- /docs/screenshots/role_create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/role_create.png -------------------------------------------------------------------------------- /docs/screenshots/search-biii.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-biii.png -------------------------------------------------------------------------------- /docs/screenshots/user-profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/user-profile.png -------------------------------------------------------------------------------- /docs/screenshots/SimilarityScore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/SimilarityScore.png -------------------------------------------------------------------------------- /docs/screenshots/chat-interface.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chat-interface.png -------------------------------------------------------------------------------- /docs/screenshots/configure_gpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/configure_gpt.png -------------------------------------------------------------------------------- /docs/screenshots/direct-response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/direct-response.png -------------------------------------------------------------------------------- /docs/screenshots/respond_to_user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/respond_to_user.png -------------------------------------------------------------------------------- /docs/screenshots/retrieval-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/retrieval-text.png -------------------------------------------------------------------------------- /docs/screenshots/screenshot-hi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/screenshot-hi.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-animation.gif 
-------------------------------------------------------------------------------- /docs/screenshots/chatbot-biologist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-biologist.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-developer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-developer.png -------------------------------------------------------------------------------- /docs/screenshots/gpts_landing_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/gpts_landing_page.png -------------------------------------------------------------------------------- /docs/screenshots/search-model-zoo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-model-zoo.png -------------------------------------------------------------------------------- /docs/screenshots/select-extension.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/select-extension.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-model-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-model-upload.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-wsi-pipeline.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-wsi-pipeline.png -------------------------------------------------------------------------------- /docs/screenshots/search-bioimage-docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-docs.png -------------------------------------------------------------------------------- /docs/screenshots/search-image-forum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-image-forum.png -------------------------------------------------------------------------------- /docs/screenshots/customization1_student.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization1_student.png -------------------------------------------------------------------------------- /docs/screenshots/customization_biotool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization_biotool.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-community-partner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-community-partner.png -------------------------------------------------------------------------------- /docs/screenshots/customization2_developer.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/customization2_developer.png -------------------------------------------------------------------------------- /docs/screenshots/script-gen-exe-retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/script-gen-exe-retrieval.png -------------------------------------------------------------------------------- /docs/screenshots/search-bioimage-archive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/search-bioimage-archive.png -------------------------------------------------------------------------------- /docs/screenshots/similarity-score-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/similarity-score-results.png -------------------------------------------------------------------------------- /bioimageio_chatbot/tools.py: -------------------------------------------------------------------------------- 1 | 2 | # example function 3 | def example_function(address): 4 | pass 5 | 6 | 7 | TOOL_MAP = {"example_function": example_function} -------------------------------------------------------------------------------- /docs/screenshots/chatbot-channel-bioimageio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-bioimageio.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-technical-question.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-technical-question.png -------------------------------------------------------------------------------- /docs/screenshots/chatbot-channel-scikit-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatbot-channel-scikit-image.png -------------------------------------------------------------------------------- /docs/screenshots/chatgpt-vs-bioimageiochatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/docs/screenshots/chatgpt-vs-bioimageiochatbot.png -------------------------------------------------------------------------------- /tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-39-pytest-7.2.1.pyc -------------------------------------------------------------------------------- /tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/HEAD/tests/__pycache__/test_chatbot.cpython-310-pytest-7.4.2.pyc -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | pip install -U twine 2 | pip install -U wheel 3 | python3 -m pip install --upgrade build 4 | rm -rf ./build 5 | rm ./dist/* 6 | python3 -m build 7 | twine upload dist/* 8 | rm -rf ./build 
-------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | 2 | * [Overview](/README) 3 | * [Installation](/installation) 4 | * [Usage guide](/usage-example) 5 | * [Technical Overview](/technical-overview) 6 | * [Extension Development](/development) 7 | * [Contribution Guidelines](/CONTRIBUTING) 8 | * [Use Cases](/figure-2-use-cases) 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | schema-agents>=0.1.59 2 | hypha-rpc==0.20.38 3 | requests 4 | pypdf 5 | pillow 6 | matplotlib 7 | hypha==0.20.38.post19 8 | tqdm 9 | aiofiles 10 | serpapi 11 | langchain>=0.1.11 12 | langchain-community==0.0.27 13 | langchain-core==0.1.31 14 | beautifulsoup4 15 | pandas 16 | duckduckgo-search==6.1.5 17 | langchain-openai==0.0.8 18 | rank-bm25==0.2.2 19 | html2text==2020.1.16 20 | setuptools 21 | -------------------------------------------------------------------------------- /tests/test_knowledge_base.py: -------------------------------------------------------------------------------- 1 | from langchain_community.vectorstores import FAISS 2 | from langchain_openai import OpenAIEmbeddings 3 | 4 | def test_knowledge_base(): 5 | """Test the knowledge base""" 6 | vectordb = FAISS.load_local(folder_path="./bioimageio-knowledge-base", index_name="bioimage.io", embeddings=OpenAIEmbeddings(), allow_dangerous_deserialization=True) 7 | retriever = vectordb.as_retriever(score_threshold=0.4) 8 | items = retriever.get_relevant_documents("community partner", verbose=True) 9 | assert len(items) > 0 -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | 4 | 
[project] 5 | name = "bioimageio-chatbot" 6 | version = "0.2.13" 7 | readme = "README.md" 8 | description = "Your Personal Assistant in Computational BioImaging." 9 | dependencies = [ 10 | "schema-agents>=0.1.59", 11 | "hypha-rpc>=0.20.38", 12 | "requests", 13 | "pypdf", 14 | "pillow", 15 | "matplotlib", 16 | "hypha>=0.20.38.post19", 17 | "tqdm", 18 | "aiofiles", 19 | "langchain>=0.1.6", 20 | "beautifulsoup4", 21 | "pandas", 22 | "duckduckgo-search>=6.1.5", 23 | "rank-bm25", 24 | "langchain-openai", 25 | "langchain-core>=0.1.31", 26 | "langchain-community>=0.0.27", 27 | "html2text", 28 | ] 29 | 30 | [tool.setuptools] 31 | include-package-data = true 32 | 33 | [tool.setuptools.packages.find] 34 | include = ["bioimageio_chatbot*"] 35 | exclude = ["tests*", "scripts*"] 36 | 37 | [options.entry_points] 38 | console_scripts = [ 39 | "bioimageio-chatbot = bioimageio_chatbot.__main__:main", 40 | ] 41 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | # Optional inputs, you can add more according to your needs 6 | inputs: 7 | # version: 8 | # description: 'Version of the package to release' 9 | # required: true 10 | # default: '1.0.0' 11 | 12 | jobs: 13 | publish: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out code 17 | uses: actions/checkout@v2 18 | 19 | # Add steps for any necessary setup, like installing dependencies 20 | - name: Build 21 | run: | 22 | python -m pip install --upgrade pip 23 | python -m pip install -U twine 24 | python -m pip install -U wheel 25 | python3 -m pip install build==1.0.3 # pin build 26 | rm -rf ./build 27 | rm -rf ./dist/* 28 | python3 -m build 29 | 30 | - name: Publish to PyPI 31 | uses: pypa/gh-action-pypi-publish@master 32 | with: 33 | user: __token__ 34 | password: ${{ secrets.PYPI_API_TOKEN }} 35 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 BioImage.IO 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | 6 | name: Build 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | build: 13 | permissions: 14 | contents: write # for Git to git push 15 | name: Build 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v3 20 | - name: Setup conda 21 | uses: s-weigand/setup-conda@v1 22 | with: 23 | update-conda: true 24 | python-version: 3.9 25 | conda-channels: anaconda, conda-forge 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install -r requirements.txt 30 | - name: Trigger workflow at bioimage.io 31 | uses: benc-uk/workflow-dispatch@v1 32 | with: 33 | workflow: build-site.yml 34 | repo: bioimage-io/bioimage.io 35 | inputs: '{}' 36 | # Required when using the `repo` option. 
Either a PAT or a token generated from the GitHub app or CLI 37 | token: "${{ secrets.PAT_TOKEN_WORKFLOW }}" 38 | - name: Run Init 39 | run: python -m bioimageio_chatbot init 40 | - name: Test server 41 | env: 42 | OPENAI_API_KEY: sk-xxxxxxxx 43 | run: python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 & wget http://127.0.0.1:9000/ 44 | 45 | # - name: Build vector database 46 | # if: github.event_name == 'push' && github.ref == 'refs/heads/main' 47 | # run: | 48 | # python -m bioimageio_chatbot create-knowledge-base --output-dir=./dist/bioimageio-knowledge-base 49 | # env: 50 | # OPENAI_API_KEY: ${{ secrets.OPENAI_SECRET_API_KEY }} 51 | # - name: Deploy 52 | # if: github.event_name == 'push' && github.ref == 'refs/heads/main' 53 | # uses: peaceiris/actions-gh-pages@v3 54 | # with: 55 | # github_token: ${{ secrets.GITHUB_TOKEN }} 56 | # publish_dir: ./dist 57 | -------------------------------------------------------------------------------- /bioimageio_chatbot/gpts_action.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from openai import AsyncOpenAI 3 | from bioimageio_chatbot.chatbot_extensions import extension_to_tools 4 | from schema_agents.utils.schema_conversion import get_service_openapi_schema 5 | from hypha_rpc import login, connect_to_server 6 | 7 | client = AsyncOpenAI() 8 | 9 | async def convert_extensions(builtin_extensions): 10 | extension_services = {} 11 | for extension in builtin_extensions: 12 | tools = await extension_to_tools(extension) 13 | for tool in tools: 14 | extension_services[tool.__name__] = tool 15 | return extension_services 16 | 17 | async def serve_actions(server, server_url, builtin_extensions): 18 | extension_services = await convert_extensions(builtin_extensions) 19 | svc = { 20 | "id": "bioimageio-chatbot-extensions-api", 21 | "name": "BioImage.io Chatbot Extensions", 22 | "description": "A collection of chatbot extensions for facilitate user 
interactions with external documentation, services and tools.", 23 | "config": { 24 | "visibility": "public", 25 | "require_context": False 26 | }, 27 | } 28 | svc.update(extension_services) 29 | workspace = server.config['workspace'] 30 | service_id = "bioimageio-chatbot-extensions-api" 31 | openapi_schema = get_service_openapi_schema(svc, f"{server_url}/{workspace}/services/{service_id}") 32 | svc["get_openapi_schema"] = lambda : openapi_schema 33 | 34 | service_info = await server.register_service(svc) 35 | print(f"Service registered, openapi schema: {server_url}/services/call?service_id={service_info['id']}&function_key=get_openapi_schema") 36 | 37 | async def start_server(server_url): 38 | token = await login({"server_url": server_url}) 39 | server = await connect_to_server({"server_url": server_url, "token": token, "method_timeout": 100}) 40 | print(f"Connected to server: {server_url}") 41 | await serve_actions(server, server_url) 42 | 43 | 44 | if __name__ == "__main__": 45 | server_url = "https://staging.chat.bioimage.io/" 46 | loop = asyncio.get_event_loop() 47 | loop.create_task(start_server(server_url)) 48 | loop.run_forever() -------------------------------------------------------------------------------- /docs/beta-testing-guidelines.md: -------------------------------------------------------------------------------- 1 | # BioImage.IO Chatbot Beta Testing Guidelines 2 | 3 | Thank you for participating in the beta testing phase of the BioImage.IO Chatbot! Your feedback is invaluable in helping us improve and refine the chatbot. Please follow these guidelines to ensure a smooth testing experience: 4 | 5 | ## Prerequisites 6 | 7 | 1. Please Read the [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md). 8 | 2. **Login with Your Email Account:** First, please create an account [here](https://ai.imjoy.io/public/apps/hypha-login/). Then use the chatbot via [https://bioimage.io/chat](https://bioimage.io/chat). 
If you have any issues with the login, please contact us. 9 | 10 | ## Beta Testing Steps 11 | 12 | 1. **Access the Chatbot:** 13 | - Explore the example usage guide: [Usage Example Guide](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/usage-example.md) 14 | - Access the BioImage.IO Chatbot through the link we provided via email. 15 | 16 | 2. **Testing Scenarios:** 17 | - Try editing your profile so that it suits you best. 18 | - Test the chatbot with various scenarios relevant to your field (biologist, developer, etc.). 19 | - Try different knowledge base channels and observe the responses. 20 | 21 | 3. **Provide Feedback:** 22 | - For a specific chatbot response, you can also like/dislike a response from the BioImage.IO Chatbot by clicking the thumbs up/down button under each message. Then, a pop-up will appear for you to provide feedback on the response. 23 | - For general feedback, please use the "Feedback" button at the bottom of the chatbot interface. Feel free to comment on the current chat session, or provide general feedback or ideas. For example, we would be happy to hear about your experience and ideas on how to improve the chatbot. 24 | - If you encounter any bugs, issues, or unexpected behavior, please use the [GitHub issues](https://github.com/bioimage-io/bioimageio-chatbot/issues) section to create a new issue. 25 | 26 | 4. **Community Contributions:** 27 | - If you have data sources or documentation relevant to the chatbot, feel free to propose community contributions. 28 | 29 | ## Conclusion 30 | 31 | Thank you for your valuable contribution to the BioImage.IO Chatbot beta testing! Your input helps us enhance the functionality and user experience for the entire community. 
32 | -------------------------------------------------------------------------------- /docs/creating_GPTs.md: -------------------------------------------------------------------------------- 1 | # Creating OpenAI GPTs with Chatbot Extensions 2 | 3 | ## Introduction 4 | In addition to standalone usage, the BioImage.IO Chatbot supports porting extensions to OpenAI custom [GPTs](https://chat.openai.com/gpts) for users with OpenAI accounts. Chatbot extensions following the development model specified in the [development guidelines](./development.md) and [notebook tutorial](./bioimage-chatbot-extension-tutorial.ipynb) are automatically converted to `openapi` schema which can be used to create OpenAI GPTs using the online GPT creator. 5 | 6 | `openapi` schemas for extensions are generated on Chatbot server startup via the `register_service` function in [gpts_action.py](../bioimageio_chatbot/gpts_action.py). These schemas are then made available for OpenAI GPT creator import directly via url. This process for creating a custom GPT from the public BioImage.IO Chatbot instance extensions is shown below. Users are encouraged to submit their extensions to the BioImage.IO team for incorporation into the public Chatbot instance. 7 | 8 | Note that GPT actions are run through the hosted server instance (chat.bioimage.io in the case of the public Chatbot instance). Also note that the creation of custom OpenAI GPTs requires a paid OpenAI account. 
9 | 10 | ## Creating a Custom GPT from the public Chatbot Instance 11 | The public Chatbot instance's `openapi` extension schema is available at the following link: `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema` 12 | 13 | After logging in to their OpenAI accounts, users can navigate to the GPTs [page](https://chat.openai.com/gpts) and click `Create` as shown below: 14 | 15 | ![gpt_landing_page](./screenshots/gpts_landing_page.png) 16 | 17 | To add GPT actions from Chatbot extensions, navigate to the `Configure` tab and select `Create new action`: 18 | 19 | ![configure_gpt](./screenshots/configure_gpt.png) 20 | 21 | The Chatbot-generated `openapi` schema can then be imported directly by selecting `Import from URL` and entering the public Chatbot's extension schema URL: `https://chat.bioimage.io/public/services/bioimageio-chatbot-extensions-api/get_openapi_schema` 22 | 23 | Users can edit the JSON content to select individual actions from the Chatbot extensions if desired: 24 | 25 | ![gpts_json](./screenshots/gpts_json.png) 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # BioImage.IO Chatbot Contribution Guidelines 2 | 3 | Thank you for your interest in contributing to the BioImage.IO Chatbot. Your contributions help us enhance the chatbot's knowledge base and provide more accurate and detailed responses. This document outlines how you can contribute new databases or information for retrieval by the chatbot. 4 | 5 | ## Introduction 6 | 7 | The BioImage.IO Chatbot relies on a comprehensive knowledge base to provide accurate responses to user queries. We encourage contributions to expand this knowledge base by adding new databases, information, or resources. 
Whether you're a researcher, developer, or domain expert, your contributions can help improve the chatbot's functionality. 8 | 9 | ## Contribution Process 10 | ### Knowledge Base 11 | 12 | You can contribute to the chatbot's knowledge base by adding new databases or information. 13 | 14 | We use the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to keep track of the databases and their details. 15 | 16 | Follow these steps to contribute to the BioImage.IO Chatbot: 17 | 18 | 1. Take a look at the [`knowledge-base-manifest.yaml`](../knowledge-base-manifest.yaml) file to see the databases that are currently integrated with the chatbot. The existing data sources are markdown files hosted on GitHub, JSON files, etc. 19 | 2. Prepare your database by organising your information to ensure it is accurate, relevant, and structured in a manner that can be easily retrieved. You can find some URLs for the existing data sources; please use those as examples. 20 | 3. Fork this repository and edit the manifest to include the details of your database, including the name, URL and description. 21 | 4. You can submit your contribution with a Pull Request (PR) with the updated manifest. Our team will review and integrate the changes. 22 | 5. Once your contribution is accepted and the chatbot's knowledge is updated, test that the chatbot is accurate in its responses when retrieving information from your database. 23 | 24 | Remember that at any step of the process you can contact us for feedback or assistance. We deeply appreciate your contribution! 25 | 26 | ### Develop Custom Extension 27 | 28 | The BioImage.IO Chatbot offers a framework designed for easy extensibility, allowing developers to enrich its capabilities with custom extensions. Please check details on how to contribute to the chatbot by developing a custom extension [`Developing Chatbot Extensions`](./development.md). 
29 | 30 | 31 | ## Contact Us 32 | 33 | If you have any questions, need assistance, or want to contribute to the chatbot's knowledge base, please don't hesitate to contact us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). Our team is here to help you get started and make valuable contributions. 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | quota_manager.db 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | tests/*.pyc 7 | *.csv 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | data/ 16 | dist/ 17 | chat_sessions/ 18 | 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # pytype static type analyzer 140 | .pytype/ 141 | 142 | # Cython debug symbols 143 | cython_debug/ 144 | 145 | # Visual Studio Code 146 | .vscode/ 147 | 148 | # others 149 | *.bin 150 | logs/ 151 | bioimageio-knowledge-base/ 152 | .DS_Store 153 | chat_logs 154 | authorized_users.json 155 | .pypirc 156 | -------------------------------------------------------------------------------- /bioimageio_chatbot/static/imagej-js-extension.imjoy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | [TODO: write documentation for this plugin.] 
4 | 5 | 6 | 7 | { 8 | "name": "ImageJ.JS Chatbot Extension", 9 | "type": "web-worker", 10 | "tags": [], 11 | "ui": "", 12 | "version": "0.1.0", 13 | "cover": "", 14 | "description": "Run ImageJ.JS macro in the chatbot", 15 | "icon": "extension", 16 | "inputs": null, 17 | "outputs": null, 18 | "api_version": "0.1.8", 19 | "env": "", 20 | "permissions": [], 21 | "requirements": [], 22 | "dependencies": [] 23 | } 24 | 25 | 26 | 99 | -------------------------------------------------------------------------------- /bioimageio_chatbot/evaluation.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from schema_agents.schema import Message 3 | from typing import Any, Dict, List, Optional, Union 4 | from schema_agents.role import Role 5 | 6 | class EvaluationCriteria(BaseModel): 7 | relevance_and_accuracy: str = Field( 8 | default="Score 0-100: 0 means completely irrelevant, providing no useful information. " 9 | "100 means the answer is comprehensive, accurate, and closely matches the reference answer.", 10 | description="Assess how relevant and accurate the chatbot's answer is compared to the reference answer." 11 | ) 12 | coverage_of_key_points: str = Field( 13 | default="Baseline >60: Answers covering the main points from the reference should score above 60, " 14 | "indicating they address the primary aspects of the question.", 15 | description="Evaluate whether the chatbot's answer includes the main points mentioned in the reference answer." 16 | ) 17 | additional_information: str = Field( 18 | default="Variable Impact: Additional helpful information can increase the score. " 19 | "Irrelevant or unhelpful information should lead to a reduced score.", 20 | description="Assess the impact of additional information not present in the reference answer." 21 | ) 22 | evaluation_guidelines: str = Field( 23 | default="Apply scoring criteria consistently and impartially. 
" 24 | "Provide justification for scores, especially for significant deviations from the baseline.", 25 | description="Guidelines for objective and transparent evaluation." 26 | ) 27 | 28 | class EvalInput(BaseModel): 29 | """Input for evaluating scores of LLM-based system.""" 30 | question: str = Field(description="The question that was asked.") 31 | reference_answer: str = Field(description="The answer that was expected.") 32 | llm_answer: str = Field(description="The answer that was generated by the LLM-based system.") 33 | 34 | class EvalScores(BaseModel): 35 | """Scores of evaluating llm answer.""" 36 | criteria: EvaluationCriteria = Field(description="Criteria for evaluating the performance of the LLM-based system.") 37 | similarity_score: float = Field(description="Following the criteria, access the llm_answer. Float between 0 and 100 representing the similarity score. ") 38 | 39 | def create_eval_agent(): 40 | async def bot_answer_evaluate(req: EvalInput, role: Role) -> EvalScores: 41 | """Return the answer to the question.""" 42 | response = await role.aask(req, EvalScores) 43 | return response 44 | 45 | eval_bot = Role( 46 | name="Thomas", 47 | profile="Evaluator", 48 | goal="Evaluate the performance of the LLM-based system.", 49 | constraints=None, 50 | actions=[bot_answer_evaluate], 51 | model="gpt-4-1106-preview" 52 | ) 53 | return eval_bot 54 | 55 | async def evaluate(question, reference_answer, llm_answer): 56 | eval_bot = create_eval_agent() 57 | eval_input = EvalInput(question=question, reference_answer=reference_answer, llm_answer=llm_answer) 58 | scores = await eval_bot.handle(Message(content=eval_input.model_dump_json(), data=eval_input, role="User")) 59 | similarity_score = scores[0].data.similarity_score 60 | return similarity_score 61 | -------------------------------------------------------------------------------- /bioimageio_chatbot/chatbot_extensions/web_search_extension/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | from schema_agents import schema_tool 3 | from bioimageio_chatbot.utils import ChatbotExtension 4 | from pydantic import Field 5 | from typing import Optional 6 | 7 | import httpx 8 | from bs4 import BeautifulSoup 9 | 10 | from .langchain_websearch import LangchainCompressor 11 | 12 | default_langchain_compressor = None 13 | 14 | @schema_tool 15 | async def search_web(query: str=Field(description="space separated keywords for the duckduckgo search engine"), max_results: int = Field(description="maximum number of results to return")): 16 | """Search the web for information using duckduckgo.""" 17 | from duckduckgo_search import AsyncDDGS 18 | query = query.strip("\"'") 19 | results = await AsyncDDGS(proxy=None).atext(query, region='wt-wt', safesearch='moderate', timelimit=None, 20 | max_results=max_results) 21 | if not results: 22 | return "No relevant information found." 23 | docs = [] 24 | for d in results: 25 | docs.append({"title": d['title'], "body": d['body'], "url": d['href']}) 26 | return docs 27 | 28 | @schema_tool 29 | async def browse_web_pages(query: str=Field(description="keywords or a sentence describing the information to be retrieved"), urls: list[str]=Field(description="list of web page urls to analyse"), num_results_to_process: Optional[int]=Field(5, description="number of results to process")): 30 | """Read web pages and return compressed documents with most relevant information.""" 31 | global default_langchain_compressor 32 | default_langchain_compressor = default_langchain_compressor or LangchainCompressor(device="cpu") 33 | 34 | documents = await default_langchain_compressor.faiss_embedding_query_urls(query, urls, 35 | num_results=num_results_to_process) 36 | 37 | if not documents: # Fall back to old simple search rather than returning nothing 38 | print("LLM_Web_search | Could not find any page content " 39 | "similar enough to be extracted, using basic 
search fallback...") 40 | return "No relevant information found." 41 | #return the json serializable documents 42 | return [doc.page_content + '\nsource: ' + doc.metadata.get('source') for doc in documents] 43 | 44 | @schema_tool 45 | async def read_webpage(url: str=Field(description="the web url to read")) -> str: 46 | """Read the full content of a web page converted to plain text.""" 47 | headers = { 48 | "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0", 49 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", 50 | "Accept-Language": "en-US,en;q=0.5" 51 | } 52 | 53 | async with httpx.AsyncClient() as client: 54 | response = await client.get(url, headers=headers) 55 | 56 | soup = BeautifulSoup(response.content, features="lxml") 57 | for script in soup(["script", "style"]): 58 | script.extract() 59 | 60 | strings = soup.stripped_strings 61 | return '\n'.join([s.strip() for s in strings]) 62 | 63 | 64 | def get_extension(): 65 | return ChatbotExtension( 66 | id="web", 67 | name="Search Web", 68 | description="Search the web for information using duckduckgo. 
Search by keywords and returns a list of relevant documents.", 69 | tools=dict(search=search_web, browse=browse_web_pages) 70 | ) 71 | -------------------------------------------------------------------------------- /bioimageio_chatbot/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import asyncio 4 | import subprocess 5 | import os 6 | from bioimageio_chatbot.knowledge_base import load_knowledge_base 7 | 8 | def start_server(args): 9 | if args.login_required: 10 | os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true" 11 | else: 12 | os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "false" 13 | # get current file path so we can get the path of apps under the same directory 14 | current_dir = os.path.dirname(os.path.abspath(__file__)) 15 | command = [ 16 | sys.executable, 17 | "-m", 18 | "hypha.server", 19 | f"--host={args.host}", 20 | f"--port={args.port}", 21 | f"--public-base-url={args.public_base_url}", 22 | f"--static-mounts=/chat:{current_dir}/static", 23 | "--startup-functions=bioimageio_chatbot.chatbot:register_chat_service" 24 | ] 25 | subprocess.run(command) 26 | 27 | def connect_server(args): 28 | from bioimageio_chatbot.chatbot import connect_server 29 | if args.login_required: 30 | os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "true" 31 | else: 32 | os.environ["BIOIMAGEIO_LOGIN_REQUIRED"] = "false" 33 | server_url = args.server_url 34 | loop = asyncio.get_event_loop() 35 | loop.create_task(connect_server(server_url)) 36 | loop.run_forever() 37 | 38 | def create_knowledge_base(args): 39 | from bioimageio_chatbot.knowledge_base import create_vector_knowledge_base 40 | create_vector_knowledge_base(args.output_dir) 41 | 42 | def init(args): 43 | knowledge_base_path = os.environ.get("BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base") 44 | assert knowledge_base_path is not None, "Please set the BIOIMAGEIO_KNOWLEDGE_BASE_PATH environment variable to the path of the knowledge 
base." 45 | if not os.path.exists(knowledge_base_path): 46 | print(f"The knowledge base is not found at {knowledge_base_path}, will download it automatically.") 47 | os.makedirs(knowledge_base_path, exist_ok=True) 48 | docs_store_dict = load_knowledge_base(knowledge_base_path) 49 | 50 | print("Databases loaded in the knowledge base:") 51 | for key in docs_store_dict.keys(): 52 | print(f" - {key}") 53 | 54 | def main(): 55 | parser = argparse.ArgumentParser(description="BioImage.IO Chatbot utility commands.") 56 | 57 | subparsers = parser.add_subparsers() 58 | 59 | # Init command 60 | parser_init = subparsers.add_parser("init") 61 | parser_init.set_defaults(func=init) 62 | 63 | # Start server command 64 | parser_start_server = subparsers.add_parser("start-server") 65 | parser_start_server.add_argument("--host", type=str, default="0.0.0.0") 66 | parser_start_server.add_argument("--port", type=int, default=9000) 67 | parser_start_server.add_argument("--public-base-url", type=str, default="") 68 | parser_start_server.add_argument("--login-required", action="store_true") 69 | parser_start_server.set_defaults(func=start_server) 70 | 71 | # Connect server command 72 | parser_connect_server = subparsers.add_parser("connect-server") 73 | parser_connect_server.add_argument("--server-url", default="https://ai.imjoy.io") 74 | parser_connect_server.add_argument("--login-required", action="store_true") 75 | parser_connect_server.set_defaults(func=connect_server) 76 | 77 | # Create knowledge base command 78 | parser_create_kb = subparsers.add_parser("create-knowledge-base") 79 | parser_create_kb.add_argument("--output-dir", default="./bioimageio-knowledge-base") 80 | parser_create_kb.set_defaults(func=create_knowledge_base) 81 | 82 | args = parser.parse_args() 83 | if hasattr(args, 'func'): 84 | args.func(args) 85 | else: 86 | parser.print_help() 87 | 88 | if __name__ == '__main__': 89 | main() -------------------------------------------------------------------------------- 
/docs/DISCLAIMER.md: -------------------------------------------------------------------------------- 1 | # Disclaimer for BioImage.IO Chatbot 2 | 3 | ## Research Purpose and Use of Copyrighted Material 4 | 5 | The BioImage.IO Chatbot ("Chatbot") is part of a research project focused on Text and Data Mining (TDM) to support advancements in bioimage analysis. The primary purpose of this Chatbot is to assist users in navigating resources, tools, and workflows related to bioimage analysis for research purposes. In compliance with the European Union's copyright exception on TDM as outlined in [Directive (EU) 2019/790](https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32019L0790), the Chatbot utilizes copyrighted materials to which we have lawful access. These materials are used exclusively for non-commercial research purposes. 6 | 7 | ## Evaluation-Only Use of Source Code and Live Demo 8 | 9 | The BioImage.IO Chatbot's source code and live demo instance are provided solely for evaluation purposes. These services are intended to support ongoing research in Text and Data Mining and are not intended for production use or commercial purposes. Users are encouraged to explore the capabilities of the Chatbot but should refrain from deploying it in any production environment or for commercial gain. 10 | 11 | ## General Usage 12 | 13 | While we strive for accuracy, the Chatbot is not a substitute for professional advice, consultation, diagnosis, or any kind of formal scientific interpretation. Users should independently verify the accuracy and completeness of the information provided by the Chatbot. 14 | 15 | ## No Warranties 16 | 17 | The Chatbot service is provided "as is" and "as available" without any warranties of any kind, either express or implied, including but not limited to the implied warranties of merchantability, fitness for a particular purpose, or non-infringement. 
We make no warranty that the service will meet your requirements or be available on an uninterrupted, secure, or error-free basis. 18 | 19 | ## Liability 20 | 21 | Under no circumstances will we be liable for any loss or damage incurred as a result of the use of this Chatbot, including but not limited to any errors or omissions in the content, any unauthorized access to or use of our servers, or any loss of data or profits. 22 | 23 | ## User Responsibility 24 | 25 | The user assumes all responsibility and risk for the use of this Chatbot. It is the user's responsibility to evaluate the accuracy, completeness, or usefulness of any information, opinion, or content available through the Chatbot service. Users are reminded to carefully check with the original sources and to respect the respective licenses of any copyrighted materials. If you are the author of any material used by the Chatbot and wish to opt-out, please contact us via [this form](https://oeway.typeform.com/to/K3j2tJt7). 26 | 27 | ## Third-Party Links 28 | 29 | The Chatbot may provide links to external websites or resources for your convenience. We have no control over these sites and resources, and we are not responsible for their availability, reliability, or the content provided. 30 | 31 | ## Data Privacy 32 | 33 | User interactions with the Chatbot may be stored for analysis and improvement of the service. All data will be handled in accordance with our Privacy Policy. 34 | 35 | ## Privacy Policy 36 | 37 | The personal data you may provide will be used to disseminate information pertaining to the execution of the Horizon Europe Funded AI4Life project (Grant number: 101057970). In accordance with the Grant Agreement, your data will be retained during the project and deleted when it has ended as soon as the retention period established by the EC is over. 
If you would like to update or delete your data during the course of the project, please contact us using [this form](https://oeway.typeform.com/to/K3j2tJt7?typeform-source=bioimage.io). 38 | 39 | ## Modifications 40 | 41 | We reserve the right to modify this disclaimer at any time, effective upon posting of an updated version on this website. Continued use of the Chatbot after any such changes shall constitute your consent to such changes. 42 | -------------------------------------------------------------------------------- /bioimageio_chatbot/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | import yaml 4 | import os 5 | from tqdm import tqdm 6 | from pydantic import BaseModel, Field 7 | from typing import Callable, Optional 8 | import typing 9 | from inspect import signature 10 | from typing import Any, Callable, Dict, Optional 11 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model 12 | from schema_agents import schema_tool 13 | 14 | def get_manifest(): 15 | # If no manifest is provided, download from the repo 16 | if not os.path.exists("./knowledge-base-manifest.yaml"): 17 | print("Downloading the knowledge base manifest...") 18 | response = requests.get("https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/knowledge-base-manifest.yaml") 19 | assert response.status_code == 200 20 | with open("./knowledge-base-manifest.yaml", "wb") as f: 21 | f.write(response.content) 22 | 23 | return yaml.load(open("./knowledge-base-manifest.yaml", "r"), Loader=yaml.FullLoader) 24 | 25 | 26 | def download_file(url, filename): 27 | response = requests.get(url, stream=True) 28 | file_size = int(response.headers.get('content-length', 0)) 29 | 30 | # Initialize the progress bar 31 | progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024) 32 | 33 | with open(filename, 'wb') as f: 34 | for 
data in progress: 35 | # Update the progress bar 36 | progress.update(len(data)) 37 | f.write(data) 38 | 39 | 40 | def extract_schemas(function): 41 | sig = signature(function) 42 | positional_annotation = [ 43 | p.annotation 44 | for p in sig.parameters.values() 45 | if p.kind == p.POSITIONAL_OR_KEYWORD 46 | ][0] 47 | output_schemas = ( 48 | [sig.return_annotation] 49 | if not isinstance(sig.return_annotation, typing._UnionGenericAlias) 50 | else list(sig.return_annotation.__args__) 51 | ) 52 | input_schemas = ( 53 | [positional_annotation] 54 | if not isinstance(positional_annotation, typing._UnionGenericAlias) 55 | else list(positional_annotation.__args__) 56 | ) 57 | return input_schemas, output_schemas 58 | 59 | class ChatbotExtension(BaseModel): 60 | """Chatbot extension.""" 61 | 62 | id: str 63 | name: str 64 | description: str 65 | tools: Optional[Dict[str, Any]] = {} 66 | get_schema: Optional[Callable] = None 67 | get_state: Optional[Callable] = None 68 | info: Optional[Dict[str, Any]] = {} 69 | 70 | class LegacyChatbotExtension(BaseModel): 71 | """A class that defines the interface for a user extension""" 72 | name: str = Field(..., description="The name of the extension") 73 | description: str = Field(..., description="A description of the extension") 74 | get_schema: Optional[Callable] = Field(None, description="A function that returns the schema for the extension") 75 | execute: Callable = Field(..., description="The extension's execution function") 76 | schema_class: Optional[BaseModel] = Field(None, description="The schema class for the extension") 77 | 78 | def convert_to_dict(obj): 79 | if isinstance(obj, BaseModel): 80 | return obj.dict() 81 | if isinstance(obj, dict): 82 | return {k: convert_to_dict(v) for k, v in obj.items()} 83 | if isinstance(obj, list): 84 | return [convert_to_dict(v) for v in obj] 85 | return obj 86 | 87 | 88 | async def legacy_extension_to_tool(extension: LegacyChatbotExtension): 89 | if extension.get_schema: 90 | schema = 
await extension.get_schema() 91 | extension.schema_class = json_schema_to_pydantic_model(schema) 92 | else: 93 | input_schemas, _ = extract_schemas(extension.execute) 94 | extension.schema_class = input_schemas[0] 95 | 96 | assert extension.schema_class, f"Extension {extension.name} has no valid schema class." 97 | 98 | # NOTE: Right now, the first arguments has to be req 99 | async def execute(req: extension.schema_class): 100 | print("Executing extension:", extension.name, req) 101 | # req = extension.schema_class.parse_obj(req) 102 | result = await extension.execute(req) 103 | return convert_to_dict(result) 104 | 105 | execute.__name__ = extension.name 106 | 107 | if extension.get_schema: 108 | execute.__doc__ = schema['description'] 109 | 110 | if not execute.__doc__: 111 | # if extension.execute is partial 112 | if hasattr(extension.execute, "func"): 113 | execute.__doc__ = extension.execute.func.__doc__ or extension.description 114 | else: 115 | execute.__doc__ = extension.execute.__doc__ or extension.description 116 | return schema_tool(execute) -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /bioimageio_chatbot/chatbot_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | import pkgutil 4 | import importlib.util 5 | from pydantic import BaseModel 6 | from bioimageio_chatbot.utils import ChatbotExtension 7 | from bioimageio_chatbot.jsonschema_pydantic import json_schema_to_pydantic_model 8 | from schema_agents import schema_tool 9 | 10 | def get_builtin_extensions(): 11 | extensions = [] 12 | for module in pkgutil.walk_packages(__path__, __name__ + '.'): 13 | if module.name.endswith('_extension'): 14 | if hasattr(module.module_finder, 'find_module'): 15 | ext_module = module.module_finder.find_module(module.name).load_module(module.name) 16 | else: 17 | # for newer python versions, find_spec is used instead of find_module 18 | module_spec = importlib.util.find_spec(module.name) 19 | ext_module = importlib.util.module_from_spec(module_spec) 20 | module_spec.loader.exec_module(ext_module) 21 | exts = ext_module.get_extension() or [] 22 | if isinstance(exts, ChatbotExtension): 23 | exts = [exts] 24 | for ext in exts: 25 | if not isinstance(ext, ChatbotExtension): 26 | print(f"Failed to load chatbot extension: {module.name}.") 27 | continue 28 | if ext.id in [e.id for e in extensions]: 29 | raise ValueError(f"Extension name {ext.id} already exists.") 30 | extensions.append(ext) 31 | 32 | return extensions 33 | 34 | def convert_to_dict(obj): 35 | if isinstance(obj, BaseModel): 36 | return obj.model_dump() 37 | if isinstance(obj, dict): 38 | return {k: convert_to_dict(v) for k, v in obj.items()} 39 | if isinstance(obj, list): 40 | return [convert_to_dict(v) for v in obj] 41 | return obj 42 | 43 | def create_tool_name(ext_id, tool_id=""): 44 | text = f"{ext_id}_{tool_id}" 45 | text = 
text.replace("-", " ").replace("_", " ").replace(".", " ") 46 | words = re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)|\d+', text) 47 | return ''.join(word if word.istitle() else word.capitalize() for word in words) 48 | 49 | def tool_factory(ext_id, tool_id, ext_tool, schema): 50 | input_model = json_schema_to_pydantic_model(schema) 51 | ext_tool.__name__ = create_tool_name(ext_id, tool_id) 52 | ext_tool.__doc__ = input_model.__doc__ 53 | return schema_tool(ext_tool, input_model=input_model) 54 | 55 | async def extension_to_tools(extension: ChatbotExtension): 56 | 57 | if extension.get_schema: 58 | schemas = await extension.get_schema() 59 | tools = [] 60 | for k in schemas: 61 | assert k in extension.tools, f"Tool `{k}` not found in extension `{extension.id}`." 62 | ext_tool = extension.tools[k] 63 | tool = tool_factory(extension.id, k, ext_tool, schemas[k]) 64 | tool.__tool_id__ = k 65 | tools.append(tool) 66 | else: 67 | tools = [] 68 | for k in extension.tools: 69 | ext_tool = extension.tools[k] 70 | ext_tool.__name__ = create_tool_name(extension.id, k) 71 | ext_tool.__tool_id__ = k 72 | tools.append(ext_tool) 73 | 74 | return tools 75 | 76 | async def main(): 77 | extensions = get_builtin_extensions() 78 | tools = [] 79 | for svc in extensions: 80 | tool = await extension_to_tools(svc) 81 | tools.append(tool) 82 | print(tools) 83 | 84 | if __name__ == "__main__": 85 | import json 86 | 87 | schema = { 88 | "type": "object", 89 | "title": "RunScript", 90 | "description": "description", 91 | "properties": { 92 | "script": { 93 | "type": "string", 94 | "description": "Python script to execute", 95 | }, 96 | "inputs": { 97 | "type": "array", 98 | "description": "Input objects to be restored into the script", 99 | "items": { 100 | "type": "string", 101 | "properties": { 102 | "key": { 103 | "type": "string", 104 | "description": "Key of the object from the store to be restored", 105 | }, 106 | "name": { 107 | "type": "string", 108 | "description": "Variable name of 
the object", 109 | } 110 | } 111 | } 112 | }, 113 | "outputs": { 114 | "type": "array", 115 | "description": "Objects produced by the script as outputs or for further use", 116 | "items": { 117 | "type": "string" 118 | } 119 | } 120 | }, 121 | "required": ["script", "outputs"], 122 | "allow_additional_properties": False, 123 | } 124 | 125 | model = json_schema_to_pydantic_model(schema) 126 | print(model) 127 | asyncio.run(main()) -------------------------------------------------------------------------------- /tests/test_chatbot_answer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from bioimageio_chatbot.chatbot import create_customer_service, get_builtin_extensions, QuestionWithHistory, UserProfile 3 | from bioimageio_chatbot.evaluation import evaluate 4 | from schema_agents.schema import Message 5 | import json 6 | import pandas as pd 7 | import asyncio 8 | import pytest 9 | 10 | KNOWLEDGE_BASE_PATH = "./bioimageio-knowledge-base" 11 | builtin_extensions = get_builtin_extensions() 12 | extensions = [{key:value for key, value in ext.model_dump().items() if key in ["name", "description"]} for ext in builtin_extensions] 13 | customer_service = create_customer_service(builtin_extensions) 14 | 15 | dir_path = os.path.dirname(os.path.realpath(__file__)) 16 | 17 | @pytest.fixture 18 | def eval_questions(): 19 | 20 | eval_file = os.path.join(dir_path, "Minimal-Eval-Test-20240111.csv") 21 | if os.path.exists(eval_file): 22 | query_answer = pd.read_csv(eval_file) 23 | else: 24 | query_answer = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTVgE2_eqBiAktHmg13jLrFrQJhbANkByY40f9vxptC6pShcjLzEuHzx93ATo0c0XcSYs9W1RRbaDdu/pub?gid=1280822572&single=true&output=csv") 25 | eval_index = range(1,10) 26 | query_answer = query_answer.iloc[eval_index] 27 | 28 | question_col = "Question" 29 | channel_id_col = "GT: Retrieved channel id" 30 | question_list = list(query_answer[question_col]) 31 | reference_answer_list 
= list(query_answer["GPT-4-turbo Answer (With Context)- GT"]) 32 | # ground_type = "Document Retrieval" 33 | # make it as list as the length equals to question_list 34 | # ground_type_list = [ground_type] * len(question_list) 35 | channel_id_list_gt = list(query_answer[channel_id_col]) 36 | return question_list, reference_answer_list, channel_id_list_gt 37 | 38 | 39 | async def validate_chatbot_answer(question, reference_answer, use_tools_gt, channel_id_gt, relevance_gt, similary_score_gt): 40 | chat_history=[] 41 | profile = UserProfile(name="", occupation="", background="") 42 | 43 | m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), chatbot_extensions=extensions) 44 | resp = await customer_service.handle(Message(content=m.model_dump_json(), data=m , role="User")) 45 | # use_tools =resp[0].data.steps[0].details["use_tools"] 46 | # assert use_tools == use_tools_gt 47 | # execute_tool = resp[0].data.steps[1].name 48 | # # get the string after 'Execute: ' 49 | # channel_id = execute_tool.split(": ")[1] 50 | # assert channel_id == channel_id_gt+"(docs)" 51 | 52 | # eval score 53 | # relevance = resp[0].data.steps[-1].details["relevant"] 54 | # assert relevance == relevance_gt 55 | chatbot_answer = resp[0].data.steps[-1].details['details'][0]['response'] 56 | similary_score = await evaluate(question, reference_answer, chatbot_answer) 57 | assert similary_score >= similary_score_gt 58 | 59 | 60 | @pytest.mark.asyncio 61 | async def test_chatbot1(eval_questions): 62 | 63 | # await validate_chatbot_answer( 64 | # question="What is deepImageJ?", 65 | # reference_answer="DeepImageJ is a user-friendly plugin designed to facilitate the utilization of pre-trained neural networks within ImageJ and Fiji. It serves as a bridge between developers of deep-learning models and end-users in life-science applications, promoting the sharing of trained models across research groups. 
DeepImageJ is particularly valuable in various imaging domains and does not necessitate deep learning expertise or programming skills.", 66 | # use_tools_gt=True, 67 | # channel_id_gt="deepimagej(docs)", 68 | # relevance_gt=True, 69 | # similary_score_gt=4.0 70 | # ) 71 | 72 | # await validate_chatbot_answer( 73 | # question="What is a Bioimage Model Zoo community partner?", 74 | # reference_answer="A BioImage Model Zoo community partner is an organization, company, research group, or software team that can consume and/or produce resources of the BioImage.IO model zoo. These partners continuously and openly contribute resources of their own, and they can participate in the decision-making process of the model specification. Additionally, they can show their logo in BioImage.IO, connect CI to automatically test new model compatibility with their software, and use other infrastructure features provided by BioImage.IO. The community partners can host their own Github repository for storing models and other relevant resources, which are then dynamically linked to the central repository of BioImage.IO. 
class QuotaManager:
    """Per-user usage quota tracker backed by a SQLite table.

    Every tracked user has one row in the ``quotas`` table holding the
    remaining quota, the time of the last reset, the reset period in seconds
    and the maximum quota that is restored when the period elapses. Users
    listed in ``vip_list`` are always reported as having infinite quota.
    """

    # Supported reset periods mapped to their length in seconds.
    _PERIOD_SECONDS = {
        'hourly': 3600,
        'daily': 86400,
        'weekly': 7 * 86400,
        'monthly': 30 * 86400,
    }

    def __init__(self, db_file=':memory:', vip_list=None, default_quota=1.0, default_reset_period='daily'):
        """Open (or create) the quota database.

        Args:
            db_file: SQLite database path; the default keeps the data in memory.
            vip_list: user ids that are exempt from quota limits.
            default_quota: quota assigned to a user on first sight.
            default_reset_period: 'hourly', 'daily', 'weekly' or 'monthly'.

        Raises:
            ValueError: if ``default_reset_period`` is not a supported period.
        """
        # Validate the period before connecting so that an invalid value does
        # not leave an open connection behind.
        default_reset_seconds = self.period_to_seconds(default_reset_period)
        self.db_file = db_file
        self.conn = sqlite3.connect(self.db_file)
        self.vip_list = vip_list or []
        self.default_quota = default_quota
        self.default_reset_period = default_reset_seconds
        self._setup_database()

    def _setup_database(self):
        """Create the ``quotas`` table if it does not exist yet."""
        cursor = self.conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS quotas (
                user_id TEXT PRIMARY KEY,
                quota REAL,
                last_reset REAL,
                reset_period INTEGER,  -- Reset period in seconds
                max_quota REAL
            )
        ''')
        self.conn.commit()

    def set_user_quota(self, user_id, max_quota, reset_period):
        """Create or overwrite a user's quota, starting with a full allowance.

        Raises:
            ValueError: if ``reset_period`` is not a supported period.
        """
        reset_seconds = self.period_to_seconds(reset_period)
        cursor = self.conn.cursor()
        cursor.execute('''
            INSERT OR REPLACE INTO quotas (user_id, quota, last_reset, reset_period, max_quota)
            VALUES (?, ?, ?, ?, ?)
        ''', (user_id, max_quota, time.time(), reset_seconds, max_quota))
        self.conn.commit()

    def check_quota(self, user_id):
        """Return the quota currently available to ``user_id``.

        VIP users get ``float('inf')``. A known user whose reset period has
        elapsed is refilled to the maximum first. An unknown user is inserted
        with the default quota and default reset period.
        """
        if user_id in self.vip_list:
            return float('inf')

        cursor = self.conn.cursor()
        cursor.execute("SELECT quota, last_reset, reset_period, max_quota FROM quotas WHERE user_id = ?", (user_id,))
        row = cursor.fetchone()
        if row:
            quota, last_reset, reset_period, max_quota = row
            if time.time() - last_reset >= reset_period:
                # The period is over: refill and start a new period now.
                quota = max_quota
                last_reset = time.time()
                cursor.execute("UPDATE quotas SET quota = ?, last_reset = ? WHERE user_id = ?",
                               (quota, last_reset, user_id))
                self.conn.commit()
        else:
            quota, last_reset, reset_period, max_quota = self.default_quota, time.time(), self.default_reset_period, self.default_quota
            cursor.execute("INSERT INTO quotas (user_id, quota, last_reset, reset_period, max_quota) VALUES (?, ?, ?, ?, ?)",
                           (user_id, quota, last_reset, reset_period, max_quota))
            self.conn.commit()
        return quota

    def use_quota(self, user_id, amount):
        """Deduct ``amount`` from the user's quota if enough is available.

        Returns:
            True if the amount was available (always for VIP users), else False.
        """
        if self.check_quota(user_id) >= amount:
            cursor = self.conn.cursor()
            cursor.execute("UPDATE quotas SET quota = quota - ? WHERE user_id = ?",
                           (amount, user_id))
            self.conn.commit()
            return True
        else:
            return False

    def reset_quota(self, user_id):
        """Refill the user's quota to its maximum and restart the reset period."""
        cursor = self.conn.cursor()
        cursor.execute("UPDATE quotas SET quota = max_quota, last_reset = ? WHERE user_id = ?",
                       (time.time(), user_id))
        self.conn.commit()

    def period_to_seconds(self, period):
        """Translate a period name into seconds.

        Raises:
            ValueError: if ``period`` is not 'hourly', 'daily', 'weekly' or 'monthly'.
        """
        try:
            return self._PERIOD_SECONDS[period]
        except (KeyError, TypeError):
            # TypeError covers unhashable values; both cases are an invalid period.
            raise ValueError("Invalid period. Choose from 'hourly', 'daily', 'weekly', 'monthly'.") from None
class LangchainCompressor:
    """Download web pages and return the text chunks most relevant to a query."""

    def __init__(self, device="cuda"):
        # `device` is currently unused: embeddings come from OpenAIEmbeddings
        # rather than a local HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") model.
        self.embeddings = OpenAIEmbeddings()
        self.spaces_regex = re.compile(r" {3,}")

    def preprocess_text(self, text: str) -> str:
        """Normalise newlines and long space runs before keyword (BM25) indexing."""
        with_spaced_newlines = text.replace("\n", " \n")
        collapsed = self.spaces_regex.sub(" ", with_spaced_newlines)
        return collapsed.strip()

    async def faiss_embedding_query_urls(self, query: str, url_list: list[str], num_results: int = 5,
                                         similarity_threshold: float = 0.5, chunk_size: int = 500) -> list[Document]:
        """Fetch ``url_list``, split the pages into chunks and retrieve the best matches.

        Pages that fail to download are reported with ``print`` and skipped. A
        keyword retriever (BM25) is combined with an embedding retriever (FAISS)
        whose results are filtered for redundancy and similarity.

        Returns:
            At most ``num_results`` documents, or an empty list if no page
            could be downloaded.

        Raises:
            ImportError: if ``BM25Retriever`` could not be imported.
        """
        # Download every page concurrently; failures come back as exception objects.
        downloads = [download_html(url) for url in url_list]
        outcomes = await asyncio.gather(*downloads, return_exceptions=True)

        fetched_pages = []
        for outcome, url in zip(outcomes, url_list):
            if not isinstance(outcome, Exception):
                fetched_pages.append((outcome, url))
                continue
            print(f'LLM_Web_search | An exception occurred for {url}: {outcome}')

        if len(fetched_pages) == 0:
            return []

        documents = [html_to_plaintext_doc(page, url) for page, url in fetched_pages]

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=10,
            separators=["\n\n", "\n", ".", ", ", " ", ""],
        )
        chunks = splitter.split_documents(documents)

        # Dense retriever: finds chunks by semantic similarity.
        vector_store = FAISS.from_documents(chunks, self.embeddings)
        dense_retriever = vector_store.as_retriever(search_kwargs={"k": num_results})

        if not BM25Retriever:
            raise ImportError("Could not import BM25Retriever. Please ensure that you have installed "
                              "langchain==0.0.352")

        # Sparse retriever: finds chunks by keyword overlap.
        sparse_retriever = BM25Retriever.from_documents(chunks, preprocess_func=self.preprocess_text)
        sparse_retriever.k = num_results

        # Drop near-duplicate chunks, then chunks below the similarity threshold.
        compressor = DocumentCompressorPipeline(
            transformers=[
                EmbeddingsRedundantFilter(embeddings=self.embeddings),
                EmbeddingsFilter(embeddings=self.embeddings, k=None,
                                 similarity_threshold=similarity_threshold),
            ]
        )
        filtered_dense_retriever = ContextualCompressionRetriever(base_compressor=compressor,
                                                                  base_retriever=dense_retriever)

        combined_retriever = EnsembleRetriever(
            retrievers=[sparse_retriever, filtered_dense_retriever], weights=[0.4, 0.5]
        )
        ranked_docs = await combined_retriever.aget_relevant_documents(query)

        # The ensemble may return more than `num_results` documents; trim the excess.
        return ranked_docs[:num_results]
async def aask(images, messages, max_tokens=1024):
    """Render the given images into one figure and ask the vision model about it.

    Args:
        images: non-empty sequence of objects exposing ``url`` and ``title``;
            each image is downloaded from its ``url``.
        messages: list of strings appended as text parts after the image.
        max_tokens: completion token limit passed to the chat completion call.

    Returns:
        The content of the first choice returned by the model.

    Raises:
        ValueError: if ``images`` is empty or a download cannot be opened as an image.
        AssertionError: if an entry of ``messages`` is not a string.
    """
    if not images:
        raise ValueError("At least one image is required.")

    aclient = AsyncOpenAI()
    user_message = []

    # Download all images with a single HTTP client and open them as PIL images.
    img_objs = []
    async with httpx.AsyncClient() as client:
        for image in images:
            response = await client.get(image.url)
            response.raise_for_status()
            try:
                img = Image.open(BytesIO(response.content))
            except Exception as e:
                raise ValueError(f"Failed to read image {image.title or ''} from {image.url}. Error: {e}") from e
            img_objs.append(img)

    # Use a dedicated figure per call (never the shared pyplot "current figure")
    # so that earlier calls cannot leak their drawings into this one.
    # A single image keeps matplotlib's default figure size; several images are
    # laid out in one row.
    figsize = (15, 5) if len(img_objs) > 1 else None
    fig, axes = plt.subplots(1, len(img_objs), figsize=figsize, squeeze=False)
    buffer = BytesIO()
    try:
        for ax, img, image in zip(axes[0], img_objs, images):
            ax.imshow(img)
            # Title each subplot from its own image, not from the first one.
            if image.title:
                ax.set_title(image.title)
        fig.tight_layout()
        # Cap the figure width (in pixels) at 512 per image and 1024 in total,
        # never enlarging a figure that is already narrower than that.
        fig_width = min(1024, len(img_objs)*512, fig.get_figwidth()*fig.dpi)
        # set_size_inches takes inches, so convert the pixel width back via dpi.
        fig.set_size_inches(fig_width/fig.dpi, fig.get_figheight(), forward=True)
        fig.savefig(buffer, format="png")
    finally:
        # Always release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)

    base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    # The rendered figure goes first, followed by the text parts.
    user_message.append({
        "type": "image_url",
        "image_url": {
            "url": f"data:image/png;base64,{base64_image}"
        }
    })

    for message in messages:
        assert isinstance(message, str), "Message must be a string."
        user_message.append({"type": "text", "text": message})

    response = await aclient.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful AI assistant that help user to inspect the provided images visually based on the context, make insightful comments and answer questions about the provided images."
            },
            {
                "role": "user",
                "content": user_message
            }
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content
class BioImageArchiveClient:
    # Thin async client for the BioStudies REST API, reduced to the fields the
    # chatbot needs. The docstrings of the two public coroutines are kept
    # verbatim because the methods are wrapped as tools elsewhere in this file.
    def __init__(self):
        self._base_url = "https://www.ebi.ac.uk/biostudies/api/v1"

    async def search_bioimage_archive(self,
        query: str = Field(..., description="The search query string."),
        pageSize: int = Field(10, gt=0, description="Number of search results per page."),
        page: int = Field(1, description="Page number of the search results."),
        sortOrder: Optional[str] = Field("descending", description="Sort order: ascending or descending.")
    ) -> Dict[str, Any]:
        """Search the BioImage Archive for studies and image datasets, returning a list of studies. The link format to each study in the results is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
        # NOTE(review): the parameter defaults are pydantic Field objects, so a
        # direct call (not through the tool wrapper) should pass every argument
        # explicitly; otherwise the Field objects end up in the query string.
        url = f"{self._base_url}/bioimages/search"
        params = {
            "query": query,
            "pageSize": pageSize,
            "page": page,
            "sortOrder": sortOrder
        }
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            return self._simplify_search_results(response.json())

    def _simplify_search_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
        # Keep only title, author, content and accession of each hit plus the
        # paging information. Fields are read with .get so that a hit lacking
        # one of them yields None instead of raising KeyError.
        wanted_fields = ("title", "author", "content", "accession")
        simplified_results = {
            "hits": [
                {field: hit.get(field) for field in wanted_fields}
                for hit in results.get("hits", [])
            ],
            "totalHits": results.get("totalHits"),
            "page": results.get("page"),
            "pageSize": results.get("pageSize")
        }
        return simplified_results

    async def read_bioimage_archive_study(self, accession: str = Field(..., description="Accession number of the study.")) -> Dict[str, Any]:
        """Read detailed information about a specific study from the BioImage Archive, returning a simplified dictionary. The link format to the study is: https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}."""
        url = f"{self._base_url}/studies/{accession}"
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            return self._simplify_study_details(response.json())

    def _simplify_study_details(self, study_details: Dict[str, Any]) -> Dict[str, Any]:
        # Reduce a study record to title, description, accession, link and
        # author names. Missing parts fall back to empty values.
        accession = study_details.get("accno", "")
        section = study_details.get("section", {})
        simplified_details = {
            "title": "",
            "description": "",
            "accession": accession,
            "link": f"https://www.ebi.ac.uk/biostudies/bioimages/studies/{accession}",
            "authors": []
        }

        # Title and description are stored as named attributes of the section.
        for attribute in section.get("attributes", []):
            if attribute.get("name") == "Title":
                simplified_details["title"] = attribute.get("value", "")
            elif attribute.get("name") == "Description":
                simplified_details["description"] = attribute.get("value", "")

        # Each "Author" subsection contributes the value of its "Name" attribute.
        for subsection in section.get("subsections", []):
            if subsection.get("type") != "Author":
                continue
            # .get keeps an attribute without "name"/"value" from raising
            # KeyError, matching how the section attributes are read above.
            author_attributes = {attr.get("name"): attr.get("value", "") for attr in subsection.get("attributes", [])}
            simplified_details["authors"].append(author_attributes.get("Name", ""))

        return simplified_details
if __name__ == "__main__":
    import asyncio
    async def main():
        """Manual smoke test: run one search and read one study, printing both."""
        bioimage_archive_client = BioImageArchiveClient()
        # Example to search in BioImage Archive with simplified results.
        # All arguments are passed explicitly: the method's defaults are
        # pydantic Field objects, which must not reach the HTTP query string
        # when the method is called directly.
        search_results = await bioimage_archive_client.search_bioimage_archive(query="cells", pageSize=1, page=1, sortOrder="descending")
        print(search_results)

        # Example to read a specific study from BioImage Archive with simplified details
        study_details = await bioimage_archive_client.read_bioimage_archive_study(accession="S-BSST314")
        print(study_details)

    # Run the async function
    asyncio.run(main())
Users can personalize responses by clicking `Edit Profile` and save their settings for future conversations by clicking `Save`.* 13 | 14 | As of today, our chatbot integrates six extensions: document search in the bioimage.io knowledge base, tool search on the Bioimage Informatics Index (biii.eu), bioimage topic search in the Bioimage Archive and the image.sc Forum, web search, and information search in the Bioimage Model Zoo. The document search utilizes knowledge bases from the following pivotal communities: bioimage.io [2], Imjoy [3], deepimageJ [4], ImageJ [5], bio.tools [6], and scikit-image [7]. We also allow users to specify a preferred extension for information retrieval, as shown in Figure 3. If an extension is designated, the chatbot sources information using the specific extension and its corresponding source. Otherwise, it uses an intelligent selection process driven by a schema-based agent to choose the most relevant extension based on the user's query. 15 | 16 | ![channels](./screenshots/extensions.png) 17 | *Figure 3. Users can personalize the conversation by selecting a specific channel from the ‘Knowledge Base Channel’.* 18 | 19 | ### Building the Knowledge Base 20 | 21 | The knowledge base is efficiently and collaboratively constructed by downloading documentation from given URLs. These can be repositories, PDFs, or other forms of documentation. We use a regular expression splitter to segment the documentation into manageable chunks for efficient and accurate retrieval. These chunks are then embedded and stored as vectors in a FAISS [1]-based vector database. 22 | 23 | ## Schema-Based Agent Design 24 | 25 | The chatbot's ability to understand and respond to user queries is substantially improved by employing a schema-based agent design. Unlike traditional context-based models, our approach utilizes predefined schemas to guide the conversation and information retrieval process.
26 | 27 | The schema-based agent operates on the function-call LLM [8], and uses input and output schemas to generate text output. Within this implementation, we construct a customer service chatbot by defining a role class, as shown in Figure 4. 28 | 29 | ![role_create](./screenshots/role_create.png) 30 | *Figure 4. Creation of a chatbot role class named ‘CustomerServiceRole’ by defining fields of the role class.* 31 | 32 | ## Extensions 33 | The BioImage.IO Chatbot employs diverse methods to generate responses, currently encompassing six distinct response modes. The response mode is chosen by the schema-based agent based on the user's query and the selected channel. 34 | 35 | ### Search BioImage Docs 36 | This extension allows the chatbot to search for information in a community-driven, bioimage-related knowledge base. With a specific query, the chatbot extracts essential elements from the user's question to fetch information from the relevant documentation. 37 | ![direct-response](./screenshots/search-bioimage-docs.png) 38 | *Figure 6. Search in Bioimage Knowledge base documentation.* 39 | 40 | ### Search BioImage Information Index (biii.eu) 41 | This extension allows the chatbot to search for online software tools on biii.eu. 42 | ![search-biii](./screenshots/search-biii.png) 43 | *Figure 7. Search in biii.eu.* 44 | 45 | The process begins with an initial response based on the user's query (`request`), which serves as a foundation for generating a new `query` for targeted information retrieval. This is combined with user profile data (`user_info`) and the query to produce a comprehensive final response. 46 | 47 | ### Search Bioimage Archive 48 | This extension allows the chatbot to search the dataset index of the Bioimage Archive. 49 | ![search-bioimage-archive](./screenshots/search-bioimage-archive.png) 50 | *Figure 8.
Search in bioimage archive.* 51 | 52 | ### Search image.sc Forum 53 | This extension allows the chatbot to search for bioimage-related topics and software issues in the image.sc forum. 54 | ![search-image-sc](./screenshots/search-image-forum.png) 55 | *Figure 9. Search in image.sc forum.* 56 | 57 | ### Search Web 58 | This extension allows the chatbot to search for information on the web. It is triggered when the chatbot determines that it cannot find relevant information in the knowledge base. 59 | ![search-web](./screenshots/web-search.png) 60 | *Figure 10. Search on the web.* 61 | 62 | 63 | ### BioImage Model Zoo 64 | This mode is designed for queries requiring detailed model information or specific actions, generating and executing Python scripts for tailored solutions. 65 | ![script-gen-exe-retrieval](./screenshots/search-model-zoo.png) 66 | *Figure 11. Scripting retrieval for complex queries.* 67 | 68 | It involves creating a `ModelZooInfoScript` schema with fields like `request`, `user info`, and `script`, where `script` is Python code for API interactions or data manipulation. The final response is formulated by integrating the script's output with the `request` and `user info`. 69 | 70 | ## References 71 | 72 | 1. [FAISS](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) 73 | 2. [Bioimage.io](https://bioimage.io/docs/#/) 74 | 3. [Imjoy](https://imjoy.io/docs/#/) 75 | 4. [DeepImageJ](https://deepimagej.github.io/) 76 | 5. [ImageJ](https://imagej.net) 77 | 6. [bio.tools](https://bio.tools) 78 | 7. [scikit-image](https://scikit-image.org/docs/stable/) 79 | 8. [Function-Calling API](https://openai.com/blog/function-calling-and-other-api-updates) 80 | 9.
async def _ask_assistant(agent, question, select_extensions, chat_history):
    """Send one question to the assistant and return its non-empty reply as strings."""
    profile = UserProfile(name="lulu", occupation="data scientist", background="machine learning and AI")
    m = QuestionWithHistory(question=question, chat_history=chat_history, user_profile=UserProfile.model_validate(profile), channel_id=None, chatbot_extensions=select_extensions)
    resp = await agent.handle(Message(content="", data=m, role="User"))
    assert resp
    return [str(element) for element in resp]


@pytest.mark.asyncio
async def test_chatbot(builtin_extensions, melman):
    """Ask questions with one extension selected at a time and check the reply.

    For every extension, each reply must contain all of the listed marker
    strings in at least one of its stringified elements.
    """
    # (extension id, markers expected in the reply, questions to ask)
    scenarios = [
        (
            "bioimage_archive",
            ["BioimageArchiveSearch"],
            [
                "Which tool can I use to analyse western blot image?",
                "Which tool can I use to segment an cell image?",
                "How can I test the models?",
                "What are Model Contribution Guidelines?",
            ],
        ),
        (
            "biii",
            ["BiiiSearch"],
            [
                "What bioimage analysis tools are available for quantifying cell migration?",
                "Are there any workflows on biii.eu for 3D reconstruction of neuronal networks from electron microscopy images?",
            ],
        ),
        (
            "image_sc_forum",
            ["ImageScForumSearch", '''posts":'''],
            [
                "I got a problem, StarDist stops working! help me find it in image.sc forum.",
            ],
        ),
        (
            "web",
            ["WebSearch", '''"content": '''],
            [
                "I want to know more about the BioImage Archive",
            ],
        ),
    ]

    # One history list is shared by all questions.
    chat_history = []
    for extension_id, expected_markers, questions in scenarios:
        select_extensions = [
            {"id": extension_id}
        ]
        for question in questions:
            str_resp = await _ask_assistant(melman, question, select_extensions, chat_history)
            for marker in expected_markers:
                assert any([marker in element for element in str_resp])
9 | 10 | ## Access the BioImage.IO Chatbot Interface 11 | Launch the chatbot through the BioImage.IO website [here](https://bioimage.io/chat/) or use the dedicated user interface. 12 | 13 | ## Video for Reproducing the Scenarios 14 | * **[A video showcasing information retrieval (as described in scenarios a-c)](https://zenodo.org/records/10967840/files/Supplementary-Video-1-bioimageio-chatbot-information-retrieval.mp4?download=1)** 15 | * **[A video showcasing AI model execution (as described in scenario d)](https://zenodo.org/records/10967840/files/Supplementary-Video-2-bioimageio-chatbot-ai-image-analysis.mp4?download=1)** 16 | 17 | 18 | ### Scenario (a): Querying Bioimage Analysis Documentation 19 | 20 | - **Initiate a Query**: Type a question related to bioimage analysis, e.g., "What are the best practices for optimizing model performance on bioimage.io?" 21 | - **Review the Chatbot's Response**: The chatbot will provide an answer that includes information extracted from the BioImage Model Zoo documentation. 22 | 23 | ### Scenario (b): Exploring the Human Protein Atlas 24 | 25 | - **Initiate a Query**: Ask the chatbot to find protein information in the Human Protein Atlas by typing "Tell me about PML protein and show me the cell images" 26 | - **Interpret the Results**: The chatbot will respond by constructing an API call to the Protein Atlas database and displaying the relevant information about the PML protein, including cell images. 27 | 28 | ### Scenario (c): Querying the BioImage Archive 29 | 30 | - **Initiate a Query**: Ask the chatbot to find cell images at the G1 phase by typing "Please, find datasets of cell images at G1 phase." 31 | - **Interpret the Results**: The chatbot will initiate an API call to the BioImage Archive server, and return results such as a study titled "DeepCycle: Deep learning reconstruction of the closed cell cycle trajectory from single-cell unsegmented microscopy images."
32 | 33 | ### Scenario (d): Running AI Models for Image Analysis 34 | 35 | - **Prerequisites**: Ensure you have Chrome or a Chromium-based browser installed on your computer. 36 | - **Download Image Data**: Begin by creating a new folder on your computer named `test-images`. Download the image data file from [this link](https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/example-data/nuclei.tif) and save it into the `test-images` folder. 37 | - **Initiate Bioimage Analyst**: Navigate to the BioImage.IO chatbot interface at https://bioimage.io/chat/. Note that only Chrome and Chromium-based browsers are supported at the moment. Select "Bioimage Analyst(Bridget)" located in the upper right corner of the chatbot interface. 38 | - **Mount your Data Folder**: Within the chat interface, click on the "Mount Files" button located below the dialog window. This action will allow you to mount the `test-images` folder that contains your downloaded image data. The chatbot will confirm the successful mounting of the folder; you can then ask it to list the files contained within to ensure that your data is ready for analysis. 39 | - **Perform segmentation using Cellpose model**: Type "Segment the image `/mnt/nuclei.tif` using Cellpose" to run the Cellpose model on the image data. Upon successful execution of the model, the chatbot will notify you that the segmentation process is complete and will display the analyzed results. Optionally, you can ask it to "count the number of nuclei in the image" if successfully segmented, "plot the size distribution of nuclei", or you can tell it to "use the visual inspection tool to analyze the figure and create a report about the size distribution". 40 | 41 | ### Scenario (e): Developing New Extensions 42 | 43 | Follow the steps below to develop a new extension for microscope stage control and image capture. 
For a detailed tutorial, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/docs/bioimage-chatbot-extension-tutorial.ipynb) or access the Jupyter Notebook directly through ImJoy [here](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1). 44 | 45 | ### Scenario (f): Controlling a Microscope Stage and Capturing Images 46 | 47 | - **Pre-requisites**: You will need a microscope and the squid control software. 48 | 49 | - **Create microscope extension**: Following the example in the above [chatbot extension example notebook](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1), create a hypha service extension for controlling the microscope: 50 | 1. **Setup the Developer Environment**: Open a Jupyter Notebook. Install and import the `imjoy_rpc`, `hypha_rpc` and `pydantic` packages. 51 | 2. **Define Input Schemas**: Create classes for `MoveStageInput` and `SnapImageInput` to structure the user input. (Note: To help the chatbot understand the "center", you will need to tell the chatbot about the boundaries of the stage via the docstring of the `MoveStageInput` class) 52 | 3. **Implement Control Functions**: Write asynchronous functions `move_stage` and `snap_image`. 53 | 4. **Setup Extension Interface**: Develop the extension interface and define a schema getter function. 54 | 5. **Register the Extension**: Register the extension as a hypha service and connect it to the chatbot. 55 | - **Initiate a Query**: Ask the chatbot to "Please move to the center and snap an image". 56 | - **Interpret the Results**: The chatbot will execute the `move_stage` function to move the microscope stage to the center and then capture an image using the `snap_image` function. 
# File: bioimageio_chatbot/chatbot_extensions/hpa_extension.py
from bioimageio_chatbot.utils import ChatbotExtension
from schema_agents import schema_tool
from pydantic import Field, BaseModel
from typing import Optional, List, Dict, Any, Union
import asyncio
import pandas as pd
from pathlib import Path
import requests
import re
import os
from bioimageio_chatbot.utils import download_file


class HPAClient:
    """Client for the Human Protein Atlas (proteinatlas.org).

    On construction the full ``proteinatlas.tsv.zip`` table is downloaded
    into ``./data`` (unless it is already there) and loaded into memory;
    ``search_hpa`` searches that table. ``read_protein_info`` and
    ``get_cell_image`` query the website.

    The docstrings of the three tool methods are kept verbatim because the
    methods are wrapped with ``schema_tool`` in ``get_extension``;
    implementation notes are therefore written as comments.
    """

    # Seconds to wait for proteinatlas.org before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self._base_url = 'https://www.proteinatlas.org/download/proteinatlas.tsv.zip'
        folder = Path('./data')
        file_path = os.path.join(folder, 'proteinatlas.tsv.zip')
        # Download the table only if it is not already in the ./data folder.
        if not os.path.exists(file_path):
            os.makedirs(folder, exist_ok=True)
            download_file(self._base_url, file_path)
        # Load and preprocess data at startup.
        self.data = pd.read_csv(file_path, delimiter='\t')
        # Lower-cased string copy of every cell for case-insensitive searching.
        self.preprocessed_data = self.data.apply(lambda x: x.astype(str).str.lower())

    async def _fetch(self, url: str) -> requests.Response:
        """GET ``url`` in a worker thread so the event loop is not blocked."""
        return await asyncio.to_thread(requests.get, url, timeout=self.REQUEST_TIMEOUT)

    # Returns one dict per matching row (restricted to ``selected_columns``),
    # ordered by the number of columns that contain the query.
    async def search_hpa(self,
        query: str = Field(..., description="Enter gene names, functions, or disease terms to search in the Human Protein Atlas."),
        limitSize: int = Field(10, gt=0, description="Number of returned items per search.")
    ) -> List[Dict[str, Any]]:
        """Search the Human Protein Atlas for proteins based on a query string, return the top search results."""
        query = query.lower()

        # regex=False: the query is free text, so characters such as "(" or
        # "+" must be matched literally instead of being parsed as a regex.
        match_counts = self.preprocessed_data.apply(
            lambda column: column.str.contains(query, regex=False)
        ).sum(axis=1)
        # Rows without any hit are not results; drop them before ranking.
        match_counts = match_counts[match_counts > 0]
        match_counts = match_counts.sort_values(ascending=False).head(limitSize)

        selected_columns = ['Gene', 'Gene synonym', 'Ensembl',
                            'Gene description', 'Subcellular location', 'Subcellular main location', 'Subcellular additional location',
                            'Biological process', 'Molecular function', 'Uniprot', 'Antibody',
                            'Disease involvement', 'Secretome function', 'CCD Protein', 'CCD Transcript',
                            'Evidence', 'Protein class']

        return [
            self.data.loc[index, selected_columns].to_dict()
            for index in match_counts.index
        ]

    # Raises requests.HTTPError when the site answers with an error status.
    async def read_protein_info(self,
        ensembl: str = Field(..., description="Ensembl ID of the protein.")
    ) -> Dict[str, Any]:
        """Get detailed information about a protein from the Human Protein Atlas."""
        json_link = f"https://www.proteinatlas.org/{ensembl}.json"
        response = await self._fetch(json_link)
        # check if the request was successful
        response.raise_for_status()
        return response.json()

    # Returns a list of absolute thumbnail URLs, or the string
    # 'No cell image available.' when the page reports no images.
    async def get_cell_image(self,
        gene: str = Field(..., description="Gene name of the protein."),
        ensembl: str = Field(..., description="Ensembl ID of the protein."),
        section: str = Field("subcellular", description="Section of the Human Protein Atlas to search for the protein. Valid options are 'subcellular', 'tissue',")
    ) -> Union[List[str], str]:
        """Retrieve a list of cell image links from the Human Protein Atlas, where a specific protein is tagged in the green channel.
        ALWAYS render the result thumbnail images as a horizatal table and create link (format: `[![](http://..._thumb.jpg)](http://....jpg)`) to the full-size image without the '_thumb' suffix."""
        link_name = f"{ensembl}-{gene}"
        http_link = f"https://www.proteinatlas.org/{link_name}/{section}"
        # read the source code of the page
        response = await self._fetch(http_link)
        # NOTE(review): the HTML tag around "Not available" was lost in the
        # copy of the file this was restored from; <p> is an assumption -
        # confirm against the live page markup.
        if '<p>Not available</p>' in response.text:
            return 'No cell image available.'
        # Capture the protocol-relative thumbnail URL inside src="...".
        # NOTE(review): the group name was also lost; with a single group
        # re.findall returns the group text regardless of its name.
        pattern = r'src="(?P<link>//images\.proteinatlas\.org/.*?_red_green_thumb\.jpg)"'
        image_links = re.findall(pattern, response.text)
        # Point every link at the 'blue_red_green_yellow' channel combination:
        # replace 'red_green' when 'blue' is not in the link, otherwise
        # replace 'blue_red_green'.
        image_links = [
            link.replace('red_green', 'blue_red_green_yellow')
            if 'blue' not in link
            else link.replace('blue_red_green', 'blue_red_green_yellow')
            for link in image_links
        ]
        # The links are protocol-relative; make them absolute. The '_thumb'
        # suffix is kept here.
        return [f"https:{link}" for link in image_links]


def get_extension():
    """Build the "hpa" chatbot extension exposing search, read and get_cell_image."""
    hpa_client = HPAClient()
    search_tool = schema_tool(hpa_client.search_hpa)
    read_tool = schema_tool(hpa_client.read_protein_info)
    get_cell_image_tool = schema_tool(hpa_client.get_cell_image)

    return ChatbotExtension(
        id="hpa",
        name="Human Protein Atlas",
        description="Search the Human Protein Atlas to find human protein-related information, including gene expressions, functions, locations, disease associations, and cell images etc. When searching for cell images, always search for the gene name and Ensembl ID of the protein.",
        tools=dict(
            search=search_tool,
            read=read_tool,
            get_cell_image=get_cell_image_tool
        )
    )


if __name__ == "__main__":
    async def main():
        extension = get_extension()
        query = "brain"
        limitSize = 2
        print(await extension.tools["search"](query=query, limitSize=limitSize))

    # Run the async function
    asyncio.run(main())
# File: bioimageio_chatbot/chatbot_extensions/biii_extension.py
import asyncio
import requests
from bs4 import BeautifulSoup
import pandas as pd
from typing import List, Optional
from pydantic import BaseModel, Field
from bioimageio_chatbot.utils import ChatbotExtension
from schema_agents import schema_tool


class BiiiQuery(BaseModel):
    """Queries parameters for biii.eu search"""

    queries: List[str] = Field(description="A list of keywords to search for")


class BiiiRow(BaseModel):
    """Search result row from biii.eu"""

    name: str = Field(description="Name")
    relevance: str = Field(description="Relevance score")
    image_dimension: Optional[str] = Field(
        None, description="Supported image dimension"
    )
    requires: Optional[str] = Field(description="Dependent software")
    excerpt: str = Field(description="Description")


def extract_table_with_links(table, base_url) -> List[BiiiRow]:
    """
    Extracts a table from HTML and includes hyperlinks in the cells if available.

    The first ``<tr>`` is treated as the header row. In every other row, a
    cell that has text and contains an ``<a href>`` gets the link appended
    as ``text(url)``.

    Args:
        table (bs4.element.Tag): A BeautifulSoup Tag object representing a table.
        base_url (str): Prefix added to hrefs that do not start with "http".

    Returns:
        List[BiiiRow]: One entry per data row; an empty list when the table
        has no rows or an empty header row.

    Raises:
        KeyError: If the table has no "Relevance" or "Excerpt" column.
    """
    data = []
    for index, row in enumerate(table.find_all("tr")):
        row_data = []
        for cell in row.find_all(["td", "th"]):
            cell_text = cell.get_text(strip=True)

            # Check for a hyperlink in the cell (header row is left as is).
            link = cell.find("a", href=True)
            if index != 0 and link and cell_text:
                href = link["href"]
                cell_text += f"({href if href.startswith('http') else base_url + href})"

            row_data.append(cell_text)
        data.append(row_data)

    # Nothing to parse: no rows at all, or a header row without cells.
    if not data or not data[0]:
        return []

    columns = data[0]
    columns[0] = "Name"
    # Guard the index: a header with fewer than four cells used to raise
    # IndexError here.
    if len(columns) > 3 and columns[3] == "Supported Image Dimension":
        columns[2] = "Logo"
    df = pd.DataFrame(data[1:], columns=columns)
    # remove column named "Content type" if exists
    if "Content type" in df.columns:
        df = df.drop(columns=["Content type"])

    return [
        BiiiRow(
            name=record["Name"],
            relevance=record["Relevance"],
            image_dimension=record.get("Supported Image Dimension"),
            # The column list recorded in this file's __main__ comment spells
            # the header 'requires'; accept both capitalisations.
            requires=record.get("Requires", record.get("requires")),
            excerpt=record["Excerpt"],
        )
        for record in df.to_dict(orient="records")
    ]
def search_biii_with_links(
    queries: List[str], content_type="software", base_url="https://biii.eu"
) -> List[BiiiRow]:
    """
    Search biii.eu and return the rows of the result table for one content type.

    Every query is searched separately. For each result page the first
    table whose caption matches ``content_type`` is parsed with
    ``extract_table_with_links``. Rows are concatenated in query order and
    de-duplicated by name, so the rows of the first query still come first.

    Args:
        queries (List[str]): A list of search queries.
        content_type (str): Lower-case table caption to keep, e.g. "software".
        base_url (str): Prefix used to resolve relative links in the cells.

    Returns:
        List[BiiiRow]: The collected rows; an empty list when nothing matched.
    """
    from urllib.parse import quote  # local import: only this function needs it

    search_base_url = "https://biii.eu/search?search_api_fulltext="
    rows = []
    seen_names = set()

    for query in queries:
        # Words of a query are joined with commas; quote() escapes anything
        # that is not valid inside a query string.
        url = search_base_url + quote(",".join(query.split()), safe=",")
        response = requests.get(url, timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")

        for table in soup.find_all("table"):
            caption = table.find("caption")
            if not caption:
                continue  # Skip tables without a caption
            caption_text = (
                caption.get_text().strip().replace("Content type: ", "").lower()
            )
            if caption_text != content_type:
                continue

            for row in extract_table_with_links(table, base_url):
                if row.name not in seen_names:
                    seen_names.add(row.name)
                    rows.append(row)
            break  # one matching table per result page is enough

    return rows


class BiiiSearchResult(BaseModel):
    """Search results from biii.eu"""
    results: List[BiiiRow] = Field(description="Search results from biii.eu")
    base_url: str = Field(
        description="The based URL of the search results, e.g. ImageJ (/imagej) will become /imagej"
    )


class BiiiResponse(BaseModel):
    """Summarize the search results from biii.eu"""

    response: str = Field(
        description="The answer to the user's question based on the search results. Can be either a detailed response in markdown format if the search results are relevant to the user's question or 'I don't know'. It should resolve relative URLs in the search results using the base_url."
    )


# The docstring below is kept verbatim because the function is wrapped with
# schema_tool. Returns a BiiiSearchResult, or an apology string when nothing
# was found.
@schema_tool
async def search_biii(
    keywords: List[str] = Field(
        description="A list of search keywords, no space allowed in each keyword."
    ),
    top_k: int = Field(
        10,
        description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
    )):
    """Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools."""
    # limit top_k from 1 to 15
    top_k = max(1, min(top_k, 15))
    print(f"Searching biii.eu with keywords: {keywords}, top_k: {top_k}")
    loop = asyncio.get_running_loop()
    # The search uses blocking HTTP calls, so it runs in the default executor.
    # An empty base_url keeps links relative; the absolute base is reported
    # once in BiiiSearchResult.base_url.
    results = await loop.run_in_executor(
        None, search_biii_with_links, keywords, "software", ""
    )
    if not results:
        return f"Sorry I didn't find relevant information in biii.eu about {keywords}"
    return BiiiSearchResult(
        results=results[:top_k],
        base_url="https://biii.eu",
    )


def get_extension():
    """Build the "biii" chatbot extension with its single search tool."""
    return ChatbotExtension(
        id="biii",
        name="Search BioImage Informatics Index (biii.eu)",
        description="Search software tools on BioImage Informatics Index (biii.eu) is a platform for sharing bioimage analysis software and tools. Provide a list of keywords to search for software tools on biii.eu. Returns a list of relevant documents.",
        tools=dict(search=search_biii),
    )


if __name__ == "__main__":
    results = search_biii_with_links(["image segmentation"])
    # Index(['Name', 'Relevance', 'Logo', 'Supported Image Dimension', 'requires',
    #        'Content type', 'Excerpt'],
    #       dtype='object')
    print(results)
# File: bioimageio_chatbot/chatbot_extensions/image_sc_extension.py
import httpx
import os
import urllib.parse
import asyncio
import html2text
import logging
from pydantic import Field
from bioimageio_chatbot.utils import ChatbotExtension
from typing import List, Dict, Any, Optional
from schema_agents import schema_tool

logger = logging.getLogger(__name__)


class DiscourseClient:
    """Minimal async client for the search and read endpoints of a Discourse forum.

    The docstrings of ``search_image_sc`` and ``read_image_sc_posts`` are
    kept verbatim because both methods are wrapped with ``schema_tool`` in
    ``get_extension``.
    """

    def __init__(self, base_url: str, username: str, api_key: str):
        # Every URL is built as f"{base_url}/...", so a trailing slash would
        # produce "//search.json"; strip it once here.
        self._base_url = base_url.rstrip("/")
        self._username = username
        self._api_key = api_key

    def _build_query_string(self, query: str, order: str, status: str) -> str:
        """Return the URL-encoded ``q=...`` parameter for the search endpoint.

        ``order`` and ``status`` are appended as ``order:<value>`` and
        ``status:<value>`` filters; empty or None values are left out.
        """
        query_components = [f"{query}"]
        if order:
            query_components.append(f"order:{order}")
        if status:
            query_components.append(f"status:{status}")
        # `urllib.parse.quote` ensures the query is URL encoded.
        return "q=" + urllib.parse.quote(" ".join(query_components))

    def _get_headers(self) -> Dict[str, str]:
        """Return the authentication headers sent with every request."""
        return {
            "Content-Type": "application/json",
            "Api-Username": self._username,
            "Api-Key": self._api_key,
        }

    def _cleanup_search_results(self, results: Dict[str, Any], top_k: int=10) -> Dict[str, Any]:
        """Reduce a raw search response to the fields the chatbot needs.

        Keeps ``id``/``topic_id``/``blurb`` of posts and ``title``/``slug``
        of topics, skips entries missing any of these keys, and truncates
        both lists to ``top_k`` entries.
        """
        posts = [
            {"id": post["id"], "topic_id": post["topic_id"], "blurb": post["blurb"]}
            for post in results.get("posts", [])
            if "id" in post and "topic_id" in post and "blurb" in post
        ]
        topics = [
            {"title": topic["title"], "slug": topic["slug"]}
            for topic in results.get("topics", [])
            if "title" in topic and "slug" in topic
        ]
        return {"posts": posts[:top_k], "topics": topics[:top_k]}

    async def search_image_sc(self, query: str = Field(..., description="The search query string."),
        top_k: int = Field(..., gt=0, description="Maximum number of search results to return."),
        order: Optional[str] = Field("latest", description="Order of the search results, options: latest, likes, views, latest_topic."),
        status: Optional[str] = Field(None, description="The status filter for the search results, options: solved, unsolved, open, closed."),
    ):
        """Search the Image.sc Forum(a forum for scientific image software) for posts and topics."""
        headers = self._get_headers()
        query_string = self._build_query_string(query, order, status)
        url = f"{self._base_url}/search.json?{query_string}"
        logger.info(f"Searching Image.sc Forum for: {query}")

        # Perform the asynchronous HTTP GET request
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)

        if response.status_code == 200:
            return self._cleanup_search_results(response.json(), top_k)
        # Raise an error for bad responses
        response.raise_for_status()

    async def read_image_sc_posts(self,
        type: str = Field(..., description="type: `post` or `topic`"),
        id: int = Field(..., description="topic id")
    ):
        """Read a single or all the posts in a topic from the Image.sc Forum (a discussion forum for scientific image software)."""
        if type == "post":
            return await self.get_post_content(id)
        if type == "topic":
            return await self.get_topic_content(id)
        # An unknown type used to return None silently; report it instead.
        raise ValueError(f"Unsupported type {type!r}, expected 'post' or 'topic'")

    async def get_topic_content(self, topic_id: int) -> Dict[str, Any]:
        """Fetch a topic and the text of every post in its post stream.

        Returns a dict with ``posts`` (a list of "username: text" strings,
        HTML converted with html2text) and the topic ``url``.
        """
        url = f"{self._base_url}/t/{topic_id}.json"
        headers = self._get_headers()
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            topic_data = response.json()

        post_ids = [post['id'] for post in topic_data['post_stream']['posts']]
        # The posts are fetched concurrently.
        messages = await asyncio.gather(*[self.get_post_content(post_id) for post_id in post_ids])
        posts = [
            f"{msg['username']}: {html2text.html2text(msg['content'])}"
            for msg in messages
        ]
        return {"posts": posts, "url": f"{self._base_url}/t/{topic_data['slug']}"}

    async def get_post_content(self, post_id: int) -> Dict[str, Any]:
        """Fetch one post; returns its ``username``, HTML ``content`` and topic ``url``."""
        url = f"{self._base_url}/posts/{post_id}.json"
        headers = self._get_headers()
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            post_data = response.json()
            return {"username": post_data["username"], "content": post_data["cooked"], "url": f"{self._base_url}/t/{post_data['topic_slug']}"}


def get_extension():
    """Build the "image_sc_forum" extension, or return None when credentials are missing."""
    username = os.environ.get("DISCOURSE_USERNAME")
    api_key = os.environ.get("DISCOURSE_API_KEY")
    if not username or not api_key:
        print("WARNING: Image.sc Forum extensions require DISCOURSE_USERNAME and DISCOURSE_API_KEY environment variables to be set, disabling it for now.")
        return None

    discourse_client = DiscourseClient(base_url="https://forum.image.sc/", username=username, api_key=api_key)
    return ChatbotExtension(
        id="image_sc_forum",
        name="Search image.sc Forum",
        description="Search the Image.sc Forum for posts and topics. Provide a search query to search the Image.sc Forum for posts or post, and read a specific topic",
        tools=dict(
            search=schema_tool(discourse_client.search_image_sc),
            read=schema_tool(discourse_client.read_image_sc_posts)
        )
    )


if __name__ == "__main__":
    import json

    async def main():
        # SECURITY: credentials must come from the environment. A literal
        # API key used to be committed here; treat that key as leaked and
        # revoke it.
        username = os.environ.get("DISCOURSE_USERNAME")
        api_key = os.environ.get("DISCOURSE_API_KEY")
        if not username or not api_key:
            raise SystemExit("Set DISCOURSE_USERNAME and DISCOURSE_API_KEY to run this demo.")
        discourse_client = DiscourseClient(base_url="https://forum.image.sc", username=username, api_key=api_key)
        # status is passed explicitly: called without schema_tool, the
        # Field(...) default object would be used as the status filter.
        results = await discourse_client.search_image_sc("python", 5, "latest", None)
        print(json.dumps(results))
        results = await discourse_client.read_image_sc_posts('topic', 44826)
        print(results)

    # Run the async function
    asyncio.run(main())
DyntamicFactory: 12 | 13 | TYPES = { 14 | 'string': str, 15 | 'array': list, 16 | 'boolean': bool, 17 | 'integer': int, 18 | 'float': float, 19 | 'number': float, 20 | 'null': None, 21 | } 22 | 23 | def __init__(self, 24 | json_schema: dict, 25 | base_model: Union[type[Model], tuple[type[Model], ...], None] = None, 26 | ref_template: str = "definitions" 27 | ) -> None: 28 | """ 29 | Creates a dynamic pydantic model from a JSONSchema, dumped from and existing Pydantic model elsewhere. 30 | JSONSchema dump must be called with ref_template='{model}' like: 31 | 32 | SomeSampleModel.model_json_schema(ref_template='{model}') 33 | Use: 34 | >> _factory = DyntamicFactory(schema) 35 | >> _factory.make() 36 | >> _model = create_model(_factory.class_name, **_factory.model_fields) 37 | >> _instance = dynamic_model.model_validate(json_with_data) 38 | >> validated_data = model_instance.model_dump() 39 | """ 40 | self.class_name = json_schema.get('title') 41 | self.description = json_schema.get('description') 42 | self.class_type = json_schema.get('type') 43 | self.required = json_schema.get('required', []) 44 | self.default = json_schema.get('default') 45 | self.raw_fields = json_schema.get('properties') 46 | self.ref_template = ref_template 47 | self.definitions = json_schema.get(ref_template) 48 | self.fields = {} 49 | self.model_fields = {} 50 | self._base_model = base_model 51 | 52 | def get_factory(self, field_name, field) -> Any: 53 | """Get the factory for a given type""" 54 | f_type = field.get('type') 55 | if f_type is None and 'anyOf' in field: 56 | factory = tuple([self.get_factory(None, t) for t in field.get('anyOf')]) 57 | if None in factory and len(factory) == 2: 58 | if field_name and field_name not in self.required: 59 | factory = [f for f in factory if f is not None][0] 60 | else: 61 | factory = Optional[[f for f in factory if f is not None][0]] 62 | else: 63 | factory = Union[factory] 64 | else: 65 | factory = self.TYPES.get(f_type) 66 | return factory 67 | 68 
| def make(self) -> Model: 69 | """Factory method, dynamically creates a pydantic model from JSON Schema""" 70 | for field in self.raw_fields: 71 | if field not in self.required: 72 | default = self.raw_fields[field].get('default') 73 | else: 74 | default = PydanticUndefined 75 | if '$ref' in self.raw_fields[field]: 76 | model_name = self.raw_fields[field].get('$ref') 77 | # resolve $ref 78 | # consider all the cases in standard json schema 79 | 80 | if model_name.startswith('#/'): 81 | model_name = model_name.replace('#/', '') 82 | elif model_name.startswith('#'): 83 | model_name = model_name.replace('#', '') 84 | 85 | if model_name.startswith(self.ref_template+"/"): 86 | model_name = model_name.replace(self.ref_template+"/", '') 87 | 88 | self._make_nested(model_name, field, default) 89 | else: 90 | factory = self.get_factory(field, self.raw_fields[field]) 91 | if factory is None: 92 | factory = Any 93 | if factory == list: 94 | items = self.raw_fields[field].get('items') 95 | if self.ref_template in items: 96 | self._make_nested(items.get(self.ref_template), field) 97 | 98 | self._make_field(factory, field, self.raw_fields.get('title'), self.raw_fields.get(field).get('description'), default=default) 99 | model = create_model(self.class_name, __base__=self._base_model, **self.model_fields) 100 | model.__doc__ = self.description 101 | return model 102 | 103 | def _make_nested(self, model_name: str, field, default) -> None: 104 | """Create a nested model""" 105 | level = DyntamicFactory({self.ref_template: self.definitions} | self.definitions.get(model_name), 106 | ref_template=self.ref_template) 107 | level.make() 108 | model = create_model(model_name, **level.model_fields) 109 | model.__doc__ = level.description 110 | self._make_field(model, field, field, level.description, default) 111 | 112 | def _make_field(self, factory, field, alias, description, default) -> None: 113 | """Create an annotated field""" 114 | # if field not in self.required: 115 | # 
def json_schema_to_pydantic_model(schema):
    """Convert a JSON-schema dictionary into a dynamically created pydantic model.

    Thin convenience wrapper: instantiates ``DyntamicFactory`` with the default
    settings and returns the result of its ``make()`` call.
    """
    return DyntamicFactory(schema).make()
class DocWithScore(BaseModel):
    """A document with an associated relevance score."""

    # Text of the retrieved chunk; the search tools in this module fill it
    # from `doc.page_content` of each vector-store hit.
    doc: str = Field(description="The document retrieved.")
    # Relevance score of the hit; the search tools round it to two decimals
    # before storing it here and sort their results on it (highest first).
    score: float = Field(description="The relevance score of the retrieved document.")
    # NOTE(review): the search tools also pass `metadata=doc.metadata`, but no
    # `metadata` field is declared here. Presumably it is dropped by pydantic's
    # default handling of extra keyword arguments; confirm whether the metadata
    # was meant to be part of the result.
def title_case(s):
    """Turn an identifier such as ``scikit-image`` into ``ScikitImage``.

    Dots and dashes are treated as word separators, every word is title-cased
    with ``str.title`` and the separating spaces are removed.
    """
    separators_as_spaces = s.translate(str.maketrans(".-", "  "))
    words = separators_as_spaces.title().split(" ")
    return "".join(words)
def create_tool(docs_store_dict, collection):
    """Build the search tool for one knowledge-base collection.

    Args:
        docs_store_dict: Mapping from collection id to a document store that
            provides ``asimilarity_search_with_relevance_scores(query, k=...)``.
        collection: Manifest entry of the collection. ``"id"`` is required;
            ``"description"``, ``"base_url"`` and ``"reference"`` are optional
            and only used to compose the tool description.

    Returns:
        The result of ``schema_tool`` applied to an async search function
        named ``Search<TitleCasedId>`` whose docstring describes the collection.
    """
    channel_id = collection["id"]

    async def run_extension(
        query: str = Field(
            description="The query used to retrieve documents related to the user's request. It should be a sentence which will be used to match descriptions using the OpenAI text embedding to match document chunks in a vector database."
        ),
        top_k: int = Field(
            3,
            description="The maximum number of search results to return. Should use a small number to avoid overwhelming the user.",
        ),
    ):
        # limit top_k from 1 to 15
        top_k = max(1, min(top_k, 15))
        docs_store = docs_store_dict[channel_id]

        print(f"Retrieving documents from database {channel_id} with query: {query}")
        results_with_scores = await docs_store.asimilarity_search_with_relevance_scores(
            query, k=top_k
        )

        docs_with_score = [
            DocWithScore(
                doc=doc.page_content,
                score=round(score, 2),
                metadata=doc.metadata,
            )
            for doc, score in results_with_scores
        ]
        # sort by relevance score, best match first
        docs_with_score = sorted(docs_with_score, key=lambda x: x.score, reverse=True)[:top_k]

        if len(docs_with_score) > 2:
            # Only preview the three best hits to keep the log short.
            preview = "\n".join(
                f"{item.doc[:20] + '...'} (score: {item.score})" for item in docs_with_score[:3]
            )
            print(f"Retrieved documents:\n{preview}")
        else:
            print(f"Retrieved documents:\n{docs_with_score}")
        return docs_with_score

    base_url = collection.get("base_url")
    reference = collection.get("reference")
    base_url_prompt = f" The documentation is available at {base_url}." if base_url else ""
    reference_prompt = f" The reference is available at {reference}." if reference else ""

    # Compose the description with exactly one period per sentence. The old
    # template produced "..", a trailing ". " and raised KeyError for
    # collections without a description.
    description = (collection.get("description") or "").strip().rstrip(".")
    summary = f"Searching documentation for {channel_id}"
    if description:
        summary = f"{summary}: {description}"

    run_extension.__name__ = "Search" + title_case(channel_id)
    run_extension.__doc__ = f"{summary}.{base_url_prompt}{reference_prompt}"
    return schema_tool(run_extension)
def get_extension():
    """Create the documentation and book search extensions.

    Reads the collections from the manifest, loads the knowledge base from
    ``BIOIMAGEIO_KNOWLEDGE_BASE_PATH`` (default ``./bioimageio-knowledge-base``,
    created if missing) and builds one search tool per collection. Collections
    whose id contains ``"book"`` go to the books extension, all others to the
    docs extension.

    Returns:
        A ``(docs_extension, books_extension)`` tuple. An entry is ``None``
        when the manifest contains no collection of that category (this case
        used to raise ``UnboundLocalError``).
    """
    collections = get_manifest()["collections"]
    knowledge_base_path = os.environ.get(
        "BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base"
    )
    if not os.path.exists(knowledge_base_path):
        print(
            f"The knowledge base is not found at {knowledge_base_path}, will download it automatically."
        )
        os.makedirs(knowledge_base_path, exist_ok=True)

    docs_store_dict = load_knowledge_base(knowledge_base_path)

    docs_tools = {}
    docs_info = {}
    books_tools = {}
    books_info = {}
    for col in collections:
        tool_key = "search_" + col["id"]
        info = {k: col[k] for k in INFO_KEYS if k in col}
        if "book" in col["id"]:
            tools, infos = books_tools, books_info
        else:
            tools, infos = docs_tools, docs_info
        tools[tool_key] = create_tool(docs_store_dict, col)
        if info:
            infos[tool_key] = info

    # Start from None so that a manifest lacking one category still returns.
    sinfo1 = None
    sinfo2 = None
    if docs_tools:
        sinfo1 = ChatbotExtension(
            id="docs",
            name="Search BioImage Docs",
            description="Search information in the documents of the bioimage.io knowledge base. Provide a list of keywords to search information in the documents. Returns a list of relevant documents. Ensure that the reference to the document is ALWAYS included!",
            tools=docs_tools,
            info=docs_info
        )
    if books_tools:
        sinfo2 = ChatbotExtension(
            id="books",
            name="Search BioImage Books",
            description="Search information in BioImage books. Provide a list of keywords to search information in the books. Returns a list of relevant documents. Ensure that the reference to the book is ALWAYS included!",
            tools=books_tools,
            info=books_info
        )

    return sinfo1, sinfo2
24 | 25 | Below are the available commands and options: 26 | 27 | ### Initialize Knowledge Base 28 | 29 | To initialize the knowledge base, use the `init` command: 30 | 31 | ```bash 32 | python -m bioimageio_chatbot init 33 | ``` 34 | 35 | This will load the knowledge base from the location specified by the `BIOIMAGEIO_KNOWLEDGE_BASE_PATH` environment variable, or use the default path `./bioimageio-knowledge-base`. If the knowledge base is not found, it will be downloaded from the predefined URL (by default, it uses https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimageio-knowledge-base. It can be overridden with `BIOIMAGEIO_KNOWLEDGE_BASE_URL`). 36 | 37 | NOTE: It may take some time to download the knowledge base depending on your internet connection. 38 | **Example:** 39 | 40 | ```bash 41 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH="./my_knowledge_base" 42 | python -m bioimageio_chatbot init 43 | ``` 44 | 45 | After running the `init` command, it will list the databases loaded into the knowledge base. 
46 | 47 | #### Start Server 48 | 49 | To start your own server entirely, use the `start-server` command: 50 | 51 | ```bash 52 | python -m bioimageio_chatbot start-server [--host HOST] [--port PORT] [--public-base-url PUBLIC_BASE_URL] 53 | ``` 54 | 55 | **Options:** 56 | 57 | - `--host`: The host address to run the server on (default: `0.0.0.0`) 58 | - `--port`: The port number to run the server on (default: `9000`) 59 | - `--public-base-url`: The public base URL of the server (default: `http://127.0.0.1:9000`) 60 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login) 61 | 62 | **Example:** 63 | 64 | ```bash 65 | export OPENAI_API_KEY=sk-xxxxxxxx 66 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base 67 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs 68 | python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 69 | ``` 70 | This will create a local server, and the BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=http://127.0.0.1:9000 71 | 72 | Open the link in a browser, and you will see the chat interface. 73 | 74 | Please note that the chatbot server may not be accessible to users outside your local network. 75 | 76 | A user guide and technical overview can be found [here](./technical-overview.md). 77 | 78 | To be able to share your chatbot service over the internet (especially for users outside your local network), you will need to expose your server publicly. Please, see [Connect to Server](#connect-to-server) 79 | 80 | 81 | #### Connect to Server 82 | 83 | To help you share your chatbot with users external to your local network, you can use our public [BioEngine](https://aicell.io/project/bioengine/) server as a proxy. 
84 | 85 | To connect to an external BioEngine server, use the `connect-server` command: 86 | 87 | ```bash 88 | python -m bioimageio_chatbot connect-server [--server-url SERVER_URL] 89 | ``` 90 | 91 | **Options:** 92 | 93 | - `--server-url`: The URL of the external BioEngine server to connect to (default: `https://ai.imjoy.io`) 94 | - `--login-required`: Whether to require users to log in before accessing the chatbot (default to not require login) 95 | 96 | **Example:** 97 | 98 | ```bash 99 | export OPENAI_API_KEY=sk-xxxxxxxx 100 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base 101 | export BIOIMAGEIO_CHAT_LOGS_PATH=./chat-logs 102 | python -m bioimageio_chatbot connect-server --server-url=https://ai.imjoy.io 103 | ``` 104 | 105 | First, you will be asked to log in with a hypha account. Either your GitHub or Google account can be reused. Then, the following message containing a link to the chatbot will be displayed: 'The BioImage.IO Chatbot is available at: https://bioimage.io/chat?server=https://ai.imjoy.io' 106 | 107 | Leave your chatbot running to enable users inside or outside your network to access it from this URL. 108 | 109 | #### User Management 110 | 111 | If you set `--login-required` when running `start-server` or `connect-server`, users will be required to log in before accessing the chatbot. The chatbot will then collect the user's GitHub or Google account information and store it in its logs for future analysis. 112 | 113 | You can also provide an optional environment variable `BIOIMAGEIO_AUTHORIZED_USERS_PATH` for the chatbot to load a list of authorized users. The file should be a JSON file containing a list of GitHub or Google account names. 
For example: 114 | 115 | ```json 116 | { 117 | "users": [ 118 | {"email": "user1@email.org"} 119 | ] 120 | } 121 | ``` 122 | 123 | #### Create Knowledge Base 124 | 125 | To create a new knowledge base, use the `create-knowledge-base` command: 126 | 127 | ```bash 128 | python -m bioimageio_chatbot create-knowledge-base [--output-dir OUTPUT_DIR] 129 | ``` 130 | 131 | **Options:** 132 | 133 | - `--output-dir`: The directory where the knowledge base will be created (default: `./bioimageio-knowledge-base`) 134 | 135 | **Example:** 136 | 137 | ```bash 138 | export OPENAI_API_KEY=sk-xxxxxxxx 139 | export BIOIMAGEIO_KNOWLEDGE_BASE_PATH=./bioimageio-knowledge-base 140 | python -m bioimageio_chatbot create-knowledge-base --output-dir=./bioimageio-knowledge-base 141 | ``` 142 | 143 | 144 | ### Running the BioImage.IO Chatbot in a Docker Container 145 | 146 | #### Step 1: Build the Docker Image 147 | 148 | To run the BioImage.IO Chatbot using a Docker container, follow these steps. First, build the Docker image by running the following command in your terminal: 149 | 150 | ```bash 151 | docker build -t bioimageio-chatbot:latest . 152 | ``` 153 | 154 | If you prefer to use a pre-built Docker image from Docker Hub, you can pull the image using the following command: 155 | 156 | ```bash 157 | docker pull alalulu/bioimageio-chatbot:latest 158 | ``` 159 | 160 | 161 | #### Step 2: Start the Chatbot Server 162 | 163 | After building the Docker image, you can start the chatbot server with the following command: 164 | 165 | ```bash 166 | docker run -e OPENAI_API_KEY=sk-xxxxxxxxxxxxx -e BIOIMAGEIO_KNOWLEDGE_BASE_PATH=/knowledge-base -p 3000:9000 -v /path/to/local/knowledge-base:/knowledge-base bioimageio-chatbot:latest python -m bioimageio_chatbot start-server --host=0.0.0.0 --port=9000 --public-base-url=http://localhost:3000 167 | ``` 168 | 169 | Replace the placeholders in the command with the following values: 170 | 171 | - `sk-xxxxxxxxxxxxx`: Your OpenAI API key. 
172 | - `/path/to/local/knowledge-base`: The local path to your knowledge base folder. 173 | 174 | Optionally, for improved reproducibility, you can change `latest` to a version tag such as `v0.1.18`. 175 | 176 | #### Step 3: Access the Chatbot 177 | 178 | The BioImage.IO Chatbot is now running in the Docker container. You can access it locally in your web browser by visiting: 179 | 180 | ``` 181 | https://bioimage.io/chat?server=http://localhost:3000 182 | ``` 183 | 184 | Make sure to replace `3000` with the host port you specified in the `docker run` command. 185 | 186 | 187 | Enjoy using the BioImage.IO Chatbot! 188 | 189 | -------------------------------------------------------------------------------- /bioimageio_chatbot/static/bioimage-model-zoo-extension.imjoy.html: -------------------------------------------------------------------------------- 1 | 2 | [TODO: write documentation for this plugin.] 3 | 4 | 5 | 6 | { 7 | "name": "SearchBioImageModelZoo", 8 | "type": "web-python", 9 | "version": "0.1.0", 10 | "description": "BioImage.IO Chatbot Extension for getting information about models, applications, datasets, etc. in the BioImage Model Zoo.", 11 | "tags": [], 12 | "ui": "", 13 | "cover": "", 14 | "inputs": null, 15 | "outputs": null, 16 | "flags": [], 17 | "icon": "extension", 18 | "api_version": "0.1.8", 19 | "env": "", 20 | "permissions": [], 21 | "requirements": ["pydantic"], 22 | "dependencies": [] 23 | } 24 | 25 | 26 | 188 | -------------------------------------------------------------------------------- /docs/usage-example.md: -------------------------------------------------------------------------------- 1 | # BioImage.IO Chatbot Usage Example Guide 2 | 3 | ## Introduction 4 | Welcome to the world of BioImage.IO Chatbot, a revolutionary conversational assistant designed exclusively for the bioimage community. 
Our chatbot is more than just a virtual conversationalist; it's a sophisticated AI-powered tool that offers personalized, context-aware responses by drawing from a diverse array of databases, tool-specific documentation, and structured data sources. 5 | 6 | The BioImage.IO Chatbot is all about efficiency, knowledge enrichment, and accessibility. In this guide, we will walk you through various aspects of its functionality, so you can harness its capabilities to enhance your computational bioimaging experience. Whether you're a biologist, bioimage analyst, or developer, this guide is your key to unlocking the full potential of the BioImage.IO Chatbot. 7 | 8 | 9 | ## Basic Interaction 10 | ### How to Start a Conversation 11 | Firstly, to make the most of your chatbot experience, consider setting your profile information. This helps the chatbot understand your background and tailor responses accordingly. 12 | 13 | To start a conversation, simply type `Hi` or `Hello`, and the chatbot will warmly greet you in return. 14 | 15 | ![Basic Interaction](./screenshots/chatbot-hi.png) 16 | 17 | 18 | ## Asking Questions 19 | ### Simple Questions 20 | You can ask the BioImage.IO Chatbot a wide range of general and straightforward questions. The chatbot will promptly retrieve and provide you with the relevant information. For example, you can inquire about a "Community Partner" in the BioImage Model Zoo. 21 | 22 | ![What is a Community Partner in the BioImage Model Zoo](./screenshots/chatbot-community-partner.png) 23 | 24 | ### Technical Questions 25 | If you have more technical questions, the chatbot is equipped to assist you effectively. It retrieves the necessary information, summarizes it, and presents it in a simple and understandable manner. For instance, you can ask the chatbot about procedures like "How to segment an image using Fiji" or "How to upload a model to the bioimage.io repository." 
26 | 27 | ![Segment an Image using Fiji](./screenshots/search-bioimage-docs.png) 28 | 29 | 30 | ### Personalization: Tailored Responses 31 | The BioImage.IO Chatbot is designed to understand and cater to the unique backgrounds and needs of its users. Whether you're a biologist, bioimage analyst, or developer, the chatbot tailors its responses to provide you with the most relevant and context-aware information. 32 | 33 | To illustrate this personalized approach, we have two screenshots of the chatbot answering the same question: "How can I analyze biological images?" In the first screenshot, we have Abby, a biologist, posing this query. In the second screenshot, it's Abby again, but this time, she identifies as a developer. As expected, the chatbot is able to provide different feedback to Abby based on her background and needs. 34 | 35 | ![Abby, a biologist, willing to analyze biological images.](./screenshots/chatbot-biologist.png) 36 | ![Abby, a developer, willing to analyze biological images.](./screenshots/chatbot-developer.png) 37 | 38 | ### How to switch chatbot's assistants 39 | The BioImage.IO Chatbot has three built-in assistants that cater to the different purposes and needs of its users. BioImage Seeker is designed for users seeking information about bioimaging; it is equipped with 6+ extensions, including the community-driven knowledge base as well as online sources such as the image.sc forum and web search. BioImage Executor is designed for users who want to execute models, such as CellPose, using BioEngine. BioImage Tutor is designed for users seeking tutoring in bioimaging; it is equipped with the BioImage Books extension combined with web search and BioImage Model Zoo search. 
40 | ![BioImage Tutor](./screenshots/bia-tutor.png) 41 | ![BioImage Executor](./screenshots/bia-executor.png) 42 | 43 | ## Database Integration 44 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io). 45 | In the chatbot's user interface, you'll find a list of the available knowledge base channels that the chatbot can access. You can choose to select a specific knowledge base channel or opt for the `auto` mode to query information from all integrated databases. 46 | 47 | ## Extension Selection 48 | 49 | If you are interested in using specific extensions in the chatbot, you can click 'More Options' and select one or several extensions from the extension list. This way, the information provided will be obtained exclusively with the extensions you have selected. 50 | 51 | For this example, you can select the `Search Image.sc Forum` extension and ask the chatbot about software issues. 52 | 53 | ![Select extension](./screenshots/select-extension.png) 54 | ![Image.sc Forum](./screenshots/search-image-forum.png) 55 | 56 | ### Steps details of Chatbot's working trajectory 57 | To let you check how the chatbot arrived at an answer, every response includes all the steps of the chatbot's working trajectory. Click 'More Details' at the bottom of the response to see the details of each step. 
58 | 59 | ![More details](./screenshots/more-details.png) 60 | 61 | ### Search in Knowledge Base 62 | Our system harnesses knowledge from a variety of valuable sources, including established databases such as [ELIXIR bio.tools](https://bio.tools) and the [ImageJ Wiki](https://imagej.net/software/imagej2/), tool-specific documentation like [deepImageJ](https://deepimagej.github.io), [ilastik](https://www.ilastik.org), and [ImJoy](https://imjoy.io/#/), and structured databases such as the [BioImage Model Zoo repository](https://bioimage.io). This comprehensive integration enables our chatbot to not only respond to questions related to each of these individual tools but also tackle complex inquiries that demand information from multiple databases. 63 | 64 | To illustrate this capability, consider the following scenario: You have a Whole Slide Image (WSI) that you'd like to open with [QuPath](https://qupath.github.io), apply the Chatty-Frog model ([StarDist H&E Nuclei Segmentation](https://bioimage.io/#/?tags=chatty-frog&id=10.5281%2Fzenodo.6338614)) from BioImage.IO using deepImageJ within Fiji. How can this be accomplished? You can present this intricate question to the BioImage.IO Chatbot. 65 | 66 | The chatbot provides a detailed breakdown of the steps required to complete the requested pipeline. This example demonstrates how our chatbot excels at efficiently retrieving and summarizing information from various sources, offering a valuable solution for your bioimaging needs. 67 | 68 | ![Cross-Database Query](./screenshots/search-bioimage-docs.png) 69 | 70 | ### Retrieving Models 71 | The BioImage.IO Chatbot is a versatile tool that can generate and execute code in response to user queries. This means that when a user asks about specific models available in bioimage.io, the chatbot can fetch this information by generating a custom script. 
72 | 73 | For instance, if a user inquires about the total number of models in the BioImage Model Zoo, the chatbot can effortlessly retrieve this information using a straightforward script. However, the questions can become more intricate. Users might want to know which models in bioimage.io are suitable for a particular tag or task (*e.g., segmentation*) or similar criteria. The chatbot is adept at handling these more complex queries, too. 74 | 75 | ![Model Queries](./screenshots/search-model-zoo.png) 76 | 77 | 78 | ### Model execution 79 | TODO: Update the description for model execution and screenshot for cellpose example. 80 | 81 | The BioImage.IO Chatbot can also execute functions in external APIs, analysis pipelines, plugins, and user code. Cellpose image segmentation has been added as a Chatbot function calling extension and can be used as a template for additional APIs. Users can currently call the Cellpose extension by uploading a file and asking the Chatbot to segment it from either the `auto` or the `cellpose` channel. 82 | 83 | ## Conclusion 84 | The BioImage.IO Chatbot is a powerful tool designed to provide you with accurate and personalized information from a wide range of databases. Whether you're a biologist or a developer, our chatbot is here to assist you. Feel free to explore its capabilities, ask questions, and customize your experience. 85 | 86 | Explore more, learn more, and enjoy the benefits of BioImage.IO Chatbot! 87 | 88 | ## Additional Resources 89 | - For documentation, visit our [GitHub repository](https://github.com/bioimage-io/bioimageio-chatbot). 90 | - Do you have questions or need assistance? Contact us through [GitHub Issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). 
/* 91 |
--------------------------------------------------------------------------------
/bioimageio_chatbot/static/worker-manager.js:
--------------------------------------------------------------------------------
(dump section marker above preserved verbatim) */

/**
 * Creates, tracks and talks to web workers started from
 * "/chat/pyodide-worker.js".
 *
 * For every worker the manager keeps the Worker instance, a log of the
 * scripts sent to it and the typed messages it posted back, the hypha
 * service descriptors it announced, and an "app service" facade exposing
 * per-worker operations under both camelCase and snake_case names.
 */
class PyodideWorkerManager {
  // Service descriptors (message.attrs of "service" messages), keyed by worker id
  hyphaServices = {}
  // Worker instances, keyed by worker id
  workers = {}
  // App-service facades returned by createWorker(), keyed by worker id
  workerApps = {}
  // Callbacks registered through subscribe()
  subscribers = []
  // Script/output records, keyed by worker id
  workerRecords = {}

  /**
   * @param {*} dirHandle - Native file system handle; when truthy it is
   *   mounted into every worker created by createWorker().
   * @param {string} [mountPoint] - Default mount point, "/mnt" when omitted.
   */
  constructor(dirHandle, mountPoint) {
    this.workers = {}
    this.workerRecords = {}
    this.dirHandle = dirHandle
    this.mountPoint = mountPoint || "/mnt"
  }

  /** @returns {*} The directory handle passed to the constructor. */
  getDirHandle() {
    return this.dirHandle
  }

  /**
   * Register a callback that notify() will invoke.
   * @param {Function} callback
   * @returns {Function} Call it to unsubscribe the callback again.
   */
  subscribe(callback) {
    this.subscribers.push(callback)
    return () => {
      this.subscribers = this.subscribers.filter(sub => sub !== callback)
    }
  }

  /** Invoke every subscriber; called whenever the set of workers changes. */
  notify() {
    this.subscribers.forEach(callback => callback())
  }

  /** @returns {Array<object>} The app-service facades of all open workers. */
  getWorkerApps() {
    return Object.values(this.workerApps)
  }

  /**
   * Start a new worker, wait for its first message and register it.
   *
   * @param {*} info - Stored unchanged as `appInfo` on the returned facade.
   * @returns {Promise<object>} The app-service facade for the new worker.
   * @throws {Error} If the worker emits an "error" event before its first
   *   message (previously this case never settled).
   */
  async createWorker(info) {
    // slice(2, 10) + padEnd guarantees a non-empty 8-character id; the old
    // substring(7) could return "" for short random strings, and an empty
    // id is rejected by getWorker().
    let id
    do {
      id = Math.random().toString(36).slice(2, 10).padEnd(8, "0")
    } while (this.workers[id])
    console.log("Creating worker:", id)
    const worker = new Worker("/chat/pyodide-worker.js")
    await new Promise((resolve, reject) => {
      const detach = () => {
        worker.removeEventListener("message", onReady)
        worker.removeEventListener("error", onFailure)
      }
      const onReady = () => {
        detach()
        resolve()
      }
      const onFailure = e => {
        detach()
        reject(new Error(`Failed to start worker: ${(e && e.message) || e}`))
      }
      worker.addEventListener("message", onReady)
      worker.addEventListener("error", onFailure)
    })
    this.workers[id] = worker
    worker.kill = () => {
      worker.terminate()
      worker.terminated = true
    }
    this.workerRecords[id] = []
    this.hyphaServices[id] = []
    const self = this
    const appService = {
      id,
      appInfo: info,
      worker,
      async runScript(script, ioContext) {
        return await self.runScript(id, script, ioContext)
      },
      async run_script(script, io_context) {
        return await self.runScript(id, script, io_context)
      },
      async mount(mountPoint, dirHandle) {
        return await self.mountNativeFs(id, mountPoint, dirHandle)
      },
      async render(container) {
        self.render(id, container)
      },
      async renderSummary(container) {
        return self.renderSummary(id, container)
      },
      async close() {
        await self.closeWorker(id)
      },
      getLogs() {
        return self.workerRecords[id]
      },
      get_logs() {
        return self.workerRecords[id]
      },
      async listHyphaServices() {
        return self.hyphaServices[id]
      },
      async list_hypha_services() {
        return self.hyphaServices[id]
      }
    }
    this.workerApps[id] = appService
    if (this.dirHandle) {
      await this.mountNativeFs(id)
    }
    this.notify()
    return appService
  }

  /**
   * Kill a worker and forget everything recorded for it. Unknown ids are
   * ignored.
   * @param {string} id
   */
  async closeWorker(id) {
    const worker = this.workers[id]
    if (!worker) {
      return
    }
    worker.kill()
    delete this.workers[id]
    delete this.workerRecords[id]
    delete this.workerApps[id]
    // Previously left behind, so closed workers leaked their service list.
    delete this.hyphaServices[id]
    this.notify()
  }

  /**
   * @param {string} id
   * @returns {Promise<Worker>}
   * @throws {Error} If the id is falsy or unknown.
   */
  async getWorker(id) {
    if (id && this.workers[id]) {
      return this.workers[id]
    } else {
      throw new Error("No worker found with ID: " + id)
    }
  }

  /**
   * Post a `mount` request to the worker and wait for its answer.
   *
   * @param {string} workerId
   * @param {string} [mountPoint] - Falls back to the manager's mount point.
   * @param {*} [dirHandle] - Falls back to the manager's directory handle.
   * @returns {Promise<boolean>} Resolves to true on a `mounted` message;
   *   rejects with an Error built from a `mountError` message.
   */
  async mountNativeFs(workerId, mountPoint, dirHandle) {
    if (!workerId) {
      throw new Error("No worker ID provided and no current worker available.")
    }
    const worker = await this.getWorker(workerId)
    return new Promise((resolve, reject) => {
      const handler = e => {
        if (e.data.mounted) {
          worker.removeEventListener("message", handler)
          resolve(true)
        } else if (e.data.mountError) {
          worker.removeEventListener("message", handler)
          reject(new Error(e.data.mountError))
        }
      }
      worker.addEventListener("message", handler)
      worker.postMessage({
        mount: {
          mountPoint: mountPoint || this.mountPoint,
          dirHandle: dirHandle || this.dirHandle
        }
      })
    })
  }

  /** Append a record to the worker's log, creating the log if needed. */
  addToRecord(workerId, record) {
    if (!this.workerRecords[workerId]) {
      this.workerRecords[workerId] = []
    }
    this.workerRecords[workerId].push(record)
  }

  /**
   * Build the plain-text summary of a single record.
   *
   * @param {*} container - Unused; kept for signature compatibility.
   * @param {object} record
   * @returns {string} Summary text; "" for record types that have no
   *   summary (previously `undefined`, which renderSummary() turned into
   *   the literal text "undefined").
   */
  renderOutputSummary(container, record) {
    switch (record.type) {
      case "store":
        return `Store: ${record.key}`
      case "script":
        return `Script>>>:\n\`\`\`python\n${record.content}\n\`\`\`\n`
      case "stdout":
      case "stderr":
        // The old `content.trim() === "\n"` special case could never match
        // (trim() strips newlines), so this is the only reachable result.
        return `${record.content}\n`
      case "service":
        return `Service: ${record.content}`
      case "img":
        return `Image: `
      case "audio":
        return `Audio: `
      default:
        return ""
    }
  }

  /**
   * Append a DOM rendering of one record to `container`. Whitespace-only
   * stdout/stderr records and unknown record types render nothing.
   */
  renderOutput(container, record) {
    if (record.type === "stdout" || record.type === "stderr") {
      if (record.content.trim() !== "") {
        const outputEl = document.createElement("pre")
        if (record.type === "stderr") {
          outputEl.style.color = "red"
        }
        outputEl.textContent = record.content
        container.appendChild(outputEl)
      }
    } else if (record.type === "store") {
      const storeEl = document.createElement("pre")
      storeEl.textContent = `Store: ${record.key}`
      container.appendChild(storeEl)
    } else if (record.type === "script") {
      const scriptEl = document.createElement("pre")
      scriptEl.textContent = `Script: ${record.content}`
      container.appendChild(scriptEl)
    } else if (record.type === "service") {
      const serviceEl = document.createElement("div")
      serviceEl.textContent = `Service: ${record.content}`
      container.appendChild(serviceEl)
    } else if (record.type === "audio" || record.type === "img") {
      const el = document.createElement(record.type)
      el.src = record.content
      if (record.attrs) {
        record.attrs.forEach(([attr, value]) => {
          el.setAttribute(attr, value)
        })
      }
      if (record.type === "audio") {
        el.controls = true
      }
      container.appendChild(el)
    }
  }

  /**
   * Find the first "store" record of a worker.
   *
   * @param {string} workerId
   * @param {string} [key] - When given, only a record with this key matches.
   * @returns {Promise<object|undefined>} undefined when nothing matches or
   *   the worker has no records (previously a TypeError in the latter case).
   */
  async readStoreItem(workerId, key) {
    const records = this.workerRecords[workerId] || []
    return records.find(record => record.type === "store" && (!key || record.key === key))
  }

  /**
   * Send a script to a worker and collect the typed messages it posts
   * until it reports completion.
   *
   * @param {string} workerId
   * @param {string} script - Posted to the worker as `source`.
   * @param {object} [ioContext] - Posted as `io_context` without its
   *   `output_container` entry; when `output_container` is set, each output
   *   is also rendered into it. A truthy `skip_record` keeps the script and
   *   its outputs out of the worker's record log.
   * @returns {Promise<Array<object>>} The typed messages, in arrival order.
   *   Rejects with the worker's `executionError` value, or with the string
   *   "Python runtime was killed" if `worker.kill()` is called meanwhile
   *   (rejection values are unchanged for existing callers).
   * @throws {Error} If the worker is unknown or already terminated.
   */
  async runScript(workerId, script, ioContext) {
    // Split the container off into a copy instead of deleting it from the
    // caller's object; the container is only used on this side.
    let outputContainer
    let workerContext = ioContext
    if (ioContext) {
      const { output_container, ...rest } = ioContext
      outputContainer = output_container
      workerContext = rest
    }
    const shouldRecord = !ioContext || !ioContext.skip_record
    const worker = await this.getWorker(workerId)
    if (worker.terminated) {
      throw new Error("Worker already terminated")
    }
    return new Promise((resolve, reject) => {
      worker.onerror = e => console.error(e)
      const outputs = []
      const handler = e => {
        const message = e.data
        if (message.type !== undefined) {
          if (shouldRecord) {
            this.addToRecord(workerId, message)
          }
          outputs.push(message)
          if (outputContainer) {
            this.renderOutput(outputContainer, message)
          }
          if (message.type === "service") {
            const services = this.hyphaServices[workerId] || (this.hyphaServices[workerId] = [])
            services.push(message.attrs)
          }
        } else if (message.executionDone) {
          worker.removeEventListener("message", handler)
          resolve(outputs)
        } else if (message.executionError) {
          console.error("Execution Error", message.executionError)
          worker.removeEventListener("message", handler)
          reject(message.executionError)
        }
      }
      // While a script is running, killing the worker must also settle
      // the pending promise and detach the listener.
      worker.kill = () => {
        worker.removeEventListener("message", handler)
        worker.terminate()
        worker.terminated = true
        reject("Python runtime was killed")
      }
      worker.addEventListener("message", handler)
      if (shouldRecord) {
        this.addToRecord(workerId, { type: 'script', content: script })
      }
      worker.postMessage({ source: script, io_context: workerContext })
    })
  }

  /** Render every recorded item of a worker into `container`. */
  render(workerId, container) {
    const records = this.workerRecords[workerId]
    if (!records) {
      console.error("No records found for worker:", workerId)
      return
    }
    records.forEach(record => this.renderOutput(container, record))
  }

  /**
   * Concatenate the text summaries of all records of a worker.
   * @returns {string|undefined} undefined (after logging) when the worker
   *   has no record log.
   */
  renderSummary(workerId, container) {
    const records = this.workerRecords[workerId]
    if (!records) {
      console.error("No records found for worker:", workerId)
      return
    }
    return records.map(record => this.renderOutputSummary(container, record)).join("")
  }
}

// Expose the class to page scripts; guarded so that loading this file in a
// context without `window` does not throw.
if (typeof window !== "undefined") {
  window.PyodideWorkerManager = PyodideWorkerManager
}

/* (next dump section preserved verbatim; it continues after this comment)
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # 🦒 BioImage.IO Chatbot 🤖
2 |
3 | **📣 Publication on Nature Methods: https://www.nature.com/articles/s41592-024-02370-y. Free access: https://rdcu.be/dQuw7 **
4 |
5 | **👇 Want to Try the Chatbot? [Visit here!](https://bioimage.io/chat)**
6 |
7 | **📖 Read the full Documentation [here](https://bioimage-io.github.io/bioimageio-chatbot/#/)**
8 | ## Your Personal Assistant in Computational Bioimaging
9 |
10 | Welcome to the BioImage.IO Chatbot user guide. This guide will help you get the most out of the chatbot, providing detailed information on how to interact with it and retrieve valuable insights related to computational bioimaging.
11 |
12 | ## Introduction
13 |
14 | The BioImage.IO Chatbot is a versatile conversational agent designed to assist users in accessing information related to computational bioimaging. It leverages the power of Large Language Models (LLMs) and integrates user-specific data to provide contextually accurate and personalized responses.
*/
Whether you're a researcher, developer, or scientist, the chatbot is here to make your bioimaging journey smoother and more informative. 15 | 16 | 17 | ![screenshot for the chatbot](./screenshots/chatbot-animation.gif) 18 | 19 | You can watch a demonstration of the BioImage.IO Chatbot performing image analysis in this [video](https://zenodo.org/records/10967840/preview/Supplementary-Video-2-bioimageio-chatbot-ai-image-analysis.mp4?include_deleted=0). 20 | 21 | 22 | The following diagram shows how the chatbot works: 23 | 24 | 25 | 26 | ## Chatbot Features 27 | 28 | The BioImage.IO Chatbot is equipped with an array of capabilities designed to enhance the bioimaging experience: 29 | 30 | * **Contextual and Personalized Response**: Interprets the context of inquiries to deliver relevant and accurate responses. Adapts interactions based on user-specific background information to provide customized advice. 31 | 32 | * **Comprehensive Data Source Integration**: Accesses a broad range of databases and documentation for bioimaging, including [bio.tools](https://bio.tools), [ImageJ.net](https://imagej.net/), [deepImageJ](https://deepimagej.github.io/deepimagej/), [ImJoy](https://imjoy.io/#/), and [bioimage.io](https://bioimage.io). Details on the supported sources are maintained in the [`knowledge-base-manifest.yaml`](knowledge-base-manifest.yaml) file. 33 | 34 | * **Advanced Query Capabilities**: Generates and executes Python scripts for detailed queries within structured databases such as CSV, JSON files, or SQL databases, facilitating complex data retrievals. 35 | 36 | * **AI-Powered Analysis and Code Interpretation**: Directly runs complex image analysis tasks using advanced AI models like Cellpose, via an embedded code interpreter. 37 | 38 | * **Performance Enhancements with ReAct and RAG**: Utilizes a Retrieval Augmented Generation system with a ReAct loop for dynamic, iterative reasoning and tool engagement, improving response quality. 
39 | 40 | * **Extension Mechanism for Developers**: Allows for the development of custom extensions using ImJoy plugins or hypha services within Jupyter notebooks, enhancing flexibility and integration possibilities. 41 | 42 | * **Vision Inspection and Hardware Control**: Features a Vision Inspector extension powered by GPT-4 for visual feedback on image content and analysis outcomes, and demonstrates potential for controlling microscopy hardware in smart microscopy setups. 43 | 44 | * **Interactive User Interface and Documentation**: Offers a user-friendly interface with comprehensive support documents, ensuring easy access to its features and maximizing user engagement. 45 | 46 | ## Using the Chatbot 47 | 48 | We are providing a public chatbot service for you to try out. You can access the chatbot [here](https://chat.bioimage.io/chat). 49 | 50 | Please note that the chatbot is still in beta and is being actively developed. We log the messages you send to the chatbot so that we can investigate issues and support our development. See the [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md). If you want to remove your chat logs, please contact us via [this form](https://oeway.typeform.com/to/K3j2tJt7). 51 | 52 | Here you can find a usage guide and more examples: [Usage guide and example screenshots](docs/usage-example.md). 53 | 54 | If you encounter any issues, please report them via [Github](https://github.com/bioimage-io/bioimageio-chatbot/issues). 55 | 56 | 57 | ### Asking Questions 58 | 59 | To ask the chatbot a question, type your query and send it. The chatbot will analyze your question and provide a relevant response. You can ask questions related to bioimaging, software tools, models, and more. 60 | 61 | ### Personalized Responses 62 | 63 | The chatbot uses your user profile information, such as your name, occupation, and background, to personalize its responses. This ensures that the information you receive is tailored to your specific needs.
64 | 65 | 66 | ## Setup Your Own Chatbot 67 | 68 | You can also set up your own chatbot server. Please refer to the [installation guide](./installation.md) for detailed instructions on how to set up the chatbot server on your local machine or server. 69 | 70 | ## Technical Overview 71 | 72 | Please read the [technical overview](./technical-overview.md) for more details about the chatbot's design and implementation. 73 | 74 | ## Develop Chatbot Extensions 75 | 76 | The BioImage.IO Chatbot is designed to be extensible, allowing developers to create custom extensions to add new functionalities to the chatbot. You can create extensions to integrate new tools, databases, and services into the chatbot, making it more powerful and versatile. See the [development guide](./development.md) for more details. 77 | 78 | ## Join Us as a Community Partner 79 | 80 | The BioImage.IO Chatbot is a community-driven project. We welcome contributions from the community to help improve the chatbot's knowledge base and make it more informative and useful to the community. 81 | 82 | For more information, please visit the [contribution guidelines](docs/CONTRIBUTING.md). 83 | 84 | If you are a tool developer or a database maintainer related to bioimaging, you can join us as a community partner. Please get in touch with us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). 85 | 86 | ## Contact Us 87 | 88 | If you have any questions, need assistance, or want to contribute to the chatbot's knowledge base, please do not hesitate to contact us via [Github issues](https://github.com/bioimage-io/bioimageio-chatbot/issues). Our team is here to help you get started and make valuable contributions. 89 | 90 | Thanks for your support and helping make the BioImage.IO Chatbot more informative and useful to the community. 
91 | 92 | ## Publication 93 | 94 | For detailed description of our work, please read our preprint: **[![arXiv](https://img.shields.io/badge/arXiv-2310.18351-red.svg)](https://arxiv.org/abs/2310.18351) 10.5281/zenodo.10032227** 95 | 96 | 97 | To reproduce the use cases described in [Figure 2](https://docs.google.com/drawings/d/e/2PACX-1vTIRwRldQBnTFqz0hvS01znGOEdoeDMJmZC-PlBM-O59u_xo7DfJlUEE9SlRsy6xO1hT2HuSOBrLmUz/pub?w=1324&h=1063) in the manuscript, please refer to the [reproducing example usage scenarios](./figure-2-use-cases.md). 98 | 99 | 100 | 101 | 102 | ## Cite Us 103 | 104 | If you use the BioImage.IO Chatbot in your research, please cite us: 105 | 106 | ``` 107 | Lei, W., Fuster-Barceló, C., Reder, G. et al. BioImage.IO Chatbot: a community-driven AI assistant for integrative computational bioimaging. Nat Methods 21, 1368–1370 (2024). https://doi.org/10.1038/s41592-024-02370-y 108 | ``` 109 | 110 | ## Acknowledgements 111 | 112 | The BioImage.IO Chatbot is greatly enriched by the valuable resources contributed by the global bioimage analysis community. We extend our deepest gratitude to all authors and contributors listed in the [knowledge base manifest](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/knowledge-base-manifest.yaml), especially the original authors of the materials integrated into our extensions. Their work has significantly enhanced the capabilities of this chatbot, providing users with high-quality information and tools. 113 | 114 | We would also like to thank the open-source community for their ongoing support and innovation, which continuously drives the development and improvement of the BioImage.IO platform. 115 | 116 | We thank [AI4Life consortium](https://ai4life.eurobioimaging.eu/) for its crucial support in the development of the BioImage.IO Chatbot. 
117 | 118 | ![AI4Life](https://ai4life.eurobioimaging.eu/wp-content/uploads/2022/09/AI4Life-logo_giraffe-nodes-2048x946.png) 119 | 120 | AI4Life has received funding from the European Union’s Horizon Europe research and innovation programme under grant agreement number 101057970. Views and opinions expressed are, however, those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them. 121 | 122 | Thank you for supporting the BioImage.IO project and contributing to the advancement of bioimage analysis! 123 | 124 | ## Disclaimer 125 | 126 | 127 | The resources used by the BioImage.IO Chatbot are primarily utilized for research purposes in compliance with the European Union's copyright exception on Text and Data Mining (TDM) as outlined in [Directive (EU) 2019/790](https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A32019L0790). 128 | 129 | The full list of sources and resources used by the BioImage.IO Chatbot can be found in our [knowledge base manifest](https://github.com/bioimage-io/bioimageio-chatbot/blob/main/knowledge-base-manifest.yaml). We kindly ask users to carefully verify the accuracy and completeness of information with the original sources. For any work derived from these resources, please ensure that you respect the respective licenses and adhere to the conditions set forth by the original authors. 130 | 131 | If you are an author of any material used within the BioImage.IO Chatbot and wish to have your content removed, please contact us directly. We are committed to respecting the rights of all contributors and will promptly address your request. 132 | 133 | See the full [Disclaimer for BioImage.IO Chatbot](./DISCLAIMER.md).
# 134 |
# --------------------------------------------------------------------------------
# /bioimageio_chatbot/knowledge_base.py:
# --------------------------------------------------------------------------------
# (dump section marker above preserved verbatim)
"""Build and load the FAISS document stores that back the chatbot's knowledge base."""
import os
import requests
import zipfile
import shutil
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain.docstore.document import Document
import json
import pickle
from bioimageio_chatbot.utils import get_manifest, download_file


def load_docs_store(db_path, collection_name):
    """Load one FAISS docs store from ``db_path``.

    Each collection consists of two files, ``<collection_name>.faiss`` and
    ``<collection_name>.pkl``; both must already exist.

    Args:
        db_path: Folder that holds the index files.
        collection_name: Index name (the collection id).

    Returns:
        The loaded ``FAISS`` vector store.

    Raises:
        Exception: If either index file is missing.
    """
    for extension in ("faiss", "pkl"):
        if not os.path.exists(os.path.join(db_path, f"{collection_name}.{extension}")):
            raise Exception(f"Please build the docs store {collection_name} by running create_vector_knowledge_base first.")
    embeddings = OpenAIEmbeddings()
    # NOTE(review): allow_dangerous_deserialization=True unpickles the .pkl
    # file, so db_path must only contain stores built from trusted sources.
    docs_store = FAISS.load_local(index_name=collection_name, folder_path=db_path, embeddings=embeddings, allow_dangerous_deserialization=True)
    return docs_store


def load_knowledge_base(db_path):
    """Load the docs stores of all collections listed in the manifest.

    A collection whose store is missing, empty or fails to load is reported
    on stdout and skipped.

    Args:
        db_path: Folder that holds the index files.

    Returns:
        dict mapping collection id to its loaded ``FAISS`` store.

    Raises:
        Exception: If no docs store could be loaded at all.
    """
    collections = get_manifest()['collections']
    docs_store_dict = {}

    for collection in collections:
        channel_id = collection['id']
        try:
            docs_store = load_docs_store(db_path, channel_id)
            length = len(docs_store.docstore._dict.keys())
            # Explicit check instead of `assert`, which is removed under
            # `python -O`; the message and the skip-on-failure are unchanged.
            if length <= 0:
                raise ValueError(f"Please make sure the docs store {channel_id} is not empty.")
            print(f"Loaded {length} documents from {channel_id}")
            docs_store_dict[channel_id] = docs_store
        except Exception as e:
            print(f"Failed to load docs store for {channel_id}. Error: {e}")

    if len(docs_store_dict) == 0:
        raise Exception("No docs store is loaded, please make sure the docs store is not empty.")

    return docs_store_dict


def extract_biotools_information(json_file_path):
    """Turn a ``*.biotools.json`` file into indexable text plus metadata.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        Tuple ``(text, data)``: ``text`` holds one line per available field
        (name, description, tool types, topics, publication authors) and
        ``data`` is the parsed JSON dict, extended with a ``url`` entry when
        the record has a ``name``.
    """
    with open(json_file_path, 'r') as f:
        data = json.load(f)

    extracted_info = []
    if 'name' in data:
        # The URL is derived from the name; building it unconditionally
        # raised KeyError for records without one.
        data['url'] = f"https://bio.tools/{data['name']}"
        extracted_info.append(f"Name: {data['name']}")
    if 'description' in data:
        extracted_info.append(f"Description: {data['description']}")

    if 'toolType' in data:
        extracted_info.append(f"Tags: {', '.join(data['toolType'])}")

    if 'topic' in data:
        topics = [item['term'] for item in data['topic']]
        extracted_info.append(f"Topics: {', '.join(topics)}")

    if 'publication' in data:
        for pub in data['publication']:
            if 'metadata' in pub and 'authors' in pub['metadata']:
                authors = [author['name'] for author in pub['metadata']['authors']]
                extracted_info.append(f"Publication Authors: {', '.join(authors)}")
    return "\n".join(extracted_info), data


def parse_docs(root_folder, md_separator=None, pdf_separator=None, chunk_size=1000, chunk_overlap=10):
    """Walk ``root_folder`` (including subfolders) and split its documents into chunks.

    Handles ``.md``, ``.pdf``, ``.txt`` and ``.biotools.json`` files; any
    other file is skipped with a message.

    Args:
        root_folder: Folder to scan.
        md_separator: Separator for Markdown files, ``"\\n## "`` when not given.
        pdf_separator: Separator list for PDF files,
            ``["\\n\\n", "\\n", " ", ""]`` when not given.
        chunk_size: Passed to the text splitters.
        chunk_overlap: Passed to the text splitters.

    Returns:
        List of ``Document`` chunks.
    """
    chunk_list = []
    for foldername, _, filenames in os.walk(root_folder):
        for filename in filenames:
            file_path = os.path.join(foldername, filename)
            if filename.endswith(".md"):
                print(f"Reading {file_path}...")
                documents = TextLoader(file_path).load()
                text_splitter = CharacterTextSplitter(separator=md_separator or "\n## ", chunk_size=chunk_size, chunk_overlap=chunk_overlap)
                chunks = text_splitter.split_documents(documents)
            elif filename.endswith(".pdf"):
                print(f"Reading {file_path}...")
                documents = PyPDFLoader(file_path).load()
                text_splitter = RecursiveCharacterTextSplitter(separators=pdf_separator or ["\n\n", "\n", " ", ""], chunk_size=chunk_size, chunk_overlap=chunk_overlap)
                chunks = text_splitter.split_documents(documents)
            elif filename.endswith(".txt"):
                print(f"Reading {file_path}...")
                documents = TextLoader(file_path).load()
                text_splitter = CharacterTextSplitter(separator="\n", chunk_size=chunk_size, chunk_overlap=chunk_overlap)
                chunks = text_splitter.split_documents(documents)
            elif filename.endswith(".biotools.json"):
                # One document per tool: a text summary, with the parsed
                # JSON record attached as metadata.
                print(f"Reading {file_path}...")
                content, metadata = extract_biotools_information(file_path)
                chunks = [Document(page_content=content, metadata=metadata)]
            else:
                print(f"Skipping {file_path}")
                continue
            chunk_list.extend(chunks)

    return chunk_list


def download_docs(root_dir, url):
    """Download a ``.zip`` or ``.pdf`` from ``url`` into a freshly emptied ``root_dir``.

    WARNING: any existing content of ``root_dir`` is deleted first.

    Args:
        root_dir: Working directory for the download.
        url: Source URL; its last path segment (without query string)
            determines the file name and format.

    Returns:
        Path of the folder holding the documents. When that folder contains
        exactly one entry and it is a directory, that inner directory is
        returned instead.

    Raises:
        Exception: For an unsupported file format (now raised before
            ``root_dir`` is wiped) or an empty download.
    """
    # Extract the file name from the URL, dropping the query string.
    filename = url.split("/")[-1].split("?")[0]
    if not filename.endswith((".zip", ".pdf")):
        raise Exception("Unsupported file format")

    target_directory = os.path.join(root_dir)
    # Always start from an empty target directory.
    if os.path.exists(target_directory):
        shutil.rmtree(target_directory)
    os.makedirs(target_directory, exist_ok=True)

    if filename.endswith(".zip"):
        zip_file_path = os.path.join(target_directory, filename)
        print(f"Downloading {url} to {zip_file_path}")
        download_file(url, zip_file_path)

        result_folder = os.path.join(target_directory, filename + "-unzipped")
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(result_folder)

        # The archive itself is no longer needed once extracted.
        os.remove(zip_file_path)
        print(f"Downloaded and unzipped {url} to {result_folder}")
    else:
        result_folder = os.path.join(target_directory, ".".join(filename.split(".")[:-1]))
        os.makedirs(result_folder, exist_ok=True)
        print(f"Downloading {url} to {result_folder}")
        pdf_file_path = os.path.join(result_folder, filename)
        download_file(url, pdf_file_path)
        print(f"Downloaded {url} to {result_folder}")

    entries = os.listdir(result_folder)
    if len(entries) == 0:
        raise Exception("Downloaded folder is empty")
    if len(entries) == 1:
        # Strip the single top-level folder of an unzipped repository.
        r_dir = os.path.join(result_folder, entries[0])
        if os.path.isdir(r_dir):
            return r_dir
    return result_folder


def create_vector_knowledge_base(output_dir=None, collections=None):
    """Create a vector knowledge base from the downloaded documents.

    For every collection that is not marked ``custom:``, the source is
    downloaded into ``./data``, parsed into chunks, embedded in batches and
    saved as a FAISS index named after the collection id.

    Args:
        output_dir: Folder for the indexes; defaults to the
            ``BIOIMAGEIO_KNOWLEDGE_BASE_PATH`` environment variable, then
            ``./bioimageio-knowledge-base``.
        collections: Collection dicts to build; defaults to the manifest's
            ``collections``.
    """
    if output_dir is None:
        output_dir = os.environ.get("BIOIMAGEIO_KNOWLEDGE_BASE_PATH", "./bioimageio-knowledge-base")
    os.makedirs(output_dir, exist_ok=True)

    if not collections:
        collections = get_manifest()['collections']

    embeddings = OpenAIEmbeddings()
    for collection in collections:
        if collection.get("format") and collection.get("format").startswith("custom:"):
            print(f"Skipping {collection['id']} because it is a custom collection.")
            continue
        url = collection['source']
        cached_docs_file = os.path.join(output_dir, collection['id'] + "-docs.pickle")
        if os.path.exists(cached_docs_file):
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only place trusted "<id>-docs.pickle" files in
            # output_dir. Nothing in this module writes this cache.
            with open(cached_docs_file, "rb") as f:
                documents = pickle.load(f)
        else:
            docs_dir = download_docs("./data", url)
            documents = parse_docs(
                os.path.join(docs_dir, collection.get('directory', '')),
                md_separator=collection.get('md_separator', None),
                pdf_separator=collection.get('pdf_separator', None),
                chunk_size=collection.get('chunk_size', 1000),
                chunk_overlap=collection.get('chunk_overlap', 10),
            )
        if len(documents) > 10000:
            print(f"Warning: {len(documents)} documents found in {url}.")
        print(f"Creating embeddings (#documents={len(documents)})")

        # Embed in batches so a single request never carries every document.
        batch_size = 1000

        all_embedding_pairs = []
        all_metadata = []

        total_length = len(documents)

        for batch_start in range(0, total_length, batch_size):
            batch_end = min(batch_start + batch_size, total_length)
            batch_docs = documents[batch_start:batch_end]
            batch_texts = [t.page_content for t in batch_docs]

            batch_embeddings = embeddings.embed_documents(batch_texts)

            # (text, embedding) pairs, in document order
            all_embedding_pairs.extend(zip(batch_texts, batch_embeddings))
            all_metadata.extend([t.metadata for t in batch_docs])

            print(f"Processed {batch_end}/{total_length} documents")

        # Create the FAISS index from all the embeddings
        vectordb = FAISS.from_embeddings(all_embedding_pairs, embeddings, metadatas=all_metadata)
        print("Saving the vector database...")
        vectordb.save_local(output_dir, index_name=collection['id'])
        print("Created a vector database from the downloaded documents.")


if __name__ == "__main__":
    create_vector_knowledge_base()

# (start of the next dump section marker, preserved verbatim)
# --------------------------------------------------------------------------------
# /docs/development.md:
-------------------------------------------------------------------------------- 1 | # Developing Chatbot Extensions 2 | 3 | ## Introduction 4 | The BioImage.IO Chatbot offers a framework designed for easy extensibility, allowing developers to enrich its capabilities with custom extensions. This guide walks you through the process of developing and integrating new extensions into the chatbot, emphasizing the minimal requirements and the steps involved in using ImJoy to interact with the chatbot. 5 | 6 | Extensions must expose a callable function that adheres to a specific interface: it should accept a dictionary of parameters as its single argument and return a dictionary containing the results of its operations. This design facilitates seamless integration and communication between the chatbot and its extensions. 7 | 8 | A chatbot extension object is a dictionary with the following keys: 9 | - `id`: a unique identifier for the extension; 10 | - `name`: the name of the extension; 11 | - `description`: a short description of the extension; 12 | - `type`: it must be `bioimageio-chatbot-extension`; 13 | - `tools`: a dictionary of tool functions; it represents the set of functions your extension offers, each accepting configuration parameters as input. These functions should carry out specific tasks and return their results in a dictionary; 14 | - `get_schema`: a function that returns a JSON schema for each tool function, specifying the structure and types of the expected parameters. This schema is crucial for instructing the chatbot to generate the correct input parameters, for validating the inputs, and for ensuring they adhere to the expected format. Importantly, the chatbot uses the title and description of each field to understand what is expected when it generates a function call to run the tool (also see the detailed instructions below).
To produce the schema, you can either create it manually or generate it automatically using libraries such as `pydantic`. 15 | 16 | The following is a chatbot extension object defined in Python: 17 | ```python 18 | def my_tool(config): 19 | print(config["my_param"]) 20 | return {"result": "success"} 21 | 22 | def get_schema(): 23 | return { 24 | "my_tool": { 25 | "type": "object", 26 | "title": "my_tool", 27 | "description": "my tool", 28 | "properties": { 29 | "my_param": { 30 | "type": "number", 31 | "description": "This is my parameter" 32 | } 33 | } 34 | } 35 | } 36 | 37 | chatbot_extension = { 38 | "id": "my-extension", 39 | "type": "bioimageio-chatbot-extension", 40 | "name": "My Extension", 41 | "description": "This is my extension", 42 | "get_schema": get_schema, 43 | "tools": { 44 | "my_tool": my_tool 45 | } 46 | } 47 | ``` 48 | 49 | 50 | Instead of writing the schema manually, it is recommended to use libraries such as `pydantic` to generate the schema: 51 | ```python 52 | class MyTool(BaseModel): 53 | """my tool description""" 54 | my_param: float = Field(..., description="This is my parameter doc") 55 | 56 | def my_tool(config): 57 | print(config["my_param"]) 58 | return {"result": "success"} 59 | 60 | def get_schema(): 61 | return { 62 | "my_tool": MyTool.schema() 63 | } 64 | 65 | chatbot_extension = { 66 | "id": "my-extension", 67 | "type": "bioimageio-chatbot-extension", 68 | "name": "My Extension", 69 | "description": "This is my extension", 70 | "get_schema": get_schema, 71 | "tools": { 72 | "my_tool": my_tool 73 | } 74 | } 75 | ``` 76 | 77 | In JavaScript, you can create the extension similarly: 78 | ```javascript 79 | 80 | const chatbotExtension = { 81 | id: "my-extension", 82 | type: "bioimageio-chatbot-extension", 83 | name: "My Extension", 84 | description: "This is my extension", 85 | get_schema() { 86 | return { 87 | my_tool: { 88 | type: "object", 89 | title: "my_tool", 90 | description: "my tool description", 91 | properties: { 92
| my_param: { 93 | type: "number", 94 | description: "This is my parameter doc" 95 | } 96 | } 97 | } 98 | }; 99 | }, 100 | tools: { 101 | my_tool(config) { 102 | console.log(config.my_param); 103 | return {result: "success"}; 104 | } 105 | } 106 | } 107 | 108 | ``` 109 | 110 | After creating the extension object, there are two ways to serve it: one is to use the [ImJoy](https://imjoy.io) plugin framework to run the extension in the browser; the other is to use the [Hypha](https://ha.amun.ai) framework to serve the extension remotely, either in another browser tab or in a native Python process running on your local machine or a remote server. 111 | 112 | ## Option 1: Register Extension with ImJoy 113 | 114 | Below are examples demonstrating how to register an extension with the chatbot using both JavaScript and Python in ImJoy: 115 | 116 | You can try them here: https://imjoy.io/lite?plugin=https://if.imjoy.io 117 | 118 | ### Register Chatbot Extension with ImJoy in JavaScript 119 | 120 | ```javascript 121 | const chatbot = await api.createWindow({ 122 | src: "https://bioimage.io/chat", 123 | name:"BioImage.IO Chatbot", 124 | }); 125 | chatbotExtension._rintf = true; // mark the chatbot extension as an interface 126 | chatbot.registerExtension(chatbotExtension); 127 | ``` 128 | 129 | ### Register Chatbot Extension with ImJoy in Python 130 | 131 | ```python 132 | from imjoy_rpc import api 133 | 134 | chatbot = await api.createWindow( 135 | src="https://bioimage.io/chat", 136 | name="BioImage.IO Chatbot", 137 | ) 138 | chatbot_extension["_rintf"] = True # mark the chatbot extension as an interface 139 | await chatbot.registerExtension(chatbot_extension) 140 | ``` 141 | 142 | ## Option 2: Serve Extension with Hypha 143 | 144 | With Hypha, you can serve your extension remotely, enabling seamless integration with the chatbot.
145 | 146 | Below are examples demonstrating how to serve an extension with Hypha using both JavaScript and Python: 147 | 148 | ### Serve Chatbot Extension with Hypha in JavaScript 149 | 150 | ```javascript 151 | const token = await login({server_url: "https://chat.bioimage.io"}) 152 | const server = await connectToServer({server_url: "https://chat.bioimage.io", token}); 153 | const svc = await server.registerService(chatbotExtension); 154 | console.log(`Extension service registered with id: ${svc.id}, you can visit the service at: https://bioimage.io/chat?server=https://chat.bioimage.io&extension=${svc.id}`); 155 | ``` 156 | 157 | **IMPORTANT: The above hypha service can only be accessed by the same user who registered the service; see the section below about making it public.** 158 | 159 | ### Serve Chatbot Extension with Hypha in Python 160 | 161 | ```python 162 | from hypha_rpc import connect_to_server, login 163 | 164 | server_url = "https://chat.bioimage.io" 165 | token = await login({"server_url": server_url}) 166 | server = await connect_to_server({"server_url": server_url, "token": token}) 167 | svc = await server.register_service(chatbot_extension) 168 | print(f"Extension service registered with id: {svc.id}, you can visit the service at: https://bioimage.io/chat?server={server_url}&extension={svc.id}") 169 | ``` 170 | 171 | After registering the extension with Hypha, you can access the chatbot with the extension by visiting the following URL: `https://bioimage.io/chat?server=https://chat.bioimage.io&extension=<extension_service_id>`, where `<extension_service_id>` is the ID of the registered extension service. 172 | 173 | **IMPORTANT: The above hypha service can only be accessed by the same user who registered the service; see the section below about making it public.** 174 | 175 | ### Making Chatbot Extension Public 176 | 177 | To make it public, you need to set the visibility of the chatbot extension service to `public`.
178 | 179 | See the example below: 180 | 181 | ```python 182 | from hypha_rpc import connect_to_server, login 183 | 184 | server_url = "https://chat.bioimage.io" 185 | token = await login({"server_url": server_url}) 186 | server = await connect_to_server({"server_url": server_url, "token": token}) 187 | # Below, we set the visibility to public 188 | chatbot_extension['config'] = {"visibility": "public"} 189 | svc = await server.register_service(chatbot_extension) 190 | print(f"Extension service registered with id: {svc.id}, you can visit the service at: https://bioimage.io/chat?server={server_url}&extension={svc.id}") 191 | ``` 192 | 193 | You can also implement authorization logic in the tool function; see [Hypha service authorization](https://ha.amun.ai/#/?id=service-authorization). 194 | 195 | ## Tutorial 196 | 197 | For an in-depth understanding, refer to [our detailed tutorial](./bioimage-chatbot-extension-tutorial.ipynb), accessible directly through the ImJoy Jupyter Notebook in your browser without installation. [Click here to launch the notebook](https://imjoy-notebook.netlify.app/lab/index.html?load=https://raw.githubusercontent.com/bioimage-io/bioimageio-chatbot/main/docs/bioimage-chatbot-extension-tutorial.ipynb&open=1). 198 | 199 | ## Extension Development Details 200 | 201 | ### `tools` and `get_schema` 202 | When developing extensions, it's essential to define the `tools` and `get_schema` functionalities carefully: 203 | - **`tools`**: Represents the set of functions your extension offers, each accepting configuration parameters as input. These functions should carry out specific tasks and return their results in a dictionary. 204 | - **`get_schema`**: Returns a JSON schema for each tool function, specifying the structure and types of the expected parameters. This schema is crucial: it instructs the chatbot how to generate the correct input parameters, and it is used to validate the inputs, ensuring they adhere to the expected format.
Importantly, the chatbot uses the title and description of each field to understand what the tool expects when generating a function call to run it (also see the detailed instructions below). 205 | 206 | ### Notes on Function Input/Output 207 | The input and output of tool functions are restricted to primitive types (e.g., numbers, strings) that can be encoded in JSON. This limitation ensures compatibility and facilitates the exchange of data between the chatbot and extensions. 208 | 209 | ### Importance of Detailed Descriptions 210 | Providing a detailed description for your extension and its arguments is vital. These descriptions assist the chatbot in correctly invoking the tools and help the chatbot understand the functionality and purpose of your extension. Ensure that each argument is accompanied by a clear title and a comprehensive description to improve the usability and interaction quality of the chatbot. 211 | 212 | By adhering to these guidelines, you will enhance the clarity, utility, and ease of integration of your chatbot extensions, contributing to a richer ecosystem of tools within the BioImage.IO community. 213 | -------------------------------------------------------------------------------- /bioimageio_chatbot/static/pyodide-worker.js: -------------------------------------------------------------------------------- 1 | const indexURL = 'https://cdn.jsdelivr.net/pyodide/v0.25.0/full/' 2 | importScripts(`${indexURL}pyodide.js`); 3 | 4 | (async () => { 5 | self.pyodide = await loadPyodide({ indexURL }) 6 | await self.pyodide.loadPackage("micropip"); 7 | const micropip = self.pyodide.pyimport("micropip"); 8 | await micropip.install(['numpy', 'imjoy-rpc', 'pyodide-http']); 9 | // NOTE: We intentionally avoid runPythonAsync here because we don't want this to pre-load extra modules like matplotlib. 10 | self.pyodide.runPython(setupCode) 11 | self.postMessage({loading: true}) // Inform the main thread that we finished loading.
})()

// Messages emitted while a script is executing. The message handler of this
// worker reassigns it to a fresh array around each run and ships its contents
// back to the main thread together with the "executionDone" notification.
let outputs = []

/**
 * Forward a chunk of text output to the main thread and record it in `outputs`.
 *
 * Called from the Python side by the stdout/stderr writer classes defined in
 * `setupCode`, which pass "stdout" or "stderr" as `type`.
 *
 * @param type    Output channel label.
 * @param content The text that was written.
 * @returns The length of `content`.
 */
function write(type, content) {
  self.postMessage({ type, content })
  outputs.push({ type, content })
  return content.length
}

/**
 * Record a registered service and announce it to the main thread.
 *
 * @param type  Message type label (the Python side passes "service").
 * @param url   Service URL; stored and posted as the message `content`.
 * @param attrs Optional Pyodide proxy; when present it is converted with
 *              `toJs`, turning Python dicts into plain objects via
 *              `Object.fromEntries`. The conversion runs once for the
 *              recorded entry and once for the posted message.
 */
function logService(type, url, attrs) {
  outputs.push({type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries})})
  self.postMessage({ type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries}) })
}

/**
 * Send displayable content (the Python side passes "img" with a data URL)
 * to the main thread.
 *
 * The entry recorded in `outputs` only keeps a preview of `url`: when it is
 * longer than 32 characters it is cut to the first 32 characters plus "...".
 * The message posted to the main thread always carries the full `url`.
 *
 * @param type  Message type label.
 * @param url   Content to display.
 * @param attrs Optional Pyodide proxy with extra attributes, converted with `toJs`.
 */
function show(type, url, attrs) {
  const turl = url.length > 32 ? url.slice(0, 32) + "..." : url
  outputs.push({type, content: turl, attrs: attrs?.toJs({dict_converter : Object.fromEntries})})
  self.postMessage({ type, content: url, attrs: attrs?.toJs({dict_converter : Object.fromEntries}) })
}

/**
 * Post a key/value pair to the main thread as a "store" message.
 * The value is coerced to a string with a template literal before posting;
 * nothing is added to `outputs`.
 */
function store_put(key, value) {
  self.postMessage({ type: "store", key, content: `${value}` })
}

// Stand-in for `time.sleep`, which does not actually sleep.
// To avoid a busy loop, instead import asyncio and await asyncio.sleep().
// This implementation busy-waits: it loops until `seconds` have elapsed
// according to performance.now().
function spin(seconds) {
  const time = performance.now() + seconds * 1000
  while (performance.now() < time);
}

// NOTE: eval(compile(source, "<string>", "exec", ast.PyCF_ALLOW_TOP_LEVEL_AWAIT))
// returns a coroutine if `source` contains a top-level await, and None otherwise.

const setupCode = `
import array
import ast
import base64
import contextlib
import io
import js
import pyodide
import sys
import time
import traceback
import wave
import pyodide_http

python_version = f"{sys.version_info.major}.{sys.version_info.minor}"; print(python_version)

pyodide_http.patch_all() # Patch all libraries
help_string = f"""
Welcome to BioImage.IO Chatbot Debug console!
Python {python_version} on Pyodide {pyodide.__version__}

In this console, you can run Python code and interact with the code interpreter used by the chatbot.
You can inspect variables, run functions, and more.

If this is your first time using Python, you should definitely check out
the tutorial on the internet at https://docs.python.org/{python_version}/tutorial/.
Enter the name of any module, keyword, or topic to get help on writing
Python programs and using Python modules. To quit this help utility and
return to the interpreter, just type "quit".
To get a list of available modules, keywords, symbols, or topics, type
"modules", "keywords", "symbols", or "topics". Each module also comes
with a one-line summary of what it does; to list the modules whose name
or summary contain a given string such as "spam", type "modules spam".
"""

# Replace the built-in help() with a function that ignores its arguments and
# just prints the static help text above.
__builtins__.help = lambda *args, **kwargs: print(help_string)

# patch hypha services
import imjoy_rpc.hypha
# Keep a reference to the original function so the wrapper below can call it.
_connect_to_server = imjoy_rpc.hypha.connect_to_server

async def patched_connect_to_server(*args, **kwargs):
    """Connect with the original connect_to_server and instrument the result.

    All arguments are passed through unchanged. The returned server object has
    its register_service / registerService attributes replaced by a wrapper
    that reports every registered service to the JavaScript side.
    """
    server = await _connect_to_server(*args, **kwargs)
    _register_service = server.register_service
    async def patched_register_service(*args, **kwargs):
        """Register the service, then report its URL through js.logService."""
        svc_info = await _register_service(*args, **kwargs)
        # Takes the second colon-separated component of the id.
        # NOTE(review): assumes the id always contains a ':' - an id without
        # one would raise IndexError here; confirm against the server.
        service_id = svc_info['id'].split(':')[1]
        service_url = f"{server.config['public_base_url']}/{server.config['workspace']}/services/{service_id}"
        js.logService("service", service_url, svc_info)
        return svc_info
    # Install the wrapper under both the snake_case and camelCase names.
    server.register_service = patched_register_service
    server.registerService = patched_register_service
    return server

# From here on, code that looks up imjoy_rpc.hypha.connect_to_server gets the wrapper.
imjoy_rpc.hypha.connect_to_server = patched_connect_to_server

# For redirecting stdout and stderr later.
class JSOutWriter(io.TextIOBase):
    """Text stream that forwards everything written to it to js.write as "stdout"."""

    def write(self, s):
        # The value returned by js.write is passed straight back to the caller.
        return js.write("stdout", s)

class JSErrWriter(io.TextIOBase):
    """Text stream that forwards everything written to it to js.write as "stderr"."""

    def write(self, s):
        return js.write("stderr", s)

def setup_matplotlib():
    """Select the Agg backend and replace plt.show with an inline PNG renderer."""
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    def show(*args, **kwargs):
        """Render the current figure as a PNG data URL, send it to JS, clear the figure.

        Arguments are accepted and ignored so that user code written for the
        regular plt.show signature, e.g. plt.show(block=False), does not fail.
        """
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        img = 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode('utf-8')
        js.show("img", img)
        plt.clf()

    plt.show = show

def show_image(image, **attrs):
    """Display an image by sending it to JS as a PNG data URL.

    image: a PIL image, or any object accepted by PIL Image.fromarray.
    attrs: extra keyword arguments, forwarded to js.show as a dict.
    """
    from PIL import Image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    buf = io.BytesIO()
    image.save(buf, format='png')
    data = 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode('utf-8')
    js.show("img", data, attrs)

# In-memory copy of every value stored through store_put, keyed by name.
_store = {}
def store_put(key, value):
    """Remember value under key locally and forward the pair to js.store_put."""
    _store[key] = value
    js.store_put(key, value)

def store_get(key):
    """Return the value previously stored under key, or None if there is none."""
    return _store.get(key)

def show_animation(frames, duration=100, format="apng", loop=0, **attrs):
    """Display a sequence of frames as an animated image.

    frames:   non-empty iterable of PIL images or objects accepted by
              PIL Image.fromarray.
    duration: forwarded to PIL Image.save.
    format:   used in the MIME type of the data URL; "apng" is written with
              the PIL "png" writer, any other value is passed to PIL as is.
    loop:     forwarded to PIL Image.save. Previously this argument was
              ignored and 0 was always used.
    attrs:    extra keyword arguments, forwarded to js.show as a dict.
    """
    from PIL import Image
    buf = io.BytesIO()
    img, *imgs = [frame if isinstance(frame, Image.Image) else Image.fromarray(frame) for frame in frames]
    img.save(buf, format='png' if format == "apng" else format, save_all=True, append_images=imgs, duration=duration, loop=loop)
    img = f'data:image/{format};base64,' + base64.b64encode(buf.getvalue()).decode('utf-8')
    js.show("img", img, attrs)

def convert_audio(data):
    # For numpy input: a 1-D array is one channel; a 2-D array is read as
    # (channels, samples) and interleaved by transposing and flattening.
    # Any other number of dimensions raises ValueError.
    # The rest of this function follows the shape handling below.
    try:
        import numpy as np
        is_numpy = isinstance(data, np.ndarray)
    except ImportError:
        is_numpy = False
    if is_numpy:
        if len(data.shape) == 1:
            channels = 1
        # Must be elif: with a plain if, 1-D input fell through to the else
        # branch below and was rejected.
        elif len(data.shape) == 2:
            channels = data.shape[0]
            data = data.T.ravel()
        else:
            raise ValueError("Too many dimensions (expected 1 or 2).")
165 | return ((data * (2**15 - 1)).astype("", "exec", ast.PyCF_ALLOW_TOP_LEVEL_AWAIT) 236 | 237 | result = eval(code, context) 238 | if result is not None: 239 | result = await result 240 | if last_expression: 241 | if isinstance(last_expression.value, ast.Await): 242 | # If last expression is an await, compile and execute it as async 243 | last_expr_code = compile(ast.Expression(last_expression.value), "", "eval", flags=ast.PyCF_ALLOW_TOP_LEVEL_AWAIT) 244 | result = await eval(last_expr_code, context) 245 | else: 246 | # If last expression is not an await, compile and evaluate it normally 247 | last_expr_code = compile(ast.Expression(last_expression.value), "", "eval") 248 | result = eval(last_expr_code, context) 249 | if result is not None: 250 | print(result) 251 | for op in outputs: 252 | if op not in context: 253 | raise Exception("Error: The script did not produce an variable named: " + op) 254 | store_put(op, context[op]) 255 | except: 256 | traceback.print_exc() 257 | raise 258 | `
// NOTE(review): the Python text that ends the template literal above is kept
// exactly as found. It looks truncated in this copy of the file (the jump from
// line 165 to 236 and the empty "" filename arguments suggest text between
// angle brackets was lost) - restore it from the original file before relying on it.

// Native file systems mounted so far, keyed by mount point; the value is
// whatever mountNativeFS resolved to for that mount point.
const mountedFs = {}

/**
 * Handle requests from the main thread.
 *
 * Two independent request kinds are recognised on `event.data`; a single
 * message may carry both, in which case both branches run in order:
 *
 * - `source` (with optional `io_context`): execute Python code.
 *   Replies with `{ executionDone: true, outputs }` on success or
 *   `{ executionError: <message> }` on failure.
 * - `mount` (`{ mountPoint, dirHandle }`): mount a native directory.
 *   Replies with `{ mounted: <mountPoint> }` on success or
 *   `{ mountError: <message> }` on failure.
 */
self.onmessage = async (event) => {
  if(event.data.source){
    try{
      const { source, io_context } = event.data
      // Expose the request to Python as globals; io_context is converted to a
      // Python object only when it is truthy, otherwise passed through as is.
      self.pyodide.globals.set("source", source)
      self.pyodide.globals.set("io_context", io_context && self.pyodide.toPy(io_context))
      // Start collecting output for this run from scratch.
      outputs = []
      // see https://github.com/pyodide/pyodide/blob/b177dba277350751f1890279f5d1a9096a87ed13/src/js/api.ts#L546
      // sync native ==> browser
      // The syncfs callback is used directly as `resolve`, so the promise
      // settles successfully whether or not syncfs reports an error.
      await new Promise((resolve, _) => self.pyodide.FS.syncfs(true, resolve));
      await self.pyodide.runPythonAsync("await run(source, io_context)")
      // sync browser ==> native
      // NOTE(review): the line below ends with a comma, not a semicolon, so it
      // forms one comma expression with the console.log call that follows.
      // Both still run in order; a semicolon was probably intended.
      await new Promise((resolve, _) => self.pyodide.FS.syncfs(false, resolve)),
      console.log("Execution done", outputs)
      self.postMessage({ executionDone: true, outputs })
      outputs = []
    }
    catch(e){
      console.error("Execution Error", e)
      self.postMessage({ executionError: e.message })
    }
  }
  if(event.data.mount){
    try{
      const { mountPoint, dirHandle } = event.data.mount
      // Remounting an already mounted path: drop the previous mount first.
      if(mountedFs[mountPoint]){
        console.log("Unmounting native FS:", mountPoint)
        await self.pyodide.FS.unmount(mountPoint)
        delete mountedFs[mountPoint]
      }
      const nativefs = await self.pyodide.mountNativeFS(mountPoint, dirHandle)
      mountedFs[mountPoint] = nativefs
      console.log("Native FS mounted:", mountPoint, nativefs)
      self.postMessage({ mounted: mountPoint })
    }
    catch(e){
      self.postMessage({ mountError: e.message })
    }
  }

}
--------------------------------------------------------------------------------