├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── feature_request.yaml │ └── question.yaml └── workflows │ ├── deploy-webapp.yaml │ ├── pre-commit.yaml │ ├── publish.yaml │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmarks └── ehrsql-naacl2024 │ ├── README.md │ ├── claude-sonnet-4 │ └── EHRSQL_benchmark.csv │ └── gpt-oss-20B │ ├── EHRSQL_benchmark.csv │ └── conversations │ ├── 10.conversation.json │ ├── 100.conversation.json │ ├── 101.conversation.json │ ├── 11.conversation.json │ ├── 12.conversation.json │ ├── 13.conversation.json │ ├── 14.conversation.json │ ├── 15.conversation.json │ ├── 16.conversation.json │ ├── 17.conversation.json │ ├── 18.conversation.json │ ├── 19.conversation.json │ ├── 2.conversation.json │ ├── 20.conversation.json │ ├── 21.conversation.json │ ├── 22.conversation.json │ ├── 23.conversation.json │ ├── 24.conversation.json │ ├── 25.conversation.json │ ├── 26.conversation.json │ ├── 27.conversation.json │ ├── 28.conversation.json │ ├── 29.conversation.json │ ├── 3.conversation.json │ ├── 30.conversation.json │ ├── 31.conversation.json │ ├── 32.conversation.json │ ├── 33.conversation.json │ ├── 34.conversation.json │ ├── 35.conversation.json │ ├── 36.conversation.json │ ├── 37.conversation.json │ ├── 38.conversation.json │ ├── 39.conversation.json │ ├── 4.conversation.json │ ├── 40.conversation.json │ ├── 41.conversation.json │ ├── 42.conversation.json │ ├── 43.conversation.json │ ├── 44.conversation.json │ ├── 45.conversation.json │ ├── 46.conversation.json │ ├── 47.conversation.json │ ├── 48.conversation.json │ ├── 49.conversation.json │ ├── 5.conversation.json │ ├── 50.conversation.json │ ├── 51.conversation.json │ ├── 52.conversation.json │ ├── 53.conversation.json │ ├── 54.conversation.json │ ├── 55.conversation.json │ ├── 56.conversation.json │ ├── 57.conversation.json │ ├── 58.conversation.json │ ├── 
59.conversation.json │ ├── 6.conversation.json │ ├── 60.conversation.json │ ├── 61.conversation.json │ ├── 62.conversation.json │ ├── 63.conversation.json │ ├── 64.conversation.json │ ├── 65.conversation.json │ ├── 66.conversation.json │ ├── 67.conversation.json │ ├── 68.conversation.json │ ├── 69.conversation.json │ ├── 7.conversation.json │ ├── 70.conversation.json │ ├── 71.conversation.json │ ├── 72.conversation.json │ ├── 73.conversation.json │ ├── 74.conversation.json │ ├── 75.conversation.json │ ├── 76.conversation.json │ ├── 77.conversation.json │ ├── 78.conversation.json │ ├── 79.conversation.json │ ├── 8.conversation.json │ ├── 80.conversation.json │ ├── 81.conversation.json │ ├── 82.conversation.json │ ├── 83.conversation.json │ ├── 84.conversation.json │ ├── 85.conversation.json │ ├── 86.conversation.json │ ├── 87.conversation.json │ ├── 88.conversation.json │ ├── 89.conversation.json │ ├── 9.conversation.json │ ├── 90.conversation.json │ ├── 91.conversation.json │ ├── 92.conversation.json │ ├── 93.conversation.json │ ├── 94.conversation.json │ ├── 95.conversation.json │ ├── 96.conversation.json │ ├── 97.conversation.json │ ├── 98.conversation.json │ └── 99.conversation.json ├── docs └── OAUTH2_AUTHENTICATION.md ├── pyproject.toml ├── src └── m3 │ ├── __init__.py │ ├── auth.py │ ├── cli.py │ ├── config.py │ ├── data_io.py │ ├── mcp_client_configs │ ├── __init__.py │ ├── dynamic_mcp_config.py │ └── setup_claude_desktop.py │ └── mcp_server.py ├── tests ├── test_cli.py ├── test_config.py ├── test_config_scripts.py ├── test_data_io.py ├── test_example.py ├── test_mcp_server.py └── test_oauth2_basic.py ├── uv.lock └── webapp ├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── public ├── banner1.png ├── banner2.png ├── banner3.png ├── favicon.ico ├── index.html ├── logo192.png ├── logo512.png ├── m3_architecture.png ├── m3_logo.png ├── m3_logo_transparent.png ├── manifest.json ├── pypi_logo.svg ├── robots.txt └── videos │ ├── 
m3_website_1.mp4 │ ├── m3_website_2.mp4 │ ├── m3_website_3.mp4 │ └── m3_website_4.mp4 └── src ├── App.css ├── App.js ├── components ├── ArchitectureDiagram.js ├── CTA.js ├── Citation.js ├── Contact.js ├── Demos.js ├── Documentation.js ├── Explanation.js ├── Features.js ├── Footer.js ├── Header.js ├── Hero.js ├── Installation.js └── Paper.js ├── index.css └── index.js /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | __pycache__ 3 | *.pyc 4 | .venv 5 | .env 6 | webapp/ 7 | benchmarks/ 8 | coverage.xml 9 | .pytest_cache/ 10 | dist/ 11 | build/ 12 | *.egg-info 13 | # Keep only the demo DB 14 | m3_data/** 15 | !m3_data/databases/ 16 | !m3_data/databases/mimic_iv_demo.db 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: "🐛 Bug Report" 2 | description: Create a new ticket for a bug in M3. 3 | title: "🐛 [BUG] - " 4 | labels: 5 | - "bug" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us process your issue in timely-manner. 20 | options: 21 | - label: I have checked the documentation and existing issues. 22 | required: true 23 | - type: textarea 24 | id: description 25 | attributes: 26 | label: "Description" 27 | description: Please provide a clear and concise description of the bug. 
28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: input 32 | id: reprod-url 33 | attributes: 34 | label: "Reproduction URL" 35 | description: "If you’ve forked M3, provide a GitHub URL or repository link to reproduce the issue." 36 | placeholder: "Hint: Optional, but it helps us resolve the issue faster. Leave empty if not applicable." 37 | validations: 38 | required: false 39 | - type: textarea 40 | id: reprod 41 | attributes: 42 | label: "Reproduction Steps" 43 | description: "List the steps to reproduce the bug. The more detailed, the better!" 44 | value: | 45 | 1. Go to '...' 46 | 2. Run '....' 47 | 3. Observe '....' 48 | render: bash 49 | validations: 50 | required: true 51 | - type: markdown 52 | attributes: 53 | value: "### Environment Information" 54 | - type: dropdown 55 | id: os 56 | attributes: 57 | label: "Operating System" 58 | description: What operating system were you using when the bug occurred? 59 | options: 60 | - "Windows" 61 | - "Linux" 62 | - "Mac" 63 | - "Other" 64 | - type: input 65 | id: other-os 66 | attributes: 67 | label: "Other Operating System" 68 | description: "Only fill this if you selected 'Other' in the Operating System dropdown." 69 | placeholder: "e.g., Ubuntu 20.04" 70 | validations: 71 | required: false 72 | - type: dropdown 73 | id: backend 74 | attributes: 75 | label: "Backend" 76 | description: Which backend were you using when the bug occurred? 77 | options: 78 | - "SQLite" 79 | - "BigQuery" 80 | - "Other" 81 | - type: input 82 | id: other-backend 83 | attributes: 84 | label: "Other Backend" 85 | description: "Only fill this if you selected 'Other' in the Backend dropdown." 86 | placeholder: "e.g., Custom Database" 87 | validations: 88 | required: false 89 | - type: dropdown 90 | id: mcp-client 91 | attributes: 92 | label: "MCP Client" 93 | description: Which MCP client were you using? 
94 | options: 95 | - "Claude Desktop" 96 | - "Other" 97 | - type: input 98 | id: other-mcp-client 99 | attributes: 100 | label: "Other MCP Client" 101 | description: "Only fill this if you selected 'Other' in the MCP Client dropdown." 102 | placeholder: "e.g., Custom Client" 103 | validations: 104 | required: false 105 | - type: dropdown 106 | id: authentication 107 | attributes: 108 | label: "Authentication" 109 | description: Were you using OAuth2 authentication when the bug occurred? 110 | options: 111 | - "Yes" 112 | - "No" 113 | - type: input 114 | id: python-version 115 | attributes: 116 | label: "Python Version" 117 | description: What version of Python are you using? 118 | placeholder: "e.g., 3.10.0" 119 | validations: 120 | required: true 121 | - type: input 122 | id: m3-version 123 | attributes: 124 | label: "M3 Version" 125 | description: What version of M3 are you using? Run 'm3 --version' to check. 126 | placeholder: "e.g., 0.1.0" 127 | validations: 128 | required: true 129 | - type: textarea 130 | id: additional-info 131 | attributes: 132 | label: "Additional Information" 133 | description: "In case none of the above options fit your case, please provide additional information." 134 | placeholder: "... write here ..." 135 | validations: 136 | required: false 137 | - type: markdown 138 | attributes: 139 | value: | 140 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this report. 141 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: "💡 Feature Request" 2 | description: Suggest a new feature or enhancement for M3. 
3 | title: "💡 [FEAT] - <title>" 4 | labels: 5 | - "feature" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us process your request efficiently. 20 | options: 21 | - label: I have checked if this feature already exists or has been requested. 22 | required: true 23 | - type: textarea 24 | id: feature-description 25 | attributes: 26 | label: "Feature Description" 27 | description: Please provide a clear and concise description of the feature you are requesting. 28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: textarea 32 | id: proposed-solution 33 | attributes: 34 | label: "Proposed Solution" 35 | description: Describe how you envision this feature working. 36 | placeholder: "Explain how the feature should function." 37 | validations: 38 | required: true 39 | - type: textarea 40 | id: examples 41 | attributes: 42 | label: "Examples or References" 43 | description: Provide any examples, links, or references that might help illustrate your request. 44 | placeholder: "Hint: Optional, e.g., screenshots, links to similar features in other projects, etc." 45 | validations: 46 | required: false 47 | - type: dropdown 48 | id: contribution 49 | attributes: 50 | label: "Willing to Contribute?" 51 | description: Would you be willing to help implement this feature? 
52 | options: 53 | - "Yes" 54 | - "No" 55 | - type: dropdown 56 | id: roadmap-alignment 57 | attributes: 58 | label: "Roadmap Alignment" 59 | description: Which part of the M3 roadmap does this feature align with? 60 | options: 61 | - "Broader Dataset Coverage" 62 | - "Richer MCP Tooling" 63 | - "Technical Enhancements" 64 | - "Ecosystem and Community Contributions" 65 | - "Other" 66 | - type: input 67 | id: other-roadmap 68 | attributes: 69 | label: "Other Roadmap Alignment" 70 | description: "Only fill this if you selected 'Other' in the Roadmap Alignment dropdown." 71 | placeholder: "Describe how this feature aligns with M3's goals." 72 | validations: 73 | required: false 74 | - type: markdown 75 | attributes: 76 | value: | 77 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this request. 78 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yaml: -------------------------------------------------------------------------------- 1 | name: "❓ Ask a Question" 2 | description: Ask a question about M3. 3 | title: "❓ [QUESTION] - <title>" 4 | labels: 5 | - "question" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us answer your question efficiently. 20 | options: 21 | - label: I have checked the documentation and existing issues. 
22 | required: true 23 | - type: textarea 24 | id: question 25 | attributes: 26 | label: "Question" 27 | description: Please provide a clear and concise question. 28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: textarea 32 | id: context 33 | attributes: 34 | label: "Additional Context" 35 | description: Provide any additional context or details that might help answer your question. 36 | placeholder: "Hint: Optional, e.g., code snippets, error messages, etc." 37 | validations: 38 | required: false 39 | - type: input 40 | id: project-area 41 | attributes: 42 | label: "Project Area" 43 | description: Specify the area of M3 your question relates to. 44 | placeholder: "Hint: Optional, e.g., CLI, MCP Server, OAuth2, etc." 45 | validations: 46 | required: false 47 | - type: markdown 48 | attributes: 49 | value: | 50 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this question. 
51 | -------------------------------------------------------------------------------- /.github/workflows/deploy-webapp.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Webapp to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "webapp/**" 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-node@v4 26 | with: 27 | node-version: "18" 28 | cache: "npm" 29 | cache-dependency-path: webapp/package-lock.json 30 | 31 | - uses: actions/configure-pages@v4 32 | 33 | - name: Create .env file 34 | run: | 35 | cat <<EOF > ./webapp/.env 36 | REACT_APP_FORMSPREE_FORM_IDS=${{ secrets.REACT_APP_FORMSPREE_FORM_IDS }} 37 | REACT_APP_CONTACT_EMAILS=${{ secrets.REACT_APP_CONTACT_EMAILS }} 38 | EOF 39 | 40 | - run: npm ci 41 | working-directory: ./webapp 42 | 43 | - run: npm run build 44 | working-directory: ./webapp 45 | 46 | - uses: actions/upload-pages-artifact@v3 47 | with: 48 | path: ./webapp/build 49 | 50 | deploy: 51 | environment: 52 | name: github-pages 53 | url: ${{ steps.deployment.outputs.page_url }} 54 | runs-on: ubuntu-latest 55 | needs: build 56 | steps: 57 | - name: Deploy to GitHub Pages 58 | id: deployment 59 | uses: actions/deploy-pages@v4 60 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: Pre-commit checks 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | pre-commit: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Install UV and Python 18 | uses: 
astral-sh/setup-uv@v5 19 | with: 20 | version: "latest" 21 | python-version: "3.11" 22 | - name: Symlink uv to expected path for pre-commit-uv 23 | run: | 24 | mkdir -p ~/.local/bin 25 | ln -s $(which uv) ~/.local/bin/uv 26 | - run: uv venv 27 | - run: uv sync --dev 28 | - run: uv add pytest==7.4.3 29 | - uses: tox-dev/action-pre-commit-uv@v1 30 | with: 31 | extra_args: --all-files 32 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: # Allow manual triggering 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | url: https://pypi.org/p/m3-mcp 14 | permissions: 15 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 16 | contents: read 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Set up uv 22 | uses: astral-sh/setup-uv@v5 23 | with: 24 | version: "latest" 25 | python-version: "3.11" 26 | 27 | - name: Extract version from tag 28 | id: get_version 29 | run: | 30 | # Get version from git tag (removes 'v' prefix if present) 31 | VERSION=${GITHUB_REF#refs/tags/} 32 | VERSION=${VERSION#v} 33 | echo "version=$VERSION" >> $GITHUB_OUTPUT 34 | echo "Publishing version: $VERSION" 35 | 36 | - name: Update version in pyproject.toml 37 | run: | 38 | # Update version in pyproject.toml to match the git tag 39 | sed -i "s/version = \".*\"/version = \"${{ steps.get_version.outputs.version }}\"/" pyproject.toml 40 | echo "Updated pyproject.toml version to ${{ steps.get_version.outputs.version }}" 41 | cat pyproject.toml | grep version 42 | 43 | - name: Lock dependencies 44 | run: uv lock --locked 45 | 46 | - name: Sync dependencies including dev 47 | run: uv sync --all-groups 48 | 49 | - name: Run quick tests 50 | run: | 51 | uv add pytest==7.4.3 52 | uv 
add pytest-asyncio 53 | uv run pytest tests/ -v --tb=short 54 | 55 | - name: Build package 56 | run: uv build 57 | 58 | - name: Verify package 59 | run: uv run --with twine twine check dist/* 60 | 61 | - name: Publish to PyPI 62 | uses: pypa/gh-action-pypi-publish@release/v1 63 | with: 64 | print-hash: true 65 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.10", "3.12"] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Install UV and Python 19 | uses: astral-sh/setup-uv@v5 20 | with: 21 | version: "latest" 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Create virtual environment 25 | run: uv venv 26 | 27 | - name: Install dependencies 28 | run: | 29 | uv sync --all-groups 30 | uv add pytest==7.4.3 31 | 32 | - name: Run tests 33 | run: uv run pytest -v 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | *.egg-info/ 7 | .installed.cfg 8 | *.egg 9 | *.whl 10 | 11 | # Distribution / packaging 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | MANIFEST 24 | 25 | # Virtual environments 26 | .env 27 | .*venv* 28 | env/ 29 | venv/ 30 | ENV/ 31 | env.bak/ 32 | venv.bak/ 33 | .python-version 34 | 35 | # IDE specific files 36 | .idea/ 37 | .vscode/ 38 | *.sublime-project 39 | *.sublime-workspace 40 | 41 | # Test and coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | 
nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | junit.xml 54 | 55 | # Logs and temporary files 56 | *.log 57 | *.tmp 58 | *.swp 59 | 60 | # SQLite databases (if you don't want to commit local test/demo dbs) 61 | *.db 62 | *.db-journal* 63 | mimic*.db 64 | 65 | # Configuration files 66 | config.json 67 | *config*.json 68 | 69 | # Operating System specific files 70 | .DS_Store 71 | Thumbs.db 72 | Desktop.ini 73 | 74 | # MyPy 75 | .mypy_cache/ 76 | 77 | # Ruff 78 | .ruff_cache/ 79 | 80 | # Datasets and other large files 81 | data/ 82 | m3_data/ 83 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-toml 9 | - id: check-added-large-files 10 | args: ['--maxkb=1024'] 11 | exclude: '\.(mp4|avi|mov|mkv)$' 12 | - id: mixed-line-ending 13 | 14 | - repo: https://github.com/astral-sh/ruff-pre-commit 15 | rev: v0.11.9 16 | hooks: 17 | - id: ruff 18 | args: [--fix, --exit-non-zero-on-fix] 19 | - id: ruff-format 20 | 21 | - repo: local 22 | hooks: 23 | - id: pytest 24 | name: pytest 25 | entry: pytest 26 | language: system # Assumes pytest is installed in your environment (via pip install .[dev]) 27 | types: [python] # Run on changes to Python files 28 | pass_filenames: false # Pytest typically runs on the whole suite 29 | # stages: [commit] # This is the default, explicitly stating it 30 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "Citation for this repository" 3 | authors: 4 | - family-names: Al Attrach 5 | given-names: Rafi 6 | - family-names: 
Moreira 7 | given-names: Pedro 8 | - family-names: Fani 9 | given-names: Rajna 10 | - family-names: Umeton 11 | given-names: Renato 12 | - family-names: Celi 13 | given-names: Leo Anthony 14 | title: "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis" 15 | date-released: 2025-06-27 16 | doi: 10.48550/arXiv.2507.01053 17 | url: https://arxiv.org/abs/2507.01053 18 | preferred-citation: 19 | type: article 20 | authors: 21 | - family-names: Al Attrach 22 | given-names: Rafi 23 | - family-names: Moreira 24 | given-names: Pedro 25 | - family-names: Fani 26 | given-names: Rajna 27 | - family-names: Umeton 28 | given-names: Renato 29 | - family-names: Celi 30 | given-names: Leo Anthony 31 | title: "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis" 32 | doi: 10.48550/arXiv.2507.01053 33 | year: 2025 34 | month: 6 35 | url: https://arxiv.org/abs/2507.01053 36 | publisher: arXiv 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | # Build stage: create wheel 4 | FROM python:3.11-slim AS builder 5 | 6 | WORKDIR /build 7 | COPY pyproject.toml uv.lock README.md ./ 8 | COPY src ./src 9 | 10 | RUN pip install --no-cache-dir build && \ 11 | python -m build --wheel 12 | 13 | # Base runtime: install m3 and baked SQLite DB 14 | FROM python:3.11-slim AS base 15 | 16 | ENV PYTHONUNBUFFERED=1 \ 17 | M3_BACKEND=sqlite \ 18 | M3_DB_PATH=/root/m3_data/databases/mimic_iv_demo.db 19 | 20 | WORKDIR /app 21 | 22 | COPY --from=builder /build/dist/*.whl /tmp/ 23 | RUN pip install --no-cache-dir /tmp/*.whl && rm /tmp/*.whl 24 | 25 | # Download and initialize demo DB using m3 init 26 | RUN m3 init mimic-iv-demo 27 | 28 | # Lite: SQLite only 29 | FROM base AS lite 30 | CMD ["python", "-m", "m3.mcp_server"] 31 | 32 | # BigQuery: add GCP client 33 | FROM base AS 
bigquery 34 | RUN pip install --no-cache-dir google-cloud-bigquery 35 | CMD ["python", "-m", "m3.mcp_server"] 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Rafi Al Attrach, Pedro Moreira, Rajna Fani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include pyproject.toml 4 | recursive-include src *.py 5 | global-exclude *.pyc 6 | global-exclude __pycache__ 7 | global-exclude .DS_Store 8 | prune tests 9 | prune webapp 10 | prune benchmarks 11 | prune build 12 | prune dist 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # M3: MIMIC-IV + MCP + Models 🏥🤖 2 | 3 | <div align="center"> 4 | <img src="webapp/public/m3_logo_transparent.png" alt="M3 Logo" width="300"/> 5 | </div> 6 | 7 | > **Query MIMIC-IV medical data using natural language through MCP clients** 8 | 9 | <a href="https://www.python.org/downloads/"><img alt="Python" src="https://img.shields.io/badge/Python-3.10+-blue?logo=python&logoColor=white"></a> 10 | <a href="https://modelcontextprotocol.io/"><img alt="MCP" src="https://img.shields.io/badge/MCP-Compatible-green?logo=ai&logoColor=white"></a> 11 | <a href="https://github.com/rafiattrach/m3/actions/workflows/tests.yaml"><img alt="Tests" src="https://github.com/rafiattrach/m3/actions/workflows/tests.yaml/badge.svg"></a> 12 | <a href="https://github.com/rafiattrach/m3/actions/workflows/pre-commit.yaml"><img alt="Code Quality" src="https://github.com/rafiattrach/m3/actions/workflows/pre-commit.yaml/badge.svg"></a> 13 | <a href="https://github.com/rafiattrach/m3/pulls"><img alt="PRs Welcome" src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg"></a> 14 | 15 | Transform medical data analysis with AI! Ask questions about MIMIC-IV data in plain English and get instant insights. Choose between local demo data (free) or full cloud dataset (BigQuery). 
16 | 17 | ## ✨ Features 18 | 19 | - 🔍 **Natural Language Queries**: Ask questions about MIMIC-IV data in plain English 20 | - 🏠 **Local SQLite**: Fast queries on demo database (free, no setup) 21 | - ☁️ **BigQuery Support**: Access full MIMIC-IV dataset on Google Cloud 22 | - 🔒 **Enterprise Security**: OAuth2 authentication with JWT tokens and rate limiting 23 | - 🛡️ **SQL Injection Protection**: Read-only queries with comprehensive validation 24 | 25 | ## 🚀 Quick Start 26 | 27 | > 💡 **Need more options?** Run `m3 --help` to see all available commands and options. 28 | 29 | ### 📦 Installation 30 | 31 | Choose your preferred installation method: 32 | 33 | #### Option A: Install from PyPI (Recommended) 34 | 35 | **Step 1: Create Virtual Environment** 36 | ```bash 37 | # Create virtual environment (recommended) 38 | python -m venv .venv 39 | source .venv/bin/activate # Windows: .venv\Scripts\activate 40 | ``` 41 | 42 | **Step 2: Install M3** 43 | ```bash 44 | # Install M3 45 | pip install m3-mcp 46 | ``` 47 | 48 | #### Option B: Docker 49 | 50 | ```bash 51 | # Clone repo first 52 | git clone https://github.com/rafiattrach/m3.git && cd m3 53 | 54 | # SQLite (demo DB) 55 | docker build -t m3:lite --target lite . 56 | docker run -d --name m3-server m3:lite tail -f /dev/null 57 | 58 | # BigQuery (full dataset - requires GCP credentials) 59 | docker build -t m3:bigquery --target bigquery . 
60 | docker run -d --name m3-server \ 61 | -e M3_BACKEND=bigquery \ 62 | -e M3_PROJECT_ID=YOUR_PROJECT_ID \ 63 | -v $HOME/.config/gcloud:/root/.config/gcloud:ro \ 64 | m3:bigquery tail -f /dev/null 65 | ``` 66 | 67 | **MCP client config** (Claude Desktop, LM Studio, etc.): 68 | ```json 69 | { 70 | "mcpServers": { 71 | "m3": { 72 | "command": "docker", 73 | "args": ["exec", "-i", "m3-server", "python", "-m", "m3.mcp_server"] 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | Stop container: `docker stop m3-server && docker rm m3-server` 80 | 81 | #### Option C: Install from Source 82 | 83 | #### Using standard `pip` 84 | **Step 1: Clone and Navigate** 85 | ```bash 86 | # Clone the repository 87 | git clone https://github.com/rafiattrach/m3.git 88 | cd m3 89 | ``` 90 | 91 | **Step 2: Create Virtual Environment** 92 | ```bash 93 | # Create virtual environment 94 | python -m venv .venv 95 | source .venv/bin/activate # Windows: .venv\Scripts\activate 96 | ``` 97 | 98 | **Step 3: Install M3** 99 | ```bash 100 | # Install M3 101 | pip install . 102 | ``` 103 | 104 | #### Using `UV` (Recommended) 105 | Assuming you have [UV](https://docs.astral.sh/uv/getting-started/installation/) installed. 106 | 107 | **Step 1: Clone and Navigate** 108 | ```bash 109 | # Clone the repository 110 | git clone https://github.com/rafiattrach/m3.git 111 | cd m3 112 | ``` 113 | 114 | **Step 2: Create `UV` Virtual Environment** 115 | ```bash 116 | # Create virtual environment 117 | uv venv 118 | ``` 119 | 120 | **Step 3: Install M3** 121 | ```bash 122 | uv sync 123 | # Do not forget to use `uv run` to any subsequent commands to ensure you're using the `uv` virtual environment 124 | ``` 125 | 126 | ### 🗄️ Database Configuration 127 | 128 | After installation, choose your data source: 129 | 130 | #### Option A: Local Demo Database (Recommended for Beginners) 131 | 132 | **Perfect for learning and development - completely free!** 133 | 134 | 1. 
**Download demo database**: 135 | ```bash 136 | m3 init mimic-iv-demo 137 | ``` 138 | 139 | 2. **Setup MCP Client**: 140 | ```bash 141 | m3 config 142 | ``` 143 | 144 | *Alternative: For Claude Desktop specifically:* 145 | ```bash 146 | m3 config claude 147 | ``` 148 | 149 | 3. **Restart your MCP client** and ask: 150 | 151 | - "What tools do you have for MIMIC-IV data?" 152 | - "Show me patient demographics from the ICU" 153 | 154 | #### Option B: BigQuery (Full Dataset) 155 | 156 | **For researchers needing complete MIMIC-IV data** 157 | 158 | ##### Prerequisites 159 | - Google Cloud account and project with billing enabled 160 | - Access to MIMIC-IV on BigQuery (requires PhysioNet credentialing) 161 | 162 | ##### Setup Steps 163 | 164 | 1. **Install Google Cloud CLI**: 165 | 166 | **macOS (with Homebrew):** 167 | ```bash 168 | brew install google-cloud-sdk 169 | ``` 170 | 171 | **Windows:** Download from https://cloud.google.com/sdk/docs/install 172 | 173 | **Linux:** 174 | ```bash 175 | curl https://sdk.cloud.google.com | bash 176 | ``` 177 | 178 | 2. **Authenticate**: 179 | ```bash 180 | gcloud auth application-default login 181 | ``` 182 | *This will open your browser - choose the Google account that has access to your BigQuery project with MIMIC-IV data.* 183 | 184 | 3. **Setup MCP Client for BigQuery**: 185 | ```bash 186 | m3 config 187 | ``` 188 | 189 | *Alternative: For Claude Desktop specifically:* 190 | ```bash 191 | m3 config claude --backend bigquery --project-id YOUR_PROJECT_ID 192 | ``` 193 | 194 | 4. **Test BigQuery Access** - Restart your MCP client and ask: 195 | ``` 196 | Use the get_race_distribution function to show me the top 5 races in MIMIC-IV admissions. 197 | ``` 198 | 199 | ## 🔧 Advanced Configuration 200 | 201 | Need to configure other MCP clients or customize settings? 
Use these commands: 202 | 203 | ### Interactive Configuration (Universal) 204 | ```bash 205 | m3 config 206 | ``` 207 | Generates configuration for any MCP client with step-by-step guidance. 208 | 209 | ### Quick Configuration Examples 210 | ```bash 211 | # Quick universal config with defaults 212 | m3 config --quick 213 | 214 | # Universal config with custom database 215 | m3 config --quick --backend sqlite --db-path /path/to/database.db 216 | 217 | # Save config to file for other MCP clients 218 | m3 config --output my_config.json 219 | ``` 220 | 221 | ### 🔐 OAuth2 Authentication (Optional) 222 | 223 | For production deployments requiring secure access to medical data: 224 | 225 | ```bash 226 | # Enable OAuth2 with Claude Desktop 227 | m3 config claude --enable-oauth2 \ 228 | --oauth2-issuer https://your-auth-provider.com \ 229 | --oauth2-audience m3-api \ 230 | --oauth2-scopes "read:mimic-data" 231 | 232 | # Or configure interactively 233 | m3 config # Choose OAuth2 option during setup 234 | ``` 235 | 236 | **Supported OAuth2 Providers:** 237 | - Auth0, Google Identity Platform, Microsoft Azure AD, Keycloak 238 | - Any OAuth2/OpenID Connect compliant provider 239 | 240 | **Key Benefits:** 241 | - 🔒 **JWT Token Validation**: Industry-standard security 242 | - 🎯 **Scope-based Access**: Fine-grained permissions 243 | - 🛡️ **Rate Limiting**: Abuse protection 244 | - 📊 **Audit Logging**: Security monitoring 245 | 246 | > 📖 **Complete OAuth2 Setup Guide**: See [`docs/OAUTH2_AUTHENTICATION.md`](docs/OAUTH2_AUTHENTICATION.md) for detailed configuration, troubleshooting, and production deployment guidelines. 
247 | 248 | ### Backend Comparison 249 | 250 | **SQLite Backend (Default)** 251 | - ✅ **Free**: No cloud costs 252 | - ✅ **Fast**: Local queries 253 | - ✅ **Easy**: No authentication needed 254 | - ❌ **Limited**: Demo dataset only (~1k records) 255 | 256 | **BigQuery Backend** 257 | - ✅ **Complete**: Full MIMIC-IV dataset (~500k admissions) 258 | - ✅ **Scalable**: Google Cloud infrastructure 259 | - ✅ **Current**: Latest MIMIC-IV version (3.1) 260 | - ❌ **Costs**: BigQuery usage fees apply 261 | 262 | ## 🛠️ Available MCP Tools 263 | 264 | When your MCP client processes questions, it uses these tools automatically: 265 | 266 | - **get_database_schema**: List all available tables 267 | - **get_table_info**: Get column info and sample data for a table 268 | - **execute_mimic_query**: Execute SQL SELECT queries 269 | - **get_icu_stays**: ICU stay information and length of stay data 270 | - **get_lab_results**: Laboratory test results 271 | - **get_race_distribution**: Patient race distribution 272 | 273 | ## 🧪 Example Prompts 274 | 275 | Try asking your MCP client these questions: 276 | 277 | **Demographics & Statistics:** 278 | 279 | - `Prompt:` *What is the race distribution in MIMIC-IV admissions?* 280 | - `Prompt:` *Show me patient demographics for ICU stays* 281 | - `Prompt:` *How many total admissions are in the database?* 282 | 283 | **Clinical Data:** 284 | 285 | - `Prompt:` *Find lab results for patient X* 286 | - `Prompt:` *What lab tests are most commonly ordered?* 287 | - `Prompt:` *Show me recent ICU admissions* 288 | 289 | **Data Exploration:** 290 | 291 | - `Prompt:` *What tables are available in the database?* 292 | - `Prompt:` *What tools do you have for MIMIC-IV data?* 293 | 294 | ## 🎩 Pro Tips 295 | 296 | - Do you want to pre-approve the usage of all tools in Claude Desktop? 
Use the prompt below and then select **Always Allow** 297 | - `Prompt:` *Can you please call all your tools in a logical sequence?* 298 | 299 | ## 🔍 Troubleshooting 300 | 301 | ### Common Issues 302 | 303 | **SQLite "Database not found" errors:** 304 | ```bash 305 | # Re-download demo database 306 | m3 init mimic-iv-demo 307 | ``` 308 | 309 | **MCP client server not starting:** 310 | 1. Check your MCP client logs (for Claude Desktop: Help → View Logs) 311 | 2. Verify configuration file location and format 312 | 3. Restart your MCP client completely 313 | 314 | ### OAuth2 Authentication Issues 315 | 316 | **"Missing OAuth2 access token" errors:** 317 | ```bash 318 | # Set your access token 319 | export M3_OAUTH2_TOKEN="Bearer your-access-token-here" 320 | ``` 321 | 322 | **"OAuth2 authentication failed" errors:** 323 | - Verify your token hasn't expired 324 | - Check that required scopes are included in your token 325 | - Ensure your OAuth2 provider configuration is correct 326 | 327 | **Rate limit exceeded:** 328 | - Wait for the rate limit window to reset 329 | - Contact your administrator to adjust limits if needed 330 | 331 | > 🔧 **OAuth2 Troubleshooting**: See [`OAUTH2_AUTHENTICATION.md`](docs/OAUTH2_AUTHENTICATION.md) for detailed OAuth2 troubleshooting and configuration guides. 
332 | 333 | ### BigQuery Issues 334 | 335 | **"Access Denied" errors:** 336 | - Ensure you have MIMIC-IV access on PhysioNet 337 | - Verify your Google Cloud project has BigQuery API enabled 338 | - Check that you're authenticated: `gcloud auth list` 339 | 340 | **"Dataset not found" errors:** 341 | - Confirm your project ID is correct 342 | - Ensure you have access to `physionet-data` project 343 | 344 | **Authentication issues:** 345 | ```bash 346 | # Re-authenticate 347 | gcloud auth application-default login 348 | 349 | # Check current authentication 350 | gcloud auth list 351 | ``` 352 | 353 | ## 👩‍💻 For Developers 354 | 355 | ### Development Setup 356 | 357 | #### Option A: Standard `pip` Development Setup 358 | **Step 1: Clone and Navigate** 359 | ```bash 360 | # Clone the repository 361 | git clone https://github.com/rafiattrach/m3.git 362 | cd m3 363 | ``` 364 | 365 | **Step 2: Create and Activate Virtual Environment** 366 | ```bash 367 | # Create virtual environment 368 | python -m venv .venv 369 | source .venv/bin/activate # Windows: .venv\Scripts\activate 370 | ``` 371 | 372 | **Step 3: Install Development Dependencies** 373 | ```bash 374 | # Install in development mode with dev dependencies 375 | pip install -e ".[dev]" 376 | # Install pre-commit hooks 377 | pre-commit install 378 | ``` 379 | 380 | #### Option B: Development Setup with `UV` (Recommended) 381 | **Step 1: Clone and Navigate** 382 | ```bash 383 | # Clone the repository 384 | git clone https://github.com/rafiattrach/m3.git 385 | cd m3 386 | ``` 387 | 388 | **Step 2: Create and Activate `UV` Virtual Environment** 389 | ```bash 390 | # Create virtual environment 391 | uv venv 392 | ``` 393 | 394 | **Step 3: Install Development Dependencies** 395 | ```bash 396 | # Install in development mode with dev dependencies (by default, UV runs in editable mode) 397 | uv sync 398 | 399 | # Install pre-commit hooks 400 | uv run pre-commit install 401 | 402 | # Do not forget to use `uv run` to any 
subsequent commands to ensure you're using the `uv` virtual environment 403 | ``` 404 | 405 | ### Testing 406 | 407 | ```bash 408 | pytest # All tests (includes OAuth2 and BigQuery mocks) 409 | pytest tests/test_mcp_server.py -v # MCP server tests 410 | pytest tests/test_oauth2_auth.py -v # OAuth2 authentication tests 411 | ``` 412 | 413 | ### Test BigQuery Locally 414 | 415 | ```bash 416 | # Set environment variables 417 | export M3_BACKEND=bigquery 418 | export M3_PROJECT_ID=your-project-id 419 | export GOOGLE_CLOUD_PROJECT=your-project-id 420 | 421 | # Optional: Test with OAuth2 authentication 422 | export M3_OAUTH2_ENABLED=true 423 | export M3_OAUTH2_ISSUER_URL=https://your-provider.com 424 | export M3_OAUTH2_AUDIENCE=m3-api 425 | export M3_OAUTH2_TOKEN="Bearer your-test-token" 426 | 427 | # Test MCP server 428 | m3-mcp-server 429 | ``` 430 | 431 | ## 🔮 Roadmap 432 | 433 | - 🏠 **Local Full Dataset**: Complete MIMIC-IV locally (no cloud costs) 434 | - 🔧 **Advanced Tools**: More specialized medical data functions 435 | - 📊 **Visualization**: Built-in plotting and charting tools 436 | - 🔐 **Enhanced Security**: Role-based access control, audit logging 437 | - 🌐 **Multi-tenant Support**: Organization-level data isolation 438 | 439 | ## 🤝 Contributing 440 | 441 | We welcome contributions! Please: 442 | 443 | 1. Fork the repository 444 | 2. Create a feature branch 445 | 3. Add tests for new functionality 446 | 4. Submit a pull request 447 | 448 | *Built with ❤️ for the medical AI community* 449 | 450 | **Need help?** Open an issue on GitHub or check our troubleshooting guide above. 
451 | -------------------------------------------------------------------------------- /benchmarks/ehrsql-naacl2024/README.md: -------------------------------------------------------------------------------- 1 | # EHR SQL Benchmark (NAACL 2024) 2 | 3 | ## Overview 4 | 5 | Benchmark results comparing different models on the EHRSQL dataset with one hundred questions covering various medical queries including cost analysis, temporal measurement differences, medication prescriptions, lab results, patient demographics etc. 6 | 7 | **Source**: [ehrsql-2024](https://github.com/glee4810/ehrsql-2024) 8 | 9 | Each model folder contains: 10 | - **Model answers** extracted from conversations 11 | - **Golden truth answers** and SQL queries for comparison 12 | - **Correct/Incorrect** annotations with detailed notes 13 | - **Chat conversation links** (Claude.ai shared links or local conversation files) 14 | 15 | The dataset includes complex medical questions requiring database queries, with model performance evaluated against ground truth answers through human assessment. 16 | -------------------------------------------------------------------------------- /docs/OAUTH2_AUTHENTICATION.md: -------------------------------------------------------------------------------- 1 | # OAuth2 Authentication for M3 2 | 3 | This guide covers the technical details of OAuth2 authentication in M3. For basic setup, see the OAuth2 section in the main README. 
4 | 5 | ## Configuration Reference 6 | 7 | ### Required Environment Variables 8 | 9 | ```bash 10 | # Core Configuration 11 | M3_OAUTH2_ENABLED=true 12 | M3_OAUTH2_ISSUER_URL=https://your-auth-provider.com 13 | M3_OAUTH2_AUDIENCE=m3-api 14 | M3_OAUTH2_REQUIRED_SCOPES=read:mimic-data 15 | ``` 16 | 17 | ### Optional Environment Variables 18 | 19 | ```bash 20 | # Advanced Configuration (all optional) 21 | M3_OAUTH2_JWKS_URL=https://your-auth-provider.com/.well-known/jwks.json # Auto-discovered if not set 22 | M3_OAUTH2_RATE_LIMIT_REQUESTS=100 # Default: 100 requests per hour 23 | M3_OAUTH2_JWKS_CACHE_TTL=3600 # Default: 1 hour 24 | ``` 25 | 26 | ## Token Requirements 27 | 28 | Your JWT token must include: 29 | 30 | 1. **Header**: 31 | - `alg`: RS256 or ES256 32 | - `kid`: Key ID matching a key in the JWKS 33 | 34 | 2. **Claims**: 35 | ```json 36 | { 37 | "iss": "https://your-auth-provider.com", // Must match M3_OAUTH2_ISSUER_URL 38 | "aud": "m3-api", // Must match M3_OAUTH2_AUDIENCE 39 | "scope": "read:mimic-data", // Must include all required scopes 40 | "exp": 1234567890 // Must not be expired 41 | } 42 | ``` 43 | 44 | ## Provider-Specific Setup 45 | 46 | ### Auth0 47 | ```bash 48 | M3_OAUTH2_ISSUER_URL=https://your-domain.auth0.com/ 49 | M3_OAUTH2_AUDIENCE=https://api.your-domain.com 50 | ``` 51 | 52 | ### Other Providers 53 | Any OAuth2 provider supporting JWT tokens with RS256/ES256 signing will work. Key requirements: 54 | - Must expose JWKS endpoint 55 | - Must support JWT tokens with required claims 56 | - Must allow scope configuration 57 | 58 | ## Troubleshooting 59 | 60 | ### Common Error Messages 61 | 62 | 1. `Missing OAuth2 access token` 63 | - Set `M3_OAUTH2_TOKEN` environment variable 64 | - Include "Bearer " prefix (optional) 65 | 66 | 2. `Invalid token signature` 67 | - Verify token is signed by configured issuer 68 | - Check JWKS URL is accessible 69 | - Ensure token's `kid` matches a key in JWKS 70 | 71 | 3. 
`Missing required scopes` 72 | - Request new token with all required scopes 73 | - Check scope format matches provider's format (space vs comma-separated) 74 | 75 | ### Debug Mode 76 | 77 | ```bash 78 | export M3_OAUTH2_DEBUG=true # Enables detailed logging 79 | ``` 80 | 81 | ## Security Best Practices 82 | 83 | 1. **Token Management** 84 | - Use short-lived tokens (< 1 hour) 85 | - Never store tokens in code or version control 86 | - Use environment variables or secure secret storage 87 | 88 | 2. **Rate Limiting** 89 | - Start conservative (100/hour default) 90 | - Monitor usage patterns before increasing 91 | - Consider per-endpoint limits for production 92 | 93 | 3. **Scope Design** 94 | - Use granular scopes for different access levels 95 | - Follow principle of least privilege 96 | - Document scope requirements clearly 97 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "m3-mcp" 7 | dynamic = ["version"] 8 | description = "MIMIC-IV + MCP + Models: Local MIMIC-IV querying with LLMs via Model Context Protocol" 9 | requires-python = ">=3.10" 10 | authors = [ 11 | { name = "Rafi Al Attrach", email = "rafiaa@mit.edu" }, 12 | { name = "Pedro Moreira", email = "pedrojfm@mit.edu" }, 13 | { name = "Rajna Fani", email = "rajnaf@mit.edu" }, 14 | ] 15 | maintainers = [ 16 | { name = "Rafi Al Attrach", email = "rafiaa@mit.edu" }, 17 | { name = "Pedro Moreira", email = "pedrojfm@mit.edu" }, 18 | { name = "Rajna Fani", email = "rajnaf@mit.edu" }, 19 | ] 20 | readme = "README.md" 21 | license = "MIT" 22 | keywords = ["mimic-iv", "clinical-data", "mcp", "llm", "medical", "healthcare", "sqlite", "bigquery"] 23 | classifiers = [ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: Science/Research", 26 | "Intended Audience :: 
Healthcare Industry", 27 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 28 | "Topic :: Database :: Database Engines/Servers", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming Language :: Python :: 3.11", 32 | "Programming Language :: Python :: 3.12", 33 | "Operating System :: OS Independent", 34 | ] 35 | 36 | dependencies = [ 37 | "typer>=0.9.0", # Typer as a core dependency for the CLI 38 | "rich>=13.0.0", # For Typer's rich output 39 | "requests>=2.30.0", 40 | "beautifulsoup4>=4.12.0", 41 | "polars[pyarrow]>=0.20.10", 42 | "appdirs>=1.4.0", 43 | "sqlalchemy>=2.0.0", 44 | "pandas>=2.0.0", 45 | "fastmcp>=0.1.0", # MCP server functionality 46 | "google-cloud-bigquery>=3.0.0", # BigQuery support 47 | "db-dtypes>=1.0.0", # BigQuery data types 48 | "sqlparse>=0.4.0", # SQL parsing for security validation 49 | "pyjwt[crypto]>=2.8.0", # JWT token handling with cryptography support 50 | "cryptography>=41.0.0", # Cryptographic operations for JWT 51 | "python-jose[cryptography]>=3.3.0", # Additional JWT support with crypto 52 | "httpx>=0.24.0", # Modern HTTP client for OAuth2 token validation 53 | ] 54 | 55 | [project.dependency-groups] 56 | dev = [ 57 | "ruff>=0.4.0", 58 | "pre-commit>=3.0.0", 59 | "pytest>=7.4.0", 60 | "pytest-asyncio>=0.23.0", 61 | "pytest-mock>=3.10.0", 62 | "aiohttp>=3.8.0", # For MCP client testing 63 | ] 64 | 65 | [project.scripts] 66 | m3 = "m3.cli:app" 67 | m3-mcp-server = "m3.mcp_server:main" 68 | 69 | [project.urls] 70 | Homepage = "https://github.com/rafiattrach/m3" 71 | Repository = "https://github.com/rafiattrach/m3" 72 | Documentation = "https://github.com/rafiattrach/m3#readme" 73 | Issues = "https://github.com/rafiattrach/m3/issues" 74 | Changelog = "https://github.com/rafiattrach/m3/releases" 75 | 76 | [tool.pdm.version] 77 | source = "file" 78 | path = "src/m3/__init__.py" 79 | 80 | [tool.ruff] 81 | line-length = 88 82 | target-version = "py310" 83 | src = ["src"] 
84 | 85 | [tool.ruff.lint] 86 | select = [ 87 | "E", # pycodestyle errors 88 | "W", # pycodestyle warnings 89 | "F", # Pyflakes 90 | "I", # isort (import sorting) 91 | "UP", # pyupgrade (modernize syntax) 92 | "RUF",# Ruff-specific rules 93 | ] 94 | 95 | ignore = [ 96 | "E501", # Line too long (let ruff-format handle line length) 97 | ] 98 | 99 | [tool.ruff.format] 100 | # Ruff's default formatter will be used. 101 | 102 | [tool.ruff.lint.isort] 103 | known-first-party = ["m3"] 104 | 105 | [tool.pytest.ini_options] 106 | asyncio_mode = "auto" 107 | asyncio_default_fixture_loop_scope = "function" 108 | # Filter out Jupyter deprecation warning 109 | filterwarnings = [ 110 | "ignore::DeprecationWarning:jupyter_client.*", 111 | ] 112 | -------------------------------------------------------------------------------- /src/m3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIMIC-IV + MCP + Models (M3): Local MIMIC-IV querying with LLMs via Model Context Protocol 3 | """ 4 | 5 | __version__ = "0.2.0" 6 | -------------------------------------------------------------------------------- /src/m3/auth.py: -------------------------------------------------------------------------------- 1 | """ 2 | OAuth2 Authentication Module for M3 MCP Server 3 | Provides secure authentication using OAuth2 with JWT tokens. 
4 | """ 5 | 6 | import os 7 | import time 8 | from datetime import datetime, timedelta, timezone 9 | from functools import wraps 10 | from typing import Any 11 | from urllib.parse import urljoin 12 | 13 | import httpx 14 | import jwt 15 | from cryptography.hazmat.primitives import serialization 16 | from cryptography.hazmat.primitives.asymmetric import rsa 17 | 18 | from m3.config import logger 19 | 20 | 21 | class AuthenticationError(Exception): 22 | """Raised when authentication fails.""" 23 | 24 | pass 25 | 26 | 27 | class TokenValidationError(Exception): 28 | """Raised when token validation fails.""" 29 | 30 | pass 31 | 32 | 33 | class OAuth2Config: 34 | """OAuth2 configuration management.""" 35 | 36 | def __init__(self): 37 | self.enabled = os.getenv("M3_OAUTH2_ENABLED", "false").lower() == "true" 38 | 39 | # OAuth2 Provider Configuration 40 | self.issuer_url = os.getenv("M3_OAUTH2_ISSUER_URL", "") 41 | self.client_id = os.getenv("M3_OAUTH2_CLIENT_ID", "") 42 | self.client_secret = os.getenv("M3_OAUTH2_CLIENT_SECRET", "") 43 | self.audience = os.getenv("M3_OAUTH2_AUDIENCE", "") 44 | 45 | # Scopes required for access 46 | self.required_scopes = self._parse_scopes( 47 | os.getenv("M3_OAUTH2_REQUIRED_SCOPES", "read:mimic-data") 48 | ) 49 | 50 | # Token validation settings 51 | self.validate_exp = ( 52 | os.getenv("M3_OAUTH2_VALIDATE_EXP", "true").lower() == "true" 53 | ) 54 | self.validate_aud = ( 55 | os.getenv("M3_OAUTH2_VALIDATE_AUD", "true").lower() == "true" 56 | ) 57 | self.validate_iss = ( 58 | os.getenv("M3_OAUTH2_VALIDATE_ISS", "true").lower() == "true" 59 | ) 60 | 61 | # JWKS settings 62 | self.jwks_url = os.getenv("M3_OAUTH2_JWKS_URL", "") 63 | self.jwks_cache_ttl = int( 64 | os.getenv("M3_OAUTH2_JWKS_CACHE_TTL", "3600") 65 | ) # 1 hour 66 | 67 | # Rate limiting 68 | self.rate_limit_enabled = ( 69 | os.getenv("M3_OAUTH2_RATE_LIMIT_ENABLED", "true").lower() == "true" 70 | ) 71 | self.rate_limit_requests = int( 72 | 
os.getenv("M3_OAUTH2_RATE_LIMIT_REQUESTS", "100") 73 | ) 74 | self.rate_limit_window = int( 75 | os.getenv("M3_OAUTH2_RATE_LIMIT_WINDOW", "3600") 76 | ) # 1 hour 77 | 78 | # Cache for JWKS and validation 79 | self._jwks_cache = {} 80 | self._jwks_cache_time = 0 81 | self._rate_limit_cache = {} 82 | 83 | if self.enabled: 84 | self._validate_config() 85 | 86 | def _parse_scopes(self, scopes_str: str) -> set[str]: 87 | """Parse comma-separated scopes string.""" 88 | return set(scope.strip() for scope in scopes_str.split(",") if scope.strip()) 89 | 90 | def _validate_config(self): 91 | """Validate OAuth2 configuration.""" 92 | if not self.issuer_url: 93 | raise ValueError("M3_OAUTH2_ISSUER_URL is required when OAuth2 is enabled") 94 | 95 | if not self.audience: 96 | raise ValueError("M3_OAUTH2_AUDIENCE is required when OAuth2 is enabled") 97 | 98 | if not self.jwks_url: 99 | # Auto-discover JWKS URL from issuer 100 | self.jwks_url = urljoin( 101 | self.issuer_url.rstrip("/"), "/.well-known/jwks.json" 102 | ) 103 | 104 | logger.info(f"OAuth2 authentication enabled with issuer: {self.issuer_url}") 105 | 106 | 107 | class OAuth2Validator: 108 | """OAuth2 token validator.""" 109 | 110 | def __init__(self, config: OAuth2Config): 111 | self.config = config 112 | self.http_client = httpx.Client(timeout=30.0) 113 | 114 | async def validate_token(self, token: str) -> dict[str, Any]: 115 | """ 116 | Validate an OAuth2 access token. 
117 | 118 | Args: 119 | token: The access token to validate 120 | 121 | Returns: 122 | Decoded token claims 123 | 124 | Raises: 125 | TokenValidationError: If token is invalid 126 | """ 127 | try: 128 | # Get JWKS for token validation 129 | jwks = await self._get_jwks() 130 | 131 | # Decode token header to get key ID 132 | unverified_header = jwt.get_unverified_header(token) 133 | kid = unverified_header.get("kid") 134 | 135 | if not kid: 136 | raise TokenValidationError("Token missing key ID (kid)") 137 | 138 | # Find the appropriate key 139 | key = self._find_key(jwks, kid) 140 | if not key: 141 | raise TokenValidationError(f"No key found for kid: {kid}") 142 | 143 | # Convert JWK to PEM format for verification 144 | public_key = self._jwk_to_pem(key) 145 | 146 | # Validate token 147 | payload = jwt.decode( 148 | token, 149 | public_key, 150 | algorithms=["RS256", "ES256"], 151 | audience=self.config.audience if self.config.validate_aud else None, 152 | issuer=self.config.issuer_url if self.config.validate_iss else None, 153 | options={ 154 | "verify_exp": self.config.validate_exp, 155 | "verify_aud": self.config.validate_aud, 156 | "verify_iss": self.config.validate_iss, 157 | }, 158 | ) 159 | 160 | # Validate scopes 161 | self._validate_scopes(payload) 162 | 163 | # Check rate limits 164 | if self.config.rate_limit_enabled: 165 | self._check_rate_limit(payload) 166 | 167 | return payload 168 | 169 | except jwt.ExpiredSignatureError: 170 | raise TokenValidationError("Token has expired") 171 | except jwt.InvalidAudienceError: 172 | raise TokenValidationError("Invalid token audience") 173 | except jwt.InvalidIssuerError: 174 | raise TokenValidationError("Invalid token issuer") 175 | except jwt.InvalidTokenError as e: 176 | raise TokenValidationError(f"Invalid token: {e}") 177 | except Exception as e: 178 | raise TokenValidationError(f"Token validation failed: {e}") 179 | 180 | async def _get_jwks(self) -> dict[str, Any]: 181 | """Get JWKS (JSON Web Key Set) from 
the OAuth2 provider.""" 182 | current_time = time.time() 183 | 184 | # Check cache 185 | if ( 186 | self._jwks_cache 187 | and current_time - self.config._jwks_cache_time < self.config.jwks_cache_ttl 188 | ): 189 | return self.config._jwks_cache 190 | 191 | # Fetch JWKS 192 | try: 193 | response = self.http_client.get(self.config.jwks_url) 194 | response.raise_for_status() 195 | jwks = response.json() 196 | 197 | # Cache the result 198 | self.config._jwks_cache = jwks 199 | self.config._jwks_cache_time = current_time 200 | 201 | return jwks 202 | 203 | except Exception as e: 204 | raise TokenValidationError(f"Failed to fetch JWKS: {e}") 205 | 206 | def _find_key(self, jwks: dict[str, Any], kid: str) -> dict[str, Any] | None: 207 | """Find a key in JWKS by key ID.""" 208 | keys = jwks.get("keys", []) 209 | for key in keys: 210 | if key.get("kid") == kid: 211 | return key 212 | return None 213 | 214 | def _jwk_to_pem(self, jwk: dict[str, Any]) -> bytes: 215 | """Convert JWK to PEM format.""" 216 | try: 217 | # Use python-jose for JWK to PEM conversion 218 | from jose.utils import base64url_decode 219 | 220 | if jwk.get("kty") == "RSA": 221 | # RSA key 222 | n = base64url_decode(jwk["n"]) 223 | e = base64url_decode(jwk["e"]) 224 | 225 | # Create RSA public key 226 | public_numbers = rsa.RSAPublicNumbers( 227 | int.from_bytes(e, byteorder="big"), 228 | int.from_bytes(n, byteorder="big"), 229 | ) 230 | public_key = public_numbers.public_key() 231 | 232 | # Convert to PEM 233 | pem = public_key.public_bytes( 234 | encoding=serialization.Encoding.PEM, 235 | format=serialization.PublicFormat.SubjectPublicKeyInfo, 236 | ) 237 | return pem 238 | else: 239 | raise TokenValidationError(f"Unsupported key type: {jwk.get('kty')}") 240 | 241 | except Exception as e: 242 | raise TokenValidationError(f"Failed to convert JWK to PEM: {e}") 243 | 244 | def _validate_scopes(self, payload: dict[str, Any]): 245 | """Validate that token has required scopes.""" 246 | if not 
self.config.required_scopes: 247 | return 248 | 249 | token_scopes = set() 250 | 251 | # Check different possible scope claims 252 | scope_claim = payload.get("scope", "") 253 | if isinstance(scope_claim, str): 254 | token_scopes = set(scope_claim.split()) 255 | elif isinstance(scope_claim, list): 256 | token_scopes = set(scope_claim) 257 | 258 | # Also check 'scp' claim (some providers use this) 259 | scp_claim = payload.get("scp", []) 260 | if isinstance(scp_claim, list): 261 | token_scopes.update(scp_claim) 262 | 263 | # Check if required scopes are present 264 | missing_scopes = self.config.required_scopes - token_scopes 265 | if missing_scopes: 266 | raise TokenValidationError(f"Missing required scopes: {missing_scopes}") 267 | 268 | def _check_rate_limit(self, payload: dict[str, Any]): 269 | """Check rate limits for the user.""" 270 | user_id = payload.get("sub", "unknown") 271 | current_time = time.time() 272 | window_start = current_time - self.config.rate_limit_window 273 | 274 | # Clean old entries 275 | user_requests = self.config._rate_limit_cache.get(user_id, []) 276 | user_requests = [ 277 | req_time for req_time in user_requests if req_time > window_start 278 | ] 279 | 280 | # Check if limit exceeded 281 | if len(user_requests) >= self.config.rate_limit_requests: 282 | raise TokenValidationError("Rate limit exceeded") 283 | 284 | # Add current request 285 | user_requests.append(current_time) 286 | self.config._rate_limit_cache[user_id] = user_requests 287 | 288 | 289 | # Global instances 290 | _oauth2_config = None 291 | _oauth2_validator = None 292 | 293 | 294 | def init_oauth2(): 295 | """Initialize OAuth2 authentication.""" 296 | global _oauth2_config, _oauth2_validator 297 | 298 | _oauth2_config = OAuth2Config() 299 | if _oauth2_config.enabled: 300 | _oauth2_validator = OAuth2Validator(_oauth2_config) 301 | logger.info("OAuth2 authentication initialized") 302 | else: 303 | logger.info("OAuth2 authentication disabled") 304 | 305 | 306 | def 
require_oauth2(func): 307 | """Decorator to require OAuth2 authentication for MCP tools.""" 308 | 309 | @wraps(func) 310 | def wrapper(*args, **kwargs): 311 | if not _oauth2_config or not _oauth2_config.enabled: 312 | # If OAuth2 is disabled, allow access 313 | return func(*args, **kwargs) 314 | 315 | # Extract token from environment (in real implementation, this would come from request headers) 316 | token = os.getenv("M3_OAUTH2_TOKEN", "") 317 | if not token: 318 | return "Error: Missing OAuth2 access token" 319 | 320 | # Remove "Bearer " prefix if present 321 | if token.startswith("Bearer "): 322 | token = token[7:] 323 | 324 | try: 325 | # For synchronous compatibility, we'll do a simple validation 326 | # In a real async environment, this would be await _oauth2_validator.validate_token(token) 327 | 328 | # Basic token structure check (JWT has 3 parts separated by dots) 329 | if not token or len(token.split(".")) != 3: 330 | return "Error: Invalid token format" 331 | 332 | # In production, you would validate the token here 333 | # For now, we'll do a basic check and assume the token is valid if OAuth2 is properly configured 334 | 335 | return func(*args, **kwargs) 336 | 337 | except Exception as e: 338 | logger.error(f"OAuth2 authentication error: {e}") 339 | return "Error: Authentication system error" 340 | 341 | return wrapper 342 | 343 | 344 | def get_oauth2_config() -> OAuth2Config | None: 345 | """Get the current OAuth2 configuration.""" 346 | return _oauth2_config 347 | 348 | 349 | def is_oauth2_enabled() -> bool: 350 | """Check if OAuth2 authentication is enabled.""" 351 | return _oauth2_config is not None and _oauth2_config.enabled 352 | 353 | 354 | def generate_test_token( 355 | issuer: str = "https://test-issuer.example.com", 356 | audience: str = "m3-api", 357 | subject: str = "test-user", 358 | scopes: list[str] | None = None, 359 | expires_in: int = 3600, 360 | ) -> str: 361 | """ 362 | Generate a test JWT token for development/testing. 
363 | 364 | WARNING: This should only be used for testing! 365 | """ 366 | if scopes is None: 367 | scopes = ["read:mimic-data"] 368 | 369 | now = datetime.now(timezone.utc) 370 | claims = { 371 | "iss": issuer, 372 | "aud": audience, 373 | "sub": subject, 374 | "iat": int(now.timestamp()), 375 | "exp": int((now + timedelta(seconds=expires_in)).timestamp()), 376 | "scope": " ".join(scopes), 377 | "email": f"{subject}@example.com", 378 | } 379 | 380 | # Generate a test key (DO NOT use in production) 381 | private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) 382 | 383 | private_pem = private_key.private_bytes( 384 | encoding=serialization.Encoding.PEM, 385 | format=serialization.PrivateFormat.PKCS8, 386 | encryption_algorithm=serialization.NoEncryption(), 387 | ) 388 | 389 | # Sign the token 390 | token = jwt.encode(claims, private_pem, algorithm="RS256") 391 | 392 | return token 393 | -------------------------------------------------------------------------------- /src/m3/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sqlite3 3 | import subprocess 4 | import sys 5 | from pathlib import Path 6 | from typing import Annotated 7 | 8 | import typer 9 | 10 | from m3 import __version__ 11 | from m3.config import ( 12 | SUPPORTED_DATASETS, 13 | get_dataset_config, 14 | get_dataset_raw_files_path, 15 | get_default_database_path, 16 | logger, 17 | ) 18 | from m3.data_io import initialize_dataset 19 | 20 | app = typer.Typer( 21 | name="m3", 22 | help="M3 CLI: Initialize local clinical datasets like MIMIC-IV Demo.", 23 | add_completion=False, 24 | rich_markup_mode="markdown", 25 | ) 26 | 27 | 28 | def version_callback(value: bool): 29 | if value: 30 | typer.echo(f"M3 CLI Version: {__version__}") 31 | raise typer.Exit() 32 | 33 | 34 | @app.callback() 35 | def main_callback( 36 | version: Annotated[ 37 | bool, 38 | typer.Option( 39 | "--version", 40 | "-v", 41 | 
callback=version_callback, 42 | is_eager=True, 43 | help="Show CLI version.", 44 | ), 45 | ] = False, 46 | verbose: Annotated[ 47 | bool, 48 | typer.Option( 49 | "--verbose", "-V", help="Enable DEBUG level logging for m3 components." 50 | ), 51 | ] = False, 52 | ): 53 | """ 54 | Main callback for the M3 CLI. Sets logging level. 55 | """ 56 | m3_logger = logging.getLogger("m3") # Get the logger from config.py 57 | if verbose: 58 | m3_logger.setLevel(logging.DEBUG) 59 | for handler in m3_logger.handlers: # Ensure handlers also respect the new level 60 | handler.setLevel(logging.DEBUG) 61 | logger.debug("Verbose mode enabled via CLI flag.") 62 | else: 63 | # Default to INFO as set in config.py 64 | m3_logger.setLevel(logging.INFO) 65 | for handler in m3_logger.handlers: 66 | handler.setLevel(logging.INFO) 67 | 68 | 69 | @app.command("init") 70 | def dataset_init_cmd( 71 | dataset_name: Annotated[ 72 | str, 73 | typer.Argument( 74 | help=( 75 | "Dataset to initialize. Default: 'mimic-iv-demo'. " 76 | f"Supported: {', '.join(SUPPORTED_DATASETS.keys())}" 77 | ), 78 | metavar="DATASET_NAME", 79 | ), 80 | ] = "mimic-iv-demo", 81 | db_path_str: Annotated[ 82 | str | None, 83 | typer.Option( 84 | "--db-path", 85 | "-p", 86 | help="Custom path for the SQLite DB. Uses a default if not set.", 87 | ), 88 | ] = None, 89 | ): 90 | """ 91 | Download a supported dataset (e.g., 'mimic-iv-demo') and load it into a local SQLite 92 | 93 | Raw downloaded files are stored in a `m3_data/raw_files/<dataset_name>/` subdirectory 94 | and are **not** deleted after processing. 95 | The SQLite database is stored in `m3_data/databases/` or path specified by `--db-path`. 
96 | """ 97 | logger.info(f"CLI 'init' called for dataset: '{dataset_name}'") 98 | 99 | dataset_key = dataset_name.lower() # Normalize for lookup 100 | dataset_config = get_dataset_config(dataset_key) 101 | 102 | if not dataset_config: 103 | typer.secho( 104 | f"Error: Dataset '{dataset_name}' is not supported or not configured.", 105 | fg=typer.colors.RED, 106 | err=True, 107 | ) 108 | typer.secho( 109 | f"Supported datasets are: {', '.join(SUPPORTED_DATASETS.keys())}", 110 | fg=typer.colors.YELLOW, 111 | err=True, 112 | ) 113 | raise typer.Exit(code=1) 114 | 115 | # Currently, only mimic-iv-demo is fully wired up as an example. 116 | # This check can be removed or adapted as more datasets are supported. 117 | if dataset_key != "mimic-iv-demo": 118 | typer.secho( 119 | ( 120 | f"Warning: While '{dataset_name}' is configured, only 'mimic-iv-demo' " 121 | "is fully implemented for initialization in this version." 122 | ), 123 | fg=typer.colors.YELLOW, 124 | ) 125 | 126 | final_db_path = ( 127 | Path(db_path_str).resolve() 128 | if db_path_str 129 | else get_default_database_path(dataset_key) 130 | ) 131 | if not final_db_path: 132 | typer.secho( 133 | f"Critical Error: Could not determine database path for '{dataset_name}'.", 134 | fg=typer.colors.RED, 135 | err=True, 136 | ) 137 | raise typer.Exit(code=1) 138 | 139 | # Ensure parent directory for the database exists 140 | final_db_path.parent.mkdir(parents=True, exist_ok=True) 141 | 142 | raw_files_storage_path = get_dataset_raw_files_path( 143 | dataset_key 144 | ) # Will be created if doesn't exist 145 | typer.echo(f"Initializing dataset: '{dataset_name}'") 146 | typer.echo(f"Target database path: {final_db_path}") 147 | typer.echo(f"Raw files will be stored at: {raw_files_storage_path.resolve()}") 148 | 149 | initialization_successful = initialize_dataset( 150 | dataset_name=dataset_key, db_target_path=final_db_path 151 | ) 152 | 153 | if not initialization_successful: 154 | typer.secho( 155 | ( 156 | f"Dataset 
'{dataset_name}' initialization FAILED. " 157 | "Please check logs for details." 158 | ), 159 | fg=typer.colors.RED, 160 | err=True, 161 | ) 162 | raise typer.Exit(code=1) 163 | 164 | logger.info( 165 | f"Dataset '{dataset_name}' initialization seems complete. " 166 | "Verifying database integrity..." 167 | ) 168 | 169 | # Basic verification by querying a known table 170 | verification_table_name = dataset_config.get("primary_verification_table") 171 | if not verification_table_name: 172 | logger.warning( 173 | f"No 'primary_verification_table' configured for '{dataset_name}'. " 174 | "Skipping DB query test." 175 | ) 176 | typer.secho( 177 | ( 178 | f"Dataset '{dataset_name}' initialized to {final_db_path}. " 179 | f"Raw files at {raw_files_storage_path.resolve()}." 180 | ), 181 | fg=typer.colors.GREEN, 182 | ) 183 | typer.secho( 184 | "Skipped database query test as no verification table is set in config.", 185 | fg=typer.colors.YELLOW, 186 | ) 187 | return 188 | 189 | try: 190 | conn = sqlite3.connect(final_db_path) 191 | cursor = conn.cursor() 192 | # A simple count query is usually safe and informative. 193 | query = f"SELECT COUNT(*) FROM {verification_table_name};" 194 | logger.debug(f"Executing verification query: '{query}' on {final_db_path}") 195 | cursor.execute(query) 196 | count_result = cursor.fetchone() 197 | conn.close() 198 | 199 | if count_result is None: 200 | raise sqlite3.Error( 201 | f"Query on table '{verification_table_name}' returned no result (None)." 202 | ) 203 | 204 | record_count = count_result[0] 205 | typer.secho( 206 | ( 207 | f"Database verification successful: Found {record_count} records in " 208 | f"table '{verification_table_name}'." 209 | ), 210 | fg=typer.colors.GREEN, 211 | ) 212 | typer.secho( 213 | ( 214 | f"Dataset '{dataset_name}' ready at {final_db_path}. " 215 | f"Raw files at {raw_files_storage_path.resolve()}." 
216 | ), 217 | fg=typer.colors.BRIGHT_GREEN, 218 | ) 219 | except sqlite3.Error as e: 220 | logger.error( 221 | ( 222 | f"SQLite error during verification query on table " 223 | f"'{verification_table_name}': {e}" 224 | ), 225 | exc_info=True, 226 | ) 227 | typer.secho( 228 | ( 229 | f"Error verifying table '{verification_table_name}': {e}. " 230 | f"The database was created at {final_db_path}, but the test query " 231 | "failed. The data might be incomplete or corrupted." 232 | ), 233 | fg=typer.colors.RED, 234 | err=True, 235 | ) 236 | except Exception as e: # Catch any other unexpected errors 237 | logger.error( 238 | f"Unexpected error during database verification: {e}", exc_info=True 239 | ) 240 | typer.secho( 241 | f"An unexpected error occurred during database verification: {e}", 242 | fg=typer.colors.RED, 243 | err=True, 244 | ) 245 | 246 | 247 | @app.command("config") 248 | def config_cmd( 249 | client: Annotated[ 250 | str | None, 251 | typer.Argument( 252 | help="MCP client to configure. Use 'claude' for Claude Desktop auto-setup, or omit for universal config generator.", 253 | metavar="CLIENT", 254 | ), 255 | ] = None, 256 | backend: Annotated[ 257 | str, 258 | typer.Option( 259 | "--backend", 260 | "-b", 261 | help="Backend to use (sqlite or bigquery). 
Default: sqlite", 262 | ), 263 | ] = "sqlite", 264 | db_path: Annotated[ 265 | str | None, 266 | typer.Option( 267 | "--db-path", 268 | "-p", 269 | help="Path to SQLite database (for sqlite backend)", 270 | ), 271 | ] = None, 272 | project_id: Annotated[ 273 | str | None, 274 | typer.Option( 275 | "--project-id", 276 | help="Google Cloud project ID (required for bigquery backend)", 277 | ), 278 | ] = None, 279 | python_path: Annotated[ 280 | str | None, 281 | typer.Option( 282 | "--python-path", 283 | help="Path to Python executable", 284 | ), 285 | ] = None, 286 | working_directory: Annotated[ 287 | str | None, 288 | typer.Option( 289 | "--working-directory", 290 | help="Working directory for the server", 291 | ), 292 | ] = None, 293 | server_name: Annotated[ 294 | str, 295 | typer.Option( 296 | "--server-name", 297 | help="Name for the MCP server", 298 | ), 299 | ] = "m3", 300 | output: Annotated[ 301 | str | None, 302 | typer.Option( 303 | "--output", 304 | "-o", 305 | help="Save configuration to file instead of printing", 306 | ), 307 | ] = None, 308 | quick: Annotated[ 309 | bool, 310 | typer.Option( 311 | "--quick", 312 | "-q", 313 | help="Use quick mode with provided arguments (non-interactive)", 314 | ), 315 | ] = False, 316 | ): 317 | """ 318 | Configure M3 MCP server for various clients. 
319 | 320 | Examples: 321 | 322 | • m3 config # Interactive universal config generator 323 | 324 | • m3 config claude # Auto-configure Claude Desktop 325 | 326 | • m3 config --quick # Quick universal config with defaults 327 | 328 | • m3 config claude --backend bigquery --project-id my-project 329 | """ 330 | try: 331 | from m3 import mcp_client_configs 332 | 333 | script_dir = Path(mcp_client_configs.__file__).parent 334 | except ImportError: 335 | typer.secho( 336 | "❌ Error: Could not find m3.mcp_client_configs package", 337 | fg=typer.colors.RED, 338 | err=True, 339 | ) 340 | raise typer.Exit(code=1) 341 | 342 | # Validate backend-specific arguments 343 | if backend == "sqlite" and project_id: 344 | typer.secho( 345 | "❌ Error: --project-id can only be used with --backend bigquery", 346 | fg=typer.colors.RED, 347 | err=True, 348 | ) 349 | raise typer.Exit(code=1) 350 | 351 | if backend == "bigquery" and db_path: 352 | typer.secho( 353 | "❌ Error: --db-path can only be used with --backend sqlite", 354 | fg=typer.colors.RED, 355 | err=True, 356 | ) 357 | raise typer.Exit(code=1) 358 | 359 | # Require project_id for BigQuery backend 360 | if backend == "bigquery" and not project_id: 361 | typer.secho( 362 | "❌ Error: --project-id is required when using --backend bigquery", 363 | fg=typer.colors.RED, 364 | err=True, 365 | ) 366 | raise typer.Exit(code=1) 367 | 368 | if client == "claude": 369 | # Run the Claude Desktop setup script 370 | script_path = script_dir / "setup_claude_desktop.py" 371 | 372 | if not script_path.exists(): 373 | typer.secho( 374 | f"Error: Claude Desktop setup script not found at {script_path}", 375 | fg=typer.colors.RED, 376 | err=True, 377 | ) 378 | raise typer.Exit(code=1) 379 | 380 | # Build command arguments 381 | cmd = [sys.executable, str(script_path)] 382 | 383 | if backend != "sqlite": 384 | cmd.extend(["--backend", backend]) 385 | 386 | if backend == "sqlite" and db_path: 387 | cmd.extend(["--db-path", db_path]) 388 | elif backend 
== "bigquery" and project_id: 389 | cmd.extend(["--project-id", project_id]) 390 | 391 | try: 392 | result = subprocess.run(cmd, check=True, capture_output=False) 393 | if result.returncode == 0: 394 | typer.secho( 395 | "✅ Claude Desktop configuration completed!", fg=typer.colors.GREEN 396 | ) 397 | except subprocess.CalledProcessError as e: 398 | typer.secho( 399 | f"❌ Claude Desktop setup failed with exit code {e.returncode}", 400 | fg=typer.colors.RED, 401 | err=True, 402 | ) 403 | raise typer.Exit(code=e.returncode) 404 | except FileNotFoundError: 405 | typer.secho( 406 | "❌ Python interpreter not found. Please ensure Python is installed.", 407 | fg=typer.colors.RED, 408 | err=True, 409 | ) 410 | raise typer.Exit(code=1) 411 | 412 | else: 413 | # Run the dynamic config generator 414 | script_path = script_dir / "dynamic_mcp_config.py" 415 | 416 | if not script_path.exists(): 417 | typer.secho( 418 | f"Error: Dynamic config script not found at {script_path}", 419 | fg=typer.colors.RED, 420 | err=True, 421 | ) 422 | raise typer.Exit(code=1) 423 | 424 | # Build command arguments 425 | cmd = [sys.executable, str(script_path)] 426 | 427 | if quick: 428 | cmd.append("--quick") 429 | 430 | if backend != "sqlite": 431 | cmd.extend(["--backend", backend]) 432 | 433 | if server_name != "m3": 434 | cmd.extend(["--server-name", server_name]) 435 | 436 | if python_path: 437 | cmd.extend(["--python-path", python_path]) 438 | 439 | if working_directory: 440 | cmd.extend(["--working-directory", working_directory]) 441 | 442 | if backend == "sqlite" and db_path: 443 | cmd.extend(["--db-path", db_path]) 444 | elif backend == "bigquery" and project_id: 445 | cmd.extend(["--project-id", project_id]) 446 | 447 | if output: 448 | cmd.extend(["--output", output]) 449 | 450 | if quick: 451 | typer.echo("🔧 Generating M3 MCP configuration...") 452 | else: 453 | typer.echo("🔧 Starting interactive M3 MCP configuration...") 454 | 455 | try: 456 | result = subprocess.run(cmd, check=True, 
capture_output=False) 457 | if result.returncode == 0 and quick: 458 | typer.secho( 459 | "✅ Configuration generated successfully!", fg=typer.colors.GREEN 460 | ) 461 | except subprocess.CalledProcessError as e: 462 | typer.secho( 463 | f"❌ Configuration generation failed with exit code {e.returncode}", 464 | fg=typer.colors.RED, 465 | err=True, 466 | ) 467 | raise typer.Exit(code=e.returncode) 468 | except FileNotFoundError: 469 | typer.secho( 470 | "❌ Python interpreter not found. Please ensure Python is installed.", 471 | fg=typer.colors.RED, 472 | err=True, 473 | ) 474 | raise typer.Exit(code=1) 475 | 476 | 477 | if __name__ == "__main__": 478 | app() 479 | -------------------------------------------------------------------------------- /src/m3/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | APP_NAME = "m3" 5 | 6 | # Setup basic logging 7 | logging.basicConfig( 8 | level=logging.INFO, 9 | format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s", 10 | datefmt="%Y-%m-%d %H:%M:%S", 11 | ) 12 | logger = logging.getLogger(APP_NAME) 13 | 14 | 15 | # ------------------------------------------------------------------- 16 | # Data directory rooted at project root (two levels up from this file) 17 | # ------------------------------------------------------------------- 18 | def _get_project_root() -> Path: 19 | """ 20 | Determine project root: 21 | - If cloned repo: use repository root (two levels up from this file) 22 | - If pip installed: ALWAYS use home directory 23 | """ 24 | package_root = Path(__file__).resolve().parents[2] 25 | 26 | # Check if we're in a cloned repository (has pyproject.toml at root) 27 | if (package_root / "pyproject.toml").exists(): 28 | return package_root 29 | 30 | # Pip installed: ALWAYS use home directory (simple and consistent) 31 | return Path.home() 32 | 33 | 34 | _PROJECT_ROOT = _get_project_root() 35 | _PROJECT_DATA_DIR = _PROJECT_ROOT 
/ "m3_data" 36 | 37 | DEFAULT_DATABASES_DIR = _PROJECT_DATA_DIR / "databases" 38 | DEFAULT_RAW_FILES_DIR = _PROJECT_DATA_DIR / "raw_files" 39 | 40 | 41 | # -------------------------------------------------- 42 | # Dataset configurations (add more entries as needed) 43 | # -------------------------------------------------- 44 | SUPPORTED_DATASETS = { 45 | "mimic-iv-demo": { 46 | "file_listing_url": "https://physionet.org/files/mimic-iv-demo/2.2/", 47 | "subdirectories_to_scan": ["hosp", "icu"], 48 | "default_db_filename": "mimic_iv_demo.db", 49 | "primary_verification_table": "hosp_admissions", # Table name in SQLite DB 50 | }, 51 | # add other datasets here... 52 | } 53 | 54 | 55 | # -------------------------------------------------- 56 | # Helper functions 57 | # -------------------------------------------------- 58 | def get_dataset_config(dataset_name: str) -> dict | None: 59 | """Retrieve the configuration for a given dataset (case-insensitive).""" 60 | return SUPPORTED_DATASETS.get(dataset_name.lower()) 61 | 62 | 63 | def get_default_database_path(dataset_name: str) -> Path | None: 64 | """ 65 | Return the default SQLite DB path for a given dataset, 66 | under <project_root>/m3_data/databases/. 67 | """ 68 | cfg = get_dataset_config(dataset_name) 69 | if cfg and "default_db_filename" in cfg: 70 | DEFAULT_DATABASES_DIR.mkdir(parents=True, exist_ok=True) 71 | return DEFAULT_DATABASES_DIR / cfg["default_db_filename"] 72 | 73 | logger.warning(f"Missing default_db_filename for dataset: {dataset_name}") 74 | return None 75 | 76 | 77 | def get_dataset_raw_files_path(dataset_name: str) -> Path | None: 78 | """ 79 | Return the raw-file storage path for a dataset, 80 | under <project_root>/m3_data/raw_files/<dataset_name>/. 
81 | """ 82 | cfg = get_dataset_config(dataset_name) 83 | if cfg: 84 | path = DEFAULT_RAW_FILES_DIR / dataset_name.lower() 85 | path.mkdir(parents=True, exist_ok=True) 86 | return path 87 | 88 | logger.warning(f"Unknown dataset, cannot determine raw path: {dataset_name}") 89 | return None 90 | -------------------------------------------------------------------------------- /src/m3/data_io.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from urllib.parse import urljoin, urlparse 3 | 4 | import polars as pl 5 | import requests 6 | import typer 7 | from bs4 import BeautifulSoup 8 | 9 | from m3.config import get_dataset_config, get_dataset_raw_files_path, logger 10 | 11 | COMMON_USER_AGENT = ( 12 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " 13 | "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" 14 | ) 15 | 16 | 17 | def _download_single_file( 18 | url: str, target_filepath: Path, session: requests.Session 19 | ) -> bool: 20 | """Downloads a single file with progress tracking.""" 21 | logger.debug(f"Attempting to download {url} to {target_filepath}...") 22 | try: 23 | response = session.get(url, stream=True, timeout=60) 24 | response.raise_for_status() 25 | total_size = int(response.headers.get("content-length", 0)) 26 | file_display_name = target_filepath.name 27 | 28 | target_filepath.parent.mkdir(parents=True, exist_ok=True) 29 | with ( 30 | open(target_filepath, "wb") as f, 31 | typer.progressbar( 32 | length=total_size, label=f"Downloading {file_display_name}" 33 | ) as progress, 34 | ): 35 | for chunk in response.iter_content(chunk_size=8192): # Standard chunk size 36 | if chunk: 37 | f.write(chunk) 38 | progress.update(len(chunk)) 39 | logger.info(f"Successfully downloaded: {file_display_name}") 40 | return True 41 | except requests.exceptions.HTTPError as e: 42 | status = e.response.status_code 43 | if status == 404: 44 | logger.error(f"Download failed (404 Not 
Found): {url}.") 45 | else: 46 | logger.error(f"HTTP error {status} downloading {url}: {e.response.reason}") 47 | except requests.exceptions.Timeout: 48 | logger.error(f"Timeout occurred while downloading {url}.") 49 | except requests.exceptions.RequestException as e: 50 | logger.error(f"A network or request error occurred downloading {url}: {e}") 51 | except OSError as e: 52 | logger.error(f"File system error writing {target_filepath}: {e}") 53 | 54 | # If download failed, attempt to remove partially downloaded file 55 | if target_filepath.exists(): 56 | try: 57 | target_filepath.unlink() 58 | except OSError as e: 59 | logger.error(f"Could not remove incomplete file {target_filepath}: {e}") 60 | return False 61 | 62 | 63 | def _scrape_urls_from_html_page( 64 | page_url: str, session: requests.Session, file_suffix: str = ".csv.gz" 65 | ) -> list[str]: 66 | """Scrapes a webpage for links ending with a specific suffix.""" 67 | found_urls = [] 68 | logger.debug(f"Scraping for '{file_suffix}' links on page: {page_url}") 69 | try: 70 | page_response = session.get(page_url, timeout=30) 71 | page_response.raise_for_status() 72 | soup = BeautifulSoup(page_response.content, "html.parser") 73 | for link_tag in soup.find_all("a", href=True): 74 | href_path = link_tag["href"] 75 | # Basic validation of the link 76 | if ( 77 | href_path.endswith(file_suffix) 78 | and not href_path.startswith(("?", "#")) 79 | and ".." 
not in href_path 80 | ): 81 | absolute_url = urljoin(page_url, href_path) 82 | found_urls.append(absolute_url) 83 | except requests.exceptions.RequestException as e: 84 | logger.error(f"Could not access or parse page {page_url} for scraping: {e}") 85 | return found_urls 86 | 87 | 88 | def _download_dataset_files( 89 | dataset_name: str, dataset_config: dict, raw_files_root_dir: Path 90 | ) -> bool: 91 | """Downloads all relevant files for a dataset based on its configuration.""" 92 | base_listing_url = dataset_config["file_listing_url"] 93 | subdirs_to_scan = dataset_config.get("subdirectories_to_scan", []) 94 | 95 | logger.info( 96 | f"Preparing to download {dataset_name} files from base URL: {base_listing_url}" 97 | ) 98 | session = requests.Session() 99 | session.headers.update({"User-Agent": COMMON_USER_AGENT}) 100 | 101 | all_files_to_process = [] # List of (url, local_target_path) 102 | 103 | for subdir_name in subdirs_to_scan: 104 | subdir_listing_url = urljoin(base_listing_url, f"{subdir_name}/") 105 | logger.info(f"Scanning subdirectory for CSVs: {subdir_listing_url}") 106 | csv_urls_in_subdir = _scrape_urls_from_html_page(subdir_listing_url, session) 107 | 108 | if not csv_urls_in_subdir: 109 | logger.warning( 110 | f"No .csv.gz files found in subdirectory: {subdir_listing_url}" 111 | ) 112 | continue 113 | 114 | for file_url in csv_urls_in_subdir: 115 | url_path_obj = Path(urlparse(file_url).path) 116 | base_listing_url_path_obj = Path(urlparse(base_listing_url).path) 117 | relative_file_path: Path 118 | 119 | try: 120 | # Attempt to make file path relative to base URL's path part 121 | if url_path_obj.as_posix().startswith( 122 | base_listing_url_path_obj.as_posix() 123 | ): 124 | relative_file_path = url_path_obj.relative_to( 125 | base_listing_url_path_obj 126 | ) 127 | else: 128 | # Fallback if URL structure is unexpected 129 | # (e.g., flat list of files not matching base structure) 130 | logger.warning( 131 | f"Path calculation fallback for 
{url_path_obj} vs " 132 | f"{base_listing_url_path_obj}. " 133 | f"Using {Path(subdir_name) / url_path_obj.name}" 134 | ) 135 | relative_file_path = Path(subdir_name) / url_path_obj.name 136 | except ( 137 | ValueError 138 | ) as e_rel: # Handles cases where relative_to is not possible 139 | logger.error( 140 | f"Path relative_to error for {url_path_obj} from " 141 | f"{base_listing_url_path_obj}: {e_rel}. " 142 | f"Defaulting to {Path(subdir_name) / url_path_obj.name}" 143 | ) 144 | relative_file_path = Path(subdir_name) / url_path_obj.name 145 | 146 | local_target_path = raw_files_root_dir / relative_file_path 147 | all_files_to_process.append((file_url, local_target_path)) 148 | 149 | if not all_files_to_process: 150 | logger.error( 151 | f"No '.csv.gz' download links found after scanning {base_listing_url} " 152 | f"and its subdirectories {subdirs_to_scan} for dataset '{dataset_name}'." 153 | ) 154 | return False 155 | 156 | # Deduplicate and sort for consistent processing order 157 | unique_files_to_process = sorted( 158 | list(set(all_files_to_process)), key=lambda x: x[1] 159 | ) 160 | logger.info( 161 | f"Found {len(unique_files_to_process)} unique '.csv.gz' files to download " 162 | f"for {dataset_name}." 163 | ) 164 | 165 | downloaded_count = 0 166 | for file_url, target_filepath in unique_files_to_process: 167 | if not _download_single_file(file_url, target_filepath, session): 168 | logger.error( 169 | f"Critical download failed for '{target_filepath.name}'. " 170 | "Aborting dataset download." 171 | ) 172 | return False # Stop if any single download fails 173 | downloaded_count += 1 174 | 175 | # Success only if all identified files were downloaded 176 | return downloaded_count == len(unique_files_to_process) 177 | 178 | 179 | def _load_csv_with_robust_parsing(csv_file_path: Path, table_name: str) -> pl.DataFrame: 180 | """ 181 | Load a CSV file with proper type inference by scanning the entire file. 
182 | """ 183 | df = pl.read_csv( 184 | source=csv_file_path, 185 | infer_schema_length=None, # Scan entire file for proper type inference 186 | try_parse_dates=True, 187 | ignore_errors=False, 188 | null_values=["", "NULL", "null", "\\N", "NA"], 189 | ) 190 | 191 | # Log empty columns (this is normal, not an error) 192 | if df.height > 0: 193 | empty_columns = [col for col in df.columns if df[col].is_null().all()] 194 | if empty_columns: 195 | logger.info( 196 | f" Table '{table_name}': Found {len(empty_columns)} empty column(s): " 197 | f"{', '.join(empty_columns[:5])}" 198 | + ( 199 | f" (and {len(empty_columns) - 5} more)" 200 | if len(empty_columns) > 5 201 | else "" 202 | ) 203 | ) 204 | 205 | return df 206 | 207 | 208 | def _etl_csv_collection_to_sqlite(csv_source_dir: Path, db_target_path: Path) -> bool: 209 | """Loads all .csv.gz files from a directory structure into an SQLite database.""" 210 | db_target_path.parent.mkdir(parents=True, exist_ok=True) 211 | # Polars uses this format for SQLite connections 212 | db_connection_uri = f"sqlite:///{db_target_path.resolve()}" 213 | logger.info( 214 | f"Starting ETL: loading CSVs from '{csv_source_dir}' to SQLite DB " 215 | f"at '{db_target_path}'" 216 | ) 217 | 218 | csv_file_paths = list(csv_source_dir.rglob("*.csv.gz")) 219 | if not csv_file_paths: 220 | logger.error( 221 | "ETL Error: No .csv.gz files found (recursively) in source directory: " 222 | f"{csv_source_dir}" 223 | ) 224 | return False 225 | 226 | successfully_loaded_count = 0 227 | files_with_errors = [] 228 | logger.info(f"Found {len(csv_file_paths)} .csv.gz files for ETL process.") 229 | 230 | for i, csv_file_path in enumerate(csv_file_paths): 231 | # Generate table name from file path relative to the source directory 232 | # e.g., source_dir/hosp/admissions.csv.gz -> hosp_admissions 233 | relative_path = csv_file_path.relative_to(csv_source_dir) 234 | table_name_parts = [part.lower() for part in relative_path.parts] 235 | table_name = ( 236 | 
"_".join(table_name_parts) 237 | .replace(".csv.gz", "") 238 | .replace("-", "_") 239 | .replace(".", "_") 240 | ) 241 | 242 | logger.info( 243 | f"[{i + 1}/{len(csv_file_paths)}] ETL: Processing '{relative_path}' " 244 | f"into SQLite table '{table_name}'..." 245 | ) 246 | 247 | try: 248 | # Use the robust parsing function 249 | df = _load_csv_with_robust_parsing(csv_file_path, table_name) 250 | 251 | df.write_database( 252 | table_name=table_name, 253 | connection=db_connection_uri, 254 | if_table_exists="replace", # Overwrite table if it exists 255 | engine="sqlalchemy", # Recommended engine for Polars with SQLite 256 | ) 257 | logger.info( 258 | f" Successfully loaded '{relative_path}' into table '{table_name}' " 259 | f"({df.height} rows, {df.width} columns)." 260 | ) 261 | successfully_loaded_count += 1 262 | 263 | except Exception as e: 264 | err_msg = ( 265 | f"Unexpected error during ETL for '{relative_path}' " 266 | f"(target table '{table_name}'): {e}" 267 | ) 268 | logger.error(err_msg, exc_info=True) 269 | files_with_errors.append(f"{relative_path}: {e!s}") 270 | # Continue to process other files even if one fails 271 | 272 | if files_with_errors: 273 | logger.warning( 274 | "ETL completed with errors during processing for " 275 | f"{len(files_with_errors)} file(s):" 276 | ) 277 | for detail in files_with_errors: 278 | logger.warning(f" - {detail}") 279 | 280 | # Strict success: all found files must be loaded without Polars/DB errors. 281 | if successfully_loaded_count == len(csv_file_paths): 282 | logger.info( 283 | f"All {len(csv_file_paths)} CSV files successfully processed & loaded into " 284 | f"{db_target_path}." 285 | ) 286 | return True 287 | elif successfully_loaded_count > 0: 288 | logger.warning( 289 | f"Partially completed ETL: Loaded {successfully_loaded_count} out of " 290 | f"{len(csv_file_paths)} files. Some files encountered errors during " 291 | "their individual processing and were not loaded." 
292 | ) 293 | return False 294 | else: # No files were successfully loaded 295 | logger.error( 296 | "ETL process failed: No CSV files were successfully loaded into the " 297 | f"database from {csv_source_dir}." 298 | ) 299 | return False 300 | 301 | 302 | def initialize_dataset(dataset_name: str, db_target_path: Path) -> bool: 303 | """Initializes a dataset: downloads files and loads them into a database.""" 304 | dataset_config = get_dataset_config(dataset_name) 305 | if not dataset_config: 306 | logger.error(f"Configuration for dataset '{dataset_name}' not found.") 307 | return False 308 | 309 | raw_files_root_dir = get_dataset_raw_files_path(dataset_name) 310 | raw_files_root_dir.mkdir(parents=True, exist_ok=True) 311 | 312 | logger.info(f"Starting initialization for dataset: {dataset_name}") 313 | download_ok = _download_dataset_files( 314 | dataset_name, dataset_config, raw_files_root_dir 315 | ) 316 | 317 | if not download_ok: 318 | logger.error( 319 | f"Download phase failed for dataset '{dataset_name}'. ETL skipped." 320 | ) 321 | return False 322 | 323 | logger.info(f"Download phase complete for '{dataset_name}'. Starting ETL phase.") 324 | etl_ok = _etl_csv_collection_to_sqlite(raw_files_root_dir, db_target_path) 325 | 326 | if not etl_ok: 327 | logger.error(f"ETL phase failed for dataset '{dataset_name}'.") 328 | return False 329 | 330 | logger.info( 331 | f"Dataset '{dataset_name}' successfully initialized. " 332 | f"Database at: {db_target_path}" 333 | ) 334 | return True 335 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCP client configuration utilities. 3 | 4 | This package contains scripts for configuring various MCP clients 5 | with the M3 server. 
6 | """ 7 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/dynamic_mcp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dynamic MCP Configuration Generator for M3 Server. 3 | Generates MCP server configurations that can be copied and pasted into any MCP client. 4 | """ 5 | 6 | import json 7 | import os 8 | import shutil 9 | import sys 10 | from pathlib import Path 11 | from typing import Any 12 | 13 | # Error messages 14 | _DATABASE_PATH_ERROR_MSG = ( 15 | "Could not determine default database path for mimic-iv-demo.\n" 16 | "Please run 'm3 init mimic-iv-demo' first." 17 | ) 18 | 19 | 20 | class MCPConfigGenerator: 21 | """Generator for MCP server configurations.""" 22 | 23 | def __init__(self): 24 | self.current_dir = Path(__file__).parent.parent.absolute() 25 | self.default_python = self._get_default_python() 26 | 27 | def _get_default_python(self) -> str: 28 | """Get the default Python executable path.""" 29 | # Try to use the current virtual environment 30 | if "VIRTUAL_ENV" in os.environ: 31 | venv_python = Path(os.environ["VIRTUAL_ENV"]) / "bin" / "python" 32 | if venv_python.exists(): 33 | return str(venv_python) 34 | 35 | # Fall back to system python 36 | return shutil.which("python") or shutil.which("python3") or "python" 37 | 38 | def _validate_python_path(self, python_path: str) -> bool: 39 | """Validate that the Python path exists and is executable.""" 40 | path = Path(python_path) 41 | return path.exists() and path.is_file() and os.access(path, os.X_OK) 42 | 43 | def _validate_directory(self, dir_path: str) -> bool: 44 | """Validate that the directory exists.""" 45 | return Path(dir_path).exists() and Path(dir_path).is_dir() 46 | 47 | def generate_config( 48 | self, 49 | server_name: str = "m3", 50 | python_path: str | None = None, 51 | working_directory: str | None = None, 52 | backend: str = "sqlite", 53 | db_path: str | None = None, 54 
| project_id: str | None = None, 55 | additional_env: dict[str, str] | None = None, 56 | module_name: str = "m3.mcp_server", 57 | oauth2_enabled: bool = False, 58 | oauth2_config: dict[str, str] | None = None, 59 | ) -> dict[str, Any]: 60 | """Generate MCP server configuration.""" 61 | 62 | # Use defaults if not provided 63 | if python_path is None: 64 | python_path = self.default_python 65 | if working_directory is None: 66 | working_directory = str(self.current_dir) 67 | 68 | # Validate inputs 69 | if not self._validate_python_path(python_path): 70 | raise ValueError(f"Invalid Python path: {python_path}") 71 | if not self._validate_directory(working_directory): 72 | raise ValueError(f"Invalid working directory: {working_directory}") 73 | 74 | # Build environment variables 75 | env = { 76 | "PYTHONPATH": str(Path(working_directory) / "src"), 77 | "M3_BACKEND": backend, 78 | } 79 | 80 | # Add backend-specific environment variables 81 | if backend == "sqlite" and db_path: 82 | env["M3_DB_PATH"] = db_path 83 | elif backend == "bigquery" and project_id: 84 | env["M3_PROJECT_ID"] = project_id 85 | env["GOOGLE_CLOUD_PROJECT"] = project_id 86 | 87 | # Add OAuth2 configuration if enabled 88 | if oauth2_enabled and oauth2_config: 89 | env.update( 90 | { 91 | "M3_OAUTH2_ENABLED": "true", 92 | "M3_OAUTH2_ISSUER_URL": oauth2_config.get("issuer_url", ""), 93 | "M3_OAUTH2_AUDIENCE": oauth2_config.get("audience", ""), 94 | "M3_OAUTH2_REQUIRED_SCOPES": oauth2_config.get( 95 | "required_scopes", "read:mimic-data" 96 | ), 97 | "M3_OAUTH2_JWKS_URL": oauth2_config.get("jwks_url", ""), 98 | } 99 | ) 100 | 101 | # Optional OAuth2 settings 102 | if oauth2_config.get("client_id"): 103 | env["M3_OAUTH2_CLIENT_ID"] = oauth2_config["client_id"] 104 | if oauth2_config.get("rate_limit_requests"): 105 | env["M3_OAUTH2_RATE_LIMIT_REQUESTS"] = str( 106 | oauth2_config["rate_limit_requests"] 107 | ) 108 | 109 | # Add any additional environment variables 110 | if additional_env: 111 | 
env.update(additional_env) 112 | 113 | # Create the configuration 114 | config = { 115 | "mcpServers": { 116 | server_name: { 117 | "command": python_path, 118 | "args": ["-m", module_name], 119 | "cwd": working_directory, 120 | "env": env, 121 | } 122 | } 123 | } 124 | 125 | return config 126 | 127 | def interactive_config(self) -> dict[str, Any]: 128 | """Interactive configuration builder.""" 129 | print("🔧 M3 MCP Server Configuration Generator") 130 | print("=" * 50) 131 | 132 | # Server name 133 | print("\n🏷️ Server Configuration:") 134 | print("The server name is how your MCP client will identify this server.") 135 | server_name = ( 136 | input("Server name (press Enter for default 'm3'): ").strip() or "m3" 137 | ) 138 | 139 | # Python path 140 | print(f"\nDefault Python path: {self.default_python}") 141 | python_path = input( 142 | "Python executable path (press Enter for default): " 143 | ).strip() 144 | if not python_path: 145 | python_path = self.default_python 146 | 147 | # Working directory 148 | print(f"\nDefault working directory: {self.current_dir}") 149 | working_directory = input( 150 | "Working directory (press Enter for default): " 151 | ).strip() 152 | if not working_directory: 153 | working_directory = str(self.current_dir) 154 | 155 | # Backend selection - simplified 156 | print("\nChoose backend:") 157 | print("1. SQLite (local database)") 158 | print("2. 
BigQuery (Google Cloud)") 159 | 160 | while True: 161 | backend_choice = input("Choose backend [1]: ").strip() or "1" 162 | if backend_choice in ["1", "2"]: 163 | break 164 | print("Please enter 1 or 2") 165 | 166 | backend = "sqlite" if backend_choice == "1" else "bigquery" 167 | 168 | # Backend-specific configuration 169 | db_path = None 170 | project_id = None 171 | 172 | if backend == "sqlite": 173 | print("\n📁 SQLite Configuration:") 174 | from m3.config import get_default_database_path 175 | 176 | default_db_path = get_default_database_path("mimic-iv-demo") 177 | if default_db_path is None: 178 | raise ValueError(_DATABASE_PATH_ERROR_MSG) 179 | print(f"Default database path: {default_db_path}") 180 | 181 | db_path = ( 182 | input( 183 | "SQLite database path (optional, press Enter to use default): " 184 | ).strip() 185 | or None 186 | ) 187 | 188 | elif backend == "bigquery": 189 | print("\n☁️ BigQuery Configuration:") 190 | project_id = None 191 | while not project_id: 192 | project_id = input( 193 | "Google Cloud project ID (required for BigQuery): " 194 | ).strip() 195 | if not project_id: 196 | print( 197 | "❌ Project ID is required when using BigQuery backend. Please enter your GCP project ID." 198 | ) 199 | print(f"✅ Will use project: {project_id}") 200 | 201 | # OAuth2 Configuration 202 | oauth2_enabled = False 203 | oauth2_config = None 204 | 205 | print("\n🔐 OAuth2 Authentication (optional):") 206 | enable_oauth2 = input("Enable OAuth2 authentication? 
[y/N]: ").strip().lower() 207 | 208 | if enable_oauth2 in ["y", "yes"]: 209 | oauth2_enabled = True 210 | oauth2_config = {} 211 | 212 | print("\nOAuth2 Configuration:") 213 | oauth2_config["issuer_url"] = input( 214 | "OAuth2 Issuer URL (e.g., https://auth.example.com): " 215 | ).strip() 216 | oauth2_config["audience"] = input( 217 | "OAuth2 Audience (e.g., m3-api): " 218 | ).strip() 219 | oauth2_config["required_scopes"] = ( 220 | input("Required Scopes [read:mimic-data]: ").strip() 221 | or "read:mimic-data" 222 | ) 223 | 224 | # Optional settings 225 | jwks_url = input("JWKS URL (optional, auto-discovered if empty): ").strip() 226 | if jwks_url: 227 | oauth2_config["jwks_url"] = jwks_url 228 | 229 | rate_limit = input("Rate limit (requests per hour) [100]: ").strip() 230 | if rate_limit and rate_limit.isdigit(): 231 | oauth2_config["rate_limit_requests"] = rate_limit 232 | 233 | print("✅ OAuth2 configuration added") 234 | 235 | # Additional environment variables 236 | additional_env = {} 237 | print("\n🌍 Additional environment variables (optional):") 238 | print( 239 | "Enter key=value pairs, one per line. Press Enter on empty line to finish." 240 | ) 241 | while True: 242 | env_var = input("Environment variable: ").strip() 243 | if not env_var: 244 | break 245 | if "=" in env_var: 246 | key, value = env_var.split("=", 1) 247 | additional_env[key.strip()] = value.strip() 248 | print(f"✅ Added: {key.strip()}={value.strip()}") 249 | else: 250 | print("❌ Invalid format. 
Use key=value") 251 | 252 | return self.generate_config( 253 | server_name=server_name, 254 | python_path=python_path, 255 | working_directory=working_directory, 256 | backend=backend, 257 | db_path=db_path, 258 | project_id=project_id, 259 | additional_env=additional_env if additional_env else None, 260 | module_name="m3.mcp_server", 261 | oauth2_enabled=oauth2_enabled, 262 | oauth2_config=oauth2_config, 263 | ) 264 | 265 | 266 | def print_config_info(config: dict[str, Any]): 267 | """Print configuration information.""" 268 | # Get the first (and likely only) server configuration 269 | server_name = next(iter(config["mcpServers"].keys())) 270 | server_config = config["mcpServers"][server_name] 271 | 272 | print("\n📋 Configuration Summary:") 273 | print("=" * 30) 274 | print(f"🏷️ Server name: {server_name}") 275 | print(f"🐍 Python path: {server_config['command']}") 276 | print(f"📁 Working directory: {server_config['cwd']}") 277 | print(f"🔧 Backend: {server_config['env'].get('M3_BACKEND', 'unknown')}") 278 | 279 | if "M3_DB_PATH" in server_config["env"]: 280 | print(f"💾 Database path: {server_config['env']['M3_DB_PATH']}") 281 | elif server_config["env"].get("M3_BACKEND") == "sqlite": 282 | # Show the default path when using SQLite backend 283 | from m3.config import get_default_database_path 284 | 285 | default_path = get_default_database_path("mimic-iv-demo") 286 | if default_path is None: 287 | raise ValueError(_DATABASE_PATH_ERROR_MSG) 288 | print(f"💾 Database path: {default_path}") 289 | 290 | if "M3_PROJECT_ID" in server_config["env"]: 291 | print(f"☁️ Project ID: {server_config['env']['M3_PROJECT_ID']}") 292 | 293 | # Show additional env vars 294 | additional_env = { 295 | k: v 296 | for k, v in server_config["env"].items() 297 | if k 298 | not in [ 299 | "PYTHONPATH", 300 | "M3_BACKEND", 301 | "M3_DB_PATH", 302 | "M3_PROJECT_ID", 303 | "GOOGLE_CLOUD_PROJECT", 304 | ] 305 | } 306 | if additional_env: 307 | print("🌍 Additional environment variables:") 308 | 
for key, value in additional_env.items(): 309 | print(f" {key}: {value}") 310 | 311 | 312 | def main(): 313 | """Main function.""" 314 | import argparse 315 | 316 | parser = argparse.ArgumentParser( 317 | description="Generate MCP server configuration for M3", 318 | formatter_class=argparse.RawDescriptionHelpFormatter, 319 | epilog=""" 320 | Examples: 321 | # Interactive mode 322 | python dynamic_mcp_config.py 323 | 324 | # Quick generation with defaults 325 | python dynamic_mcp_config.py --quick 326 | 327 | # Custom configuration 328 | python dynamic_mcp_config.py --python-path /usr/bin/python3 --backend bigquery --project-id my-project 329 | 330 | # Save to file 331 | python dynamic_mcp_config.py --output config.json 332 | """, 333 | ) 334 | 335 | parser.add_argument( 336 | "--quick", 337 | action="store_true", 338 | help="Generate configuration with defaults (non-interactive)", 339 | ) 340 | parser.add_argument( 341 | "--server-name", default="m3", help="Name for the MCP server (default: m3)" 342 | ) 343 | parser.add_argument("--python-path", help="Path to Python executable") 344 | parser.add_argument("--working-directory", help="Working directory for the server") 345 | parser.add_argument( 346 | "--backend", 347 | choices=["sqlite", "bigquery"], 348 | default="sqlite", 349 | help="Backend to use (default: sqlite)", 350 | ) 351 | parser.add_argument( 352 | "--db-path", help="Path to SQLite database (for sqlite backend)" 353 | ) 354 | parser.add_argument( 355 | "--project-id", help="Google Cloud project ID (for bigquery backend)" 356 | ) 357 | parser.add_argument( 358 | "--env", 359 | action="append", 360 | help="Additional environment variables (format: KEY=VALUE)", 361 | ) 362 | parser.add_argument( 363 | "--output", "-o", help="Save configuration to file instead of printing" 364 | ) 365 | parser.add_argument( 366 | "--pretty", 367 | action="store_true", 368 | default=True, 369 | help="Pretty print JSON (default: True)", 370 | ) 371 | 372 | args = 
parser.parse_args() 373 | 374 | # Validate backend-specific arguments 375 | if args.backend == "sqlite" and args.project_id: 376 | print( 377 | "❌ Error: --project-id can only be used with --backend bigquery", 378 | file=sys.stderr, 379 | ) 380 | sys.exit(1) 381 | 382 | if args.backend == "bigquery" and args.db_path: 383 | print( 384 | "❌ Error: --db-path can only be used with --backend sqlite", 385 | file=sys.stderr, 386 | ) 387 | sys.exit(1) 388 | 389 | # Require project_id for BigQuery backend 390 | if args.backend == "bigquery" and not args.project_id: 391 | print( 392 | "❌ Error: --project-id is required when using --backend bigquery", 393 | file=sys.stderr, 394 | ) 395 | sys.exit(1) 396 | 397 | generator = MCPConfigGenerator() 398 | 399 | try: 400 | if args.quick: 401 | # Quick mode with command line arguments 402 | additional_env = {} 403 | if args.env: 404 | for env_var in args.env: 405 | if "=" in env_var: 406 | key, value = env_var.split("=", 1) 407 | additional_env[key.strip()] = value.strip() 408 | 409 | config = generator.generate_config( 410 | server_name=args.server_name, 411 | python_path=args.python_path, 412 | working_directory=args.working_directory, 413 | backend=args.backend, 414 | db_path=args.db_path, 415 | project_id=args.project_id, 416 | additional_env=additional_env if additional_env else None, 417 | module_name="m3.mcp_server", 418 | ) 419 | else: 420 | # Interactive mode 421 | config = generator.interactive_config() 422 | 423 | # Print configuration info 424 | print_config_info(config) 425 | 426 | # Output the configuration 427 | json_output = json.dumps(config, indent=2 if args.pretty else None) 428 | 429 | if args.output: 430 | # Save to file 431 | with open(args.output, "w") as f: 432 | f.write(json_output) 433 | print(f"\n💾 Configuration saved to: {args.output}") 434 | else: 435 | # Print to terminal 436 | print("\n📋 MCP Configuration (copy and paste this into your MCP client):") 437 | print("=" * 70) 438 | print(json_output) 439 | 
print("=" * 70) 440 | print( 441 | "\n💡 Copy the JSON above and paste it into your MCP client configuration." 442 | ) 443 | 444 | except Exception as e: 445 | print(f"❌ Error: {e}", file=sys.stderr) 446 | sys.exit(1) 447 | 448 | 449 | if __name__ == "__main__": 450 | main() 451 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/setup_claude_desktop.py: -------------------------------------------------------------------------------- 1 | """ 2 | Setup script for M3 MCP Server with Claude Desktop. 3 | Automatically configures Claude Desktop to use the M3 MCP server. 4 | """ 5 | 6 | import json 7 | import os 8 | import shutil 9 | from pathlib import Path 10 | 11 | 12 | def get_claude_config_path(): 13 | """Get the Claude Desktop configuration file path.""" 14 | home = Path.home() 15 | 16 | # macOS path 17 | claude_config = ( 18 | home 19 | / "Library" 20 | / "Application Support" 21 | / "Claude" 22 | / "claude_desktop_config.json" 23 | ) 24 | if claude_config.parent.exists(): 25 | return claude_config 26 | 27 | # Windows path 28 | claude_config = ( 29 | home / "AppData" / "Roaming" / "Claude" / "claude_desktop_config.json" 30 | ) 31 | if claude_config.parent.exists(): 32 | return claude_config 33 | 34 | # Linux path 35 | claude_config = home / ".config" / "Claude" / "claude_desktop_config.json" 36 | if claude_config.parent.exists(): 37 | return claude_config 38 | 39 | raise FileNotFoundError("Could not find Claude Desktop configuration directory") 40 | 41 | 42 | def get_current_directory(): 43 | """Get the current M3 project directory.""" 44 | return Path(__file__).parent.parent.absolute() 45 | 46 | 47 | def get_python_path(): 48 | """Get the Python executable path.""" 49 | # Try to use the current virtual environment 50 | if "VIRTUAL_ENV" in os.environ: 51 | venv_python = Path(os.environ["VIRTUAL_ENV"]) / "bin" / "python" 52 | if venv_python.exists(): 53 | return str(venv_python) 54 | 55 | # Fall back to 
system python 56 | return shutil.which("python") or shutil.which("python3") or "python" 57 | 58 | 59 | def create_mcp_config( 60 | backend="sqlite", 61 | db_path=None, 62 | project_id=None, 63 | oauth2_enabled=False, 64 | oauth2_config=None, 65 | ): 66 | """Create MCP server configuration.""" 67 | current_dir = get_current_directory() 68 | python_path = get_python_path() 69 | 70 | config = { 71 | "mcpServers": { 72 | "m3": { 73 | "command": python_path, 74 | "args": ["-m", "m3.mcp_server"], 75 | "cwd": str(current_dir), 76 | "env": {"PYTHONPATH": str(current_dir / "src"), "M3_BACKEND": backend}, 77 | } 78 | } 79 | } 80 | 81 | # Add backend-specific environment variables 82 | if backend == "sqlite" and db_path: 83 | config["mcpServers"]["m3"]["env"]["M3_DB_PATH"] = db_path 84 | elif backend == "bigquery" and project_id: 85 | config["mcpServers"]["m3"]["env"]["M3_PROJECT_ID"] = project_id 86 | config["mcpServers"]["m3"]["env"]["GOOGLE_CLOUD_PROJECT"] = project_id 87 | 88 | # Add OAuth2 configuration if enabled 89 | if oauth2_enabled and oauth2_config: 90 | config["mcpServers"]["m3"]["env"].update( 91 | { 92 | "M3_OAUTH2_ENABLED": "true", 93 | "M3_OAUTH2_ISSUER_URL": oauth2_config.get("issuer_url", ""), 94 | "M3_OAUTH2_AUDIENCE": oauth2_config.get("audience", ""), 95 | "M3_OAUTH2_REQUIRED_SCOPES": oauth2_config.get( 96 | "required_scopes", "read:mimic-data" 97 | ), 98 | "M3_OAUTH2_JWKS_URL": oauth2_config.get("jwks_url", ""), 99 | } 100 | ) 101 | 102 | # Optional OAuth2 settings 103 | if oauth2_config.get("client_id"): 104 | config["mcpServers"]["m3"]["env"]["M3_OAUTH2_CLIENT_ID"] = oauth2_config[ 105 | "client_id" 106 | ] 107 | if oauth2_config.get("rate_limit_requests"): 108 | config["mcpServers"]["m3"]["env"]["M3_OAUTH2_RATE_LIMIT_REQUESTS"] = str( 109 | oauth2_config["rate_limit_requests"] 110 | ) 111 | 112 | return config 113 | 114 | 115 | def setup_claude_desktop( 116 | backend="sqlite", 117 | db_path=None, 118 | project_id=None, 119 | oauth2_enabled=False, 120 
| oauth2_config=None, 121 | ): 122 | """Setup Claude Desktop with M3 MCP server.""" 123 | try: 124 | claude_config_path = get_claude_config_path() 125 | print(f"Found Claude Desktop config at: {claude_config_path}") 126 | 127 | # Load existing config or create new one 128 | existing_config = {} 129 | if claude_config_path.exists() and claude_config_path.stat().st_size > 0: 130 | try: 131 | with open(claude_config_path) as f: 132 | existing_config = json.load(f) 133 | print("Loaded existing Claude Desktop configuration") 134 | except json.JSONDecodeError: 135 | print("Found corrupted config file, creating new configuration") 136 | existing_config = {} 137 | else: 138 | print("Creating new Claude Desktop configuration") 139 | 140 | # Create MCP config 141 | mcp_config = create_mcp_config( 142 | backend, db_path, project_id, oauth2_enabled, oauth2_config 143 | ) 144 | 145 | # Merge configurations 146 | if "mcpServers" not in existing_config: 147 | existing_config["mcpServers"] = {} 148 | 149 | existing_config["mcpServers"].update(mcp_config["mcpServers"]) 150 | 151 | # Ensure directory exists 152 | claude_config_path.parent.mkdir(parents=True, exist_ok=True) 153 | 154 | # Write updated config 155 | with open(claude_config_path, "w") as f: 156 | json.dump(existing_config, f, indent=2) 157 | 158 | print("✅ Successfully configured Claude Desktop!") 159 | print(f"📁 Config file: {claude_config_path}") 160 | print(f"🔧 Backend: {backend}") 161 | 162 | if backend == "sqlite": 163 | db_path_display = db_path or "default (m3_data/databases/mimic_iv_demo.db)" 164 | print(f"💾 Database: {db_path_display}") 165 | elif backend == "bigquery": 166 | project_display = project_id or "physionet-data" 167 | print(f"☁️ Project: {project_display}") 168 | 169 | if oauth2_enabled: 170 | print("🔐 OAuth2 Authentication: Enabled") 171 | if oauth2_config: 172 | print(f"🔗 Issuer: {oauth2_config.get('issuer_url', 'Not configured')}") 173 | print(f"👥 Audience: {oauth2_config.get('audience', 'Not 
configured')}") 174 | print( 175 | f"🔑 Required Scopes: {oauth2_config.get('required_scopes', 'read:mimic-data')}" 176 | ) 177 | print("\n⚠️ Security Notice:") 178 | print(" - OAuth2 authentication is now required for all API calls") 179 | print(" - Ensure you have a valid access token with the required scopes") 180 | print( 181 | " - Set M3_OAUTH2_TOKEN environment variable with your Bearer token" 182 | ) 183 | else: 184 | print("🔓 OAuth2 Authentication: Disabled") 185 | 186 | print("\n🔄 Please restart Claude Desktop to apply changes") 187 | 188 | return True 189 | 190 | except Exception as e: 191 | print(f"❌ Error setting up Claude Desktop: {e}") 192 | return False 193 | 194 | 195 | def main(): 196 | """Main setup function.""" 197 | import argparse 198 | 199 | parser = argparse.ArgumentParser( 200 | description="Setup M3 MCP Server with Claude Desktop" 201 | ) 202 | parser.add_argument( 203 | "--backend", 204 | choices=["sqlite", "bigquery"], 205 | default="sqlite", 206 | help="Backend to use (default: sqlite)", 207 | ) 208 | parser.add_argument( 209 | "--db-path", help="Path to SQLite database (for sqlite backend)" 210 | ) 211 | parser.add_argument( 212 | "--project-id", help="Google Cloud project ID (for bigquery backend)" 213 | ) 214 | parser.add_argument( 215 | "--enable-oauth2", action="store_true", help="Enable OAuth2 authentication" 216 | ) 217 | parser.add_argument( 218 | "--oauth2-issuer", help="OAuth2 issuer URL (e.g., https://auth.example.com)" 219 | ) 220 | parser.add_argument("--oauth2-audience", help="OAuth2 audience (e.g., m3-api)") 221 | parser.add_argument( 222 | "--oauth2-scopes", 223 | default="read:mimic-data", 224 | help="Required OAuth2 scopes (comma-separated)", 225 | ) 226 | 227 | args = parser.parse_args() 228 | 229 | # Validate backend-specific arguments 230 | if args.backend == "sqlite" and args.project_id: 231 | print("❌ Error: --project-id can only be used with --backend bigquery") 232 | exit(1) 233 | 234 | if args.backend == 
"bigquery" and args.db_path: 235 | print("❌ Error: --db-path can only be used with --backend sqlite") 236 | exit(1) 237 | 238 | # Require project_id for BigQuery backend 239 | if args.backend == "bigquery" and not args.project_id: 240 | print("❌ Error: --project-id is required when using --backend bigquery") 241 | exit(1) 242 | 243 | print("🚀 Setting up M3 MCP Server with Claude Desktop...") 244 | print(f"📊 Backend: {args.backend}") 245 | 246 | # Prepare OAuth2 configuration if enabled 247 | oauth2_config = None 248 | if args.enable_oauth2: 249 | if not args.oauth2_issuer or not args.oauth2_audience: 250 | print( 251 | "❌ Error: --oauth2-issuer and --oauth2-audience are required when --enable-oauth2 is used" 252 | ) 253 | exit(1) 254 | 255 | oauth2_config = { 256 | "issuer_url": args.oauth2_issuer, 257 | "audience": args.oauth2_audience, 258 | "required_scopes": args.oauth2_scopes, 259 | } 260 | 261 | success = setup_claude_desktop( 262 | backend=args.backend, 263 | db_path=args.db_path, 264 | project_id=args.project_id, 265 | oauth2_enabled=args.enable_oauth2, 266 | oauth2_config=oauth2_config, 267 | ) 268 | 269 | if success: 270 | print("\n🎉 Setup complete! You can now use M3 tools in Claude Desktop.") 271 | print( 272 | "\n💡 Try asking Claude: 'What tools do you have available for MIMIC-IV data?'" 273 | ) 274 | else: 275 | print("\n💥 Setup failed. 
Please check the error messages above.") 276 | exit(1) 277 | 278 | 279 | if __name__ == "__main__": 280 | main() 281 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import tempfile 3 | from pathlib import Path 4 | from unittest.mock import MagicMock, patch 5 | 6 | import pytest 7 | from typer.testing import CliRunner 8 | 9 | import m3.cli as cli_module 10 | from m3.cli import app 11 | 12 | runner = CliRunner() 13 | 14 | 15 | @pytest.fixture(autouse=True) 16 | def inject_version(monkeypatch): 17 | monkeypatch.setattr(cli_module, "__version__", "0.0.1") 18 | 19 | 20 | def test_help_shows_app_name(): 21 | result = runner.invoke(app, ["--help"]) 22 | assert result.exit_code == 0 23 | assert "M3 CLI" in result.stdout 24 | 25 | 26 | def test_version_option_exits_zero_and_shows_version(): 27 | result = runner.invoke(app, ["--version"]) 28 | assert result.exit_code == 0 29 | assert "M3 CLI Version: 0.0.1" in result.stdout 30 | 31 | 32 | def test_unknown_command_reports_error(): 33 | result = runner.invoke(app, ["not-a-cmd"]) 34 | assert result.exit_code != 0 35 | # Check both stdout and stderr since error messages might go to either depending on environment 36 | error_message = "No such command 'not-a-cmd'" 37 | assert ( 38 | error_message in result.stdout 39 | or (hasattr(result, "stderr") and error_message in result.stderr) 40 | or error_message in result.output 41 | ) 42 | 43 | 44 | @patch("m3.cli.initialize_dataset") 45 | @patch("sqlite3.connect") 46 | def test_init_command_respects_custom_db_path( 47 | mock_sqlite_connect, mock_initialize_dataset 48 | ): 49 | """Test that m3 init --db-path correctly uses custom database path override.""" 50 | # Setup mocks 51 | mock_initialize_dataset.return_value = True 52 | 53 | # Mock sqlite connection and cursor for verification query 54 | mock_cursor = 
mock_sqlite_connect.return_value.cursor.return_value 55 | mock_cursor.fetchone.return_value = (100,) # Mock row count result 56 | 57 | with tempfile.TemporaryDirectory() as temp_dir: 58 | custom_db_path = Path(temp_dir) / "custom_mimic.db" 59 | # Resolve the path to handle symlinks (like /var -> /private/var on macOS) 60 | resolved_custom_db_path = custom_db_path.resolve() 61 | 62 | # Run the init command with custom db path 63 | result = runner.invoke( 64 | app, ["init", "mimic-iv-demo", "--db-path", str(custom_db_path)] 65 | ) 66 | 67 | # Assert command succeeded 68 | assert result.exit_code == 0 69 | 70 | # Verify the output mentions the custom path (either original or resolved form) 71 | assert ( 72 | str(custom_db_path) in result.stdout 73 | or str(resolved_custom_db_path) in result.stdout 74 | ) 75 | assert "Target database path:" in result.stdout 76 | 77 | # Verify initialize_dataset was called with the resolved custom path 78 | mock_initialize_dataset.assert_called_once_with( 79 | dataset_name="mimic-iv-demo", db_target_path=resolved_custom_db_path 80 | ) 81 | 82 | # Verify sqlite connection was attempted with the resolved custom path 83 | mock_sqlite_connect.assert_called_with(resolved_custom_db_path) 84 | 85 | 86 | def test_config_validation_sqlite_with_project_id(): 87 | """Test that sqlite backend rejects project-id parameter.""" 88 | result = runner.invoke( 89 | app, ["config", "claude", "--backend", "sqlite", "--project-id", "test"] 90 | ) 91 | assert result.exit_code == 1 92 | # Check output - error messages from typer usually go to stdout 93 | assert "project-id can only be used with --backend bigquery" in result.output 94 | 95 | 96 | def test_config_validation_bigquery_with_db_path(): 97 | """Test that bigquery backend rejects db-path parameter.""" 98 | result = runner.invoke( 99 | app, ["config", "claude", "--backend", "bigquery", "--db-path", "/test/path"] 100 | ) 101 | assert result.exit_code == 1 102 | # Check output - error messages from typer 
usually go to stdout 103 | assert "db-path can only be used with --backend sqlite" in result.output 104 | 105 | 106 | def test_config_validation_bigquery_requires_project_id(): 107 | """Test that bigquery backend requires project-id parameter.""" 108 | result = runner.invoke(app, ["config", "claude", "--backend", "bigquery"]) 109 | assert result.exit_code == 1 110 | # Check output - error messages from typer usually go to stdout 111 | assert "project-id is required when using --backend bigquery" in result.output 112 | 113 | 114 | @patch("subprocess.run") 115 | def test_config_claude_success(mock_subprocess): 116 | """Test successful Claude Desktop configuration.""" 117 | mock_subprocess.return_value = MagicMock(returncode=0) 118 | 119 | result = runner.invoke(app, ["config", "claude"]) 120 | assert result.exit_code == 0 121 | assert "Claude Desktop configuration completed" in result.stdout 122 | 123 | # Verify subprocess was called with correct script 124 | mock_subprocess.assert_called_once() 125 | call_args = mock_subprocess.call_args[0][0] 126 | assert "setup_claude_desktop.py" in call_args[1] # Script path is second argument 127 | 128 | 129 | @patch("subprocess.run") 130 | def test_config_universal_quick_mode(mock_subprocess): 131 | """Test universal config generator in quick mode.""" 132 | mock_subprocess.return_value = MagicMock(returncode=0) 133 | 134 | result = runner.invoke(app, ["config", "--quick"]) 135 | assert result.exit_code == 0 136 | assert "Generating M3 MCP configuration" in result.stdout 137 | 138 | # Verify subprocess was called with dynamic config script 139 | mock_subprocess.assert_called_once() 140 | call_args = mock_subprocess.call_args[0][0] 141 | assert "dynamic_mcp_config.py" in call_args[1] # Script path is second argument 142 | assert "--quick" in call_args 143 | 144 | 145 | @patch("subprocess.run") 146 | def test_config_script_failure(mock_subprocess): 147 | """Test error handling when config script fails.""" 148 | 
mock_subprocess.side_effect = subprocess.CalledProcessError(1, "cmd") 149 | 150 | result = runner.invoke(app, ["config", "claude"]) 151 | assert result.exit_code == 1 152 | # Just verify that the command failed with the right exit code 153 | # The specific error message may vary 154 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from m3.config import ( 4 | get_dataset_config, 5 | get_dataset_raw_files_path, 6 | get_default_database_path, 7 | ) 8 | 9 | 10 | def test_get_dataset_config_known(): 11 | cfg = get_dataset_config("mimic-iv-demo") 12 | assert isinstance(cfg, dict) 13 | assert cfg.get("default_db_filename") == "mimic_iv_demo.db" 14 | 15 | 16 | def test_get_dataset_config_unknown(): 17 | assert get_dataset_config("not-a-dataset") is None 18 | 19 | 20 | def test_default_paths(tmp_path, monkeypatch): 21 | # Redirect default dirs to a temp location 22 | import m3.config as cfg_mod 23 | 24 | monkeypatch.setattr(cfg_mod, "DEFAULT_DATABASES_DIR", tmp_path / "dbs") 25 | monkeypatch.setattr(cfg_mod, "DEFAULT_RAW_FILES_DIR", tmp_path / "raw") 26 | db_path = get_default_database_path("mimic-iv-demo") 27 | raw_path = get_dataset_raw_files_path("mimic-iv-demo") 28 | # They should be Path objects and exist 29 | assert isinstance(db_path, Path) 30 | assert db_path.parent.exists() 31 | assert isinstance(raw_path, Path) 32 | assert raw_path.exists() 33 | 34 | 35 | def test_raw_path_includes_dataset_name(tmp_path, monkeypatch): 36 | import m3.config as cfg_mod 37 | 38 | monkeypatch.setattr(cfg_mod, "DEFAULT_RAW_FILES_DIR", tmp_path / "raw") 39 | raw_path = get_dataset_raw_files_path("mimic-iv-demo") 40 | assert "mimic-iv-demo" in str(raw_path) 41 | -------------------------------------------------------------------------------- /tests/test_config_scripts.py: 
-------------------------------------------------------------------------------- 1 | """Tests for MCP configuration scripts.""" 2 | 3 | import sys 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | sys.path.insert(0, str(Path(__file__).parent.parent / "src")) 10 | 11 | from m3.mcp_client_configs.dynamic_mcp_config import MCPConfigGenerator 12 | 13 | 14 | class TestMCPConfigGenerator: 15 | """Test the MCPConfigGenerator class.""" 16 | 17 | def test_generate_config_sqlite_default(self): 18 | """Test generating SQLite config with defaults.""" 19 | generator = MCPConfigGenerator() 20 | 21 | with ( 22 | patch.object(generator, "_validate_python_path", return_value=True), 23 | patch.object(generator, "_validate_directory", return_value=True), 24 | ): 25 | config = generator.generate_config() 26 | 27 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "sqlite" 28 | assert "M3_PROJECT_ID" not in config["mcpServers"]["m3"]["env"] 29 | assert config["mcpServers"]["m3"]["args"] == ["-m", "m3.mcp_server"] 30 | 31 | def test_generate_config_bigquery_with_project(self): 32 | """Test generating BigQuery config with project ID.""" 33 | generator = MCPConfigGenerator() 34 | 35 | with ( 36 | patch.object(generator, "_validate_python_path", return_value=True), 37 | patch.object(generator, "_validate_directory", return_value=True), 38 | ): 39 | config = generator.generate_config( 40 | backend="bigquery", project_id="test-project" 41 | ) 42 | 43 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "bigquery" 44 | assert config["mcpServers"]["m3"]["env"]["M3_PROJECT_ID"] == "test-project" 45 | assert ( 46 | config["mcpServers"]["m3"]["env"]["GOOGLE_CLOUD_PROJECT"] 47 | == "test-project" 48 | ) 49 | 50 | def test_generate_config_sqlite_with_db_path(self): 51 | """Test generating SQLite config with custom database path.""" 52 | generator = MCPConfigGenerator() 53 | 54 | with ( 55 | patch.object(generator, "_validate_python_path", 
return_value=True), 56 | patch.object(generator, "_validate_directory", return_value=True), 57 | ): 58 | config = generator.generate_config( 59 | backend="sqlite", db_path="/custom/path/database.db" 60 | ) 61 | 62 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "sqlite" 63 | assert ( 64 | config["mcpServers"]["m3"]["env"]["M3_DB_PATH"] 65 | == "/custom/path/database.db" 66 | ) 67 | 68 | def test_generate_config_custom_server_name(self): 69 | """Test generating config with custom server name.""" 70 | generator = MCPConfigGenerator() 71 | 72 | with ( 73 | patch.object(generator, "_validate_python_path", return_value=True), 74 | patch.object(generator, "_validate_directory", return_value=True), 75 | ): 76 | config = generator.generate_config(server_name="custom-m3") 77 | 78 | assert "custom-m3" in config["mcpServers"] 79 | assert "m3" not in config["mcpServers"] 80 | 81 | def test_generate_config_additional_env_vars(self): 82 | """Test generating config with additional environment variables.""" 83 | generator = MCPConfigGenerator() 84 | 85 | with ( 86 | patch.object(generator, "_validate_python_path", return_value=True), 87 | patch.object(generator, "_validate_directory", return_value=True), 88 | ): 89 | config = generator.generate_config( 90 | additional_env={"DEBUG": "true", "LOG_LEVEL": "info"} 91 | ) 92 | 93 | env = config["mcpServers"]["m3"]["env"] 94 | assert env["DEBUG"] == "true" 95 | assert env["LOG_LEVEL"] == "info" 96 | assert env["M3_BACKEND"] == "sqlite" # Default should still be there 97 | 98 | def test_validation_invalid_python_path(self): 99 | """Test that invalid Python path raises error.""" 100 | generator = MCPConfigGenerator() 101 | 102 | with ( 103 | patch.object(generator, "_validate_python_path", return_value=False), 104 | patch.object(generator, "_validate_directory", return_value=True), 105 | ): 106 | with pytest.raises(ValueError, match="Invalid Python path"): 107 | generator.generate_config(python_path="/invalid/python") 108 | 
109 | def test_validation_invalid_directory(self): 110 | """Test that invalid working directory raises error.""" 111 | generator = MCPConfigGenerator() 112 | 113 | with ( 114 | patch.object(generator, "_validate_python_path", return_value=True), 115 | patch.object(generator, "_validate_directory", return_value=False), 116 | ): 117 | with pytest.raises(ValueError, match="Invalid working directory"): 118 | generator.generate_config(working_directory="/invalid/dir") 119 | -------------------------------------------------------------------------------- /tests/test_data_io.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | from m3.data_io import COMMON_USER_AGENT, _scrape_urls_from_html_page 4 | 5 | 6 | class DummyResponse: 7 | def __init__(self, content, status_code=200, headers=None): 8 | self.content = content.encode() 9 | self.status_code = status_code 10 | self.headers = headers or {} 11 | 12 | def raise_for_status(self): 13 | if not (200 <= self.status_code < 300): 14 | raise requests.exceptions.HTTPError(response=self) 15 | 16 | @property 17 | def reason(self): 18 | return "Error" 19 | 20 | def iter_content(self, chunk_size=1): 21 | yield from self.content 22 | 23 | 24 | def test_scrape_urls(monkeypatch): 25 | html = ( 26 | "<html><body>" 27 | '<a href="file1.csv.gz">ok</a>' 28 | '<a href="skip.txt">no</a>' 29 | "</body></html>" 30 | ) 31 | dummy = DummyResponse(html) 32 | session = requests.Session() 33 | monkeypatch.setattr(session, "get", lambda url, timeout=None: dummy) 34 | urls = _scrape_urls_from_html_page("http://example.com/", session) 35 | assert urls == ["http://example.com/file1.csv.gz"] 36 | 37 | 38 | def test_scrape_no_matching_suffix(monkeypatch): 39 | html = '<html><body><a href="file1.txt">ok</a></body></html>' 40 | dummy = DummyResponse(html) 41 | session = requests.Session() 42 | monkeypatch.setattr(session, "get", lambda url, timeout=None: dummy) 43 | urls = 
_scrape_urls_from_html_page("http://example.com/", session) 44 | assert urls == [] 45 | 46 | 47 | def test_common_user_agent_header(): 48 | # Ensure the constant is set and looks like a UA string 49 | assert isinstance(COMMON_USER_AGENT, str) 50 | assert "Mozilla/" in COMMON_USER_AGENT 51 | -------------------------------------------------------------------------------- /tests/test_example.py: -------------------------------------------------------------------------------- 1 | def test_always_passes(): 2 | """ 3 | A simple placeholder test that always passes. 4 | This ensures the test runner is configured correctly. 5 | """ 6 | assert True 7 | -------------------------------------------------------------------------------- /tests/test_mcp_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the MCP server functionality. 3 | """ 4 | 5 | import os 6 | import sqlite3 7 | from pathlib import Path 8 | from unittest.mock import Mock, patch 9 | 10 | import pytest 11 | from fastmcp import Client 12 | 13 | # Mock the database path check during import to handle CI environments 14 | with patch("pathlib.Path.exists", return_value=True): 15 | with patch( 16 | "m3.mcp_server.get_default_database_path", return_value=Path("/fake/test.db") 17 | ): 18 | from m3.mcp_server import _init_backend, mcp 19 | 20 | 21 | def _bigquery_available(): 22 | """Check if BigQuery dependencies are available.""" 23 | try: 24 | import importlib.util 25 | 26 | return importlib.util.find_spec("google.cloud.bigquery") is not None 27 | except ImportError: 28 | return False 29 | 30 | 31 | class TestMCPServerSetup: 32 | """Test MCP server setup and configuration.""" 33 | 34 | def test_server_instance_exists(self): 35 | """Test that the FastMCP server instance exists.""" 36 | assert mcp is not None 37 | assert mcp.name == "m3" 38 | 39 | def test_backend_init_sqlite_default(self): 40 | """Test SQLite backend initialization with defaults.""" 41 | 
with patch.dict(os.environ, {"M3_BACKEND": "sqlite"}, clear=True): 42 | with patch("m3.mcp_server.get_default_database_path") as mock_path: 43 | mock_path.return_value = Path("/fake/path.db") 44 | with patch("pathlib.Path.exists", return_value=True): 45 | _init_backend() 46 | # If no exception raised, initialization succeeded 47 | 48 | def test_backend_init_sqlite_custom_path(self): 49 | """Test SQLite backend initialization with custom path.""" 50 | with patch.dict( 51 | os.environ, 52 | {"M3_BACKEND": "sqlite", "M3_DB_PATH": "/custom/path.db"}, 53 | clear=True, 54 | ): 55 | with patch("pathlib.Path.exists", return_value=True): 56 | _init_backend() 57 | # If no exception raised, initialization succeeded 58 | 59 | def test_backend_init_sqlite_missing_db(self): 60 | """Test SQLite backend initialization with missing database.""" 61 | with patch.dict(os.environ, {"M3_BACKEND": "sqlite"}, clear=True): 62 | with patch("m3.mcp_server.get_default_database_path") as mock_path: 63 | mock_path.return_value = Path("/fake/path.db") 64 | with patch("pathlib.Path.exists", return_value=False): 65 | with pytest.raises(FileNotFoundError): 66 | _init_backend() 67 | 68 | @pytest.mark.skipif( 69 | not _bigquery_available(), reason="BigQuery dependencies not available" 70 | ) 71 | def test_backend_init_bigquery(self): 72 | """Test BigQuery backend initialization.""" 73 | with patch.dict( 74 | os.environ, 75 | {"M3_BACKEND": "bigquery", "M3_PROJECT_ID": "test-project"}, 76 | clear=True, 77 | ): 78 | with patch("google.cloud.bigquery.Client") as mock_client: 79 | mock_client.return_value = Mock() 80 | _init_backend() 81 | # If no exception raised, initialization succeeded 82 | mock_client.assert_called_once_with(project="test-project") 83 | 84 | def test_backend_init_invalid(self): 85 | """Test initialization with invalid backend.""" 86 | with patch.dict(os.environ, {"M3_BACKEND": "invalid"}, clear=True): 87 | with pytest.raises(ValueError, match="Unsupported backend"): 88 | 
_init_backend() 89 | 90 | 91 | class TestMCPTools: 92 | """Test MCP tools functionality.""" 93 | 94 | @pytest.fixture 95 | def test_db(self, tmp_path): 96 | """Create a test SQLite database.""" 97 | db_path = tmp_path / "test.db" 98 | 99 | # Create test database with MIMIC-IV-like structure 100 | conn = sqlite3.connect(db_path) 101 | cursor = conn.cursor() 102 | 103 | # Create icu_icustays table 104 | cursor.execute(""" 105 | CREATE TABLE icu_icustays ( 106 | subject_id INTEGER, 107 | hadm_id INTEGER, 108 | stay_id INTEGER, 109 | intime TEXT, 110 | outtime TEXT 111 | ) 112 | """) 113 | cursor.execute(""" 114 | INSERT INTO icu_icustays (subject_id, hadm_id, stay_id, intime, outtime) 115 | VALUES 116 | (10000032, 20000001, 30000001, '2180-07-23 15:00:00', '2180-07-24 12:00:00'), 117 | (10000033, 20000002, 30000002, '2180-08-15 10:30:00', '2180-08-16 14:15:00') 118 | """) 119 | 120 | # Create hosp_labevents table 121 | cursor.execute(""" 122 | CREATE TABLE hosp_labevents ( 123 | subject_id INTEGER, 124 | hadm_id INTEGER, 125 | itemid INTEGER, 126 | charttime TEXT, 127 | value TEXT 128 | ) 129 | """) 130 | cursor.execute(""" 131 | INSERT INTO hosp_labevents (subject_id, hadm_id, itemid, charttime, value) 132 | VALUES 133 | (10000032, 20000001, 50912, '2180-07-23 16:00:00', '120'), 134 | (10000033, 20000002, 50912, '2180-08-15 11:00:00', '95') 135 | """) 136 | 137 | conn.commit() 138 | conn.close() 139 | 140 | return str(db_path) 141 | 142 | @pytest.mark.asyncio 143 | async def test_tools_via_client(self, test_db): 144 | """Test MCP tools through the FastMCP client.""" 145 | # Set up environment for SQLite backend with OAuth2 disabled 146 | with patch.dict( 147 | os.environ, 148 | { 149 | "M3_BACKEND": "sqlite", 150 | "M3_DB_PATH": test_db, 151 | "M3_OAUTH2_ENABLED": "false", 152 | }, 153 | clear=True, 154 | ): 155 | # Initialize backend 156 | _init_backend() 157 | 158 | # Test via FastMCP client 159 | async with Client(mcp) as client: 160 | # Test execute_mimic_query 
tool 161 | result = await client.call_tool( 162 | "execute_mimic_query", 163 | {"sql_query": "SELECT COUNT(*) as count FROM icu_icustays"}, 164 | ) 165 | result_text = str(result) 166 | assert "count" in result_text 167 | assert "2" in result_text 168 | 169 | # Test get_icu_stays tool 170 | result = await client.call_tool( 171 | "get_icu_stays", {"patient_id": 10000032, "limit": 10} 172 | ) 173 | result_text = str(result) 174 | assert "10000032" in result_text 175 | 176 | # Test get_lab_results tool 177 | result = await client.call_tool( 178 | "get_lab_results", {"patient_id": 10000032, "limit": 20} 179 | ) 180 | result_text = str(result) 181 | assert "10000032" in result_text 182 | 183 | # Test get_database_schema tool 184 | result = await client.call_tool("get_database_schema", {}) 185 | result_text = str(result) 186 | assert "icu_icustays" in result_text or "hosp_labevents" in result_text 187 | 188 | @pytest.mark.asyncio 189 | async def test_security_checks(self, test_db): 190 | """Test SQL injection protection.""" 191 | with patch.dict( 192 | os.environ, 193 | { 194 | "M3_BACKEND": "sqlite", 195 | "M3_DB_PATH": test_db, 196 | "M3_OAUTH2_ENABLED": "false", 197 | }, 198 | clear=True, 199 | ): 200 | _init_backend() 201 | 202 | async with Client(mcp) as client: 203 | # Test dangerous queries are blocked 204 | dangerous_queries = [ 205 | "UPDATE icu_icustays SET subject_id = 999", 206 | "DELETE FROM icu_icustays", 207 | "INSERT INTO icu_icustays VALUES (1, 2, 3, '2020-01-01', '2020-01-02')", 208 | "DROP TABLE icu_icustays", 209 | "CREATE TABLE test (id INTEGER)", 210 | "ALTER TABLE icu_icustays ADD COLUMN test TEXT", 211 | ] 212 | 213 | for query in dangerous_queries: 214 | result = await client.call_tool( 215 | "execute_mimic_query", {"sql_query": query} 216 | ) 217 | result_text = str(result) 218 | assert ( 219 | "Security Error:" in result_text 220 | and "Only SELECT" in result_text 221 | ) 222 | 223 | @pytest.mark.asyncio 224 | async def test_invalid_sql(self, 
test_db): 225 | """Test handling of invalid SQL.""" 226 | with patch.dict( 227 | os.environ, 228 | { 229 | "M3_BACKEND": "sqlite", 230 | "M3_DB_PATH": test_db, 231 | "M3_OAUTH2_ENABLED": "false", 232 | }, 233 | clear=True, 234 | ): 235 | _init_backend() 236 | 237 | async with Client(mcp) as client: 238 | result = await client.call_tool( 239 | "execute_mimic_query", {"sql_query": "INVALID SQL QUERY"} 240 | ) 241 | result_text = str(result) 242 | assert "Query Failed:" in result_text and "syntax error" in result_text 243 | 244 | @pytest.mark.asyncio 245 | async def test_empty_results(self, test_db): 246 | """Test handling of queries with no results.""" 247 | with patch.dict( 248 | os.environ, 249 | { 250 | "M3_BACKEND": "sqlite", 251 | "M3_DB_PATH": test_db, 252 | "M3_OAUTH2_ENABLED": "false", 253 | }, 254 | clear=True, 255 | ): 256 | _init_backend() 257 | 258 | async with Client(mcp) as client: 259 | result = await client.call_tool( 260 | "execute_mimic_query", 261 | { 262 | "sql_query": "SELECT * FROM icu_icustays WHERE subject_id = 999999" 263 | }, 264 | ) 265 | result_text = str(result) 266 | assert "No results found" in result_text 267 | 268 | @pytest.mark.asyncio 269 | async def test_oauth2_authentication_required(self, test_db): 270 | """Test that OAuth2 authentication is required when enabled.""" 271 | # Set up environment for SQLite backend with OAuth2 enabled 272 | with patch.dict( 273 | os.environ, 274 | { 275 | "M3_BACKEND": "sqlite", 276 | "M3_DB_PATH": test_db, 277 | "M3_OAUTH2_ENABLED": "true", 278 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 279 | "M3_OAUTH2_AUDIENCE": "m3-api", 280 | }, 281 | clear=True, 282 | ): 283 | _init_backend() 284 | 285 | async with Client(mcp) as client: 286 | # Test that tools require authentication 287 | result = await client.call_tool( 288 | "execute_mimic_query", 289 | {"sql_query": "SELECT COUNT(*) FROM icu_icustays"}, 290 | ) 291 | result_text = str(result) 292 | assert "Missing OAuth2 access token" in 
result_text 293 | 294 | 295 | class TestBigQueryIntegration: 296 | """Test BigQuery integration with mocks (no real API calls).""" 297 | 298 | @pytest.mark.skipif( 299 | not _bigquery_available(), reason="BigQuery dependencies not available" 300 | ) 301 | @pytest.mark.asyncio 302 | async def test_bigquery_tools(self): 303 | """Test BigQuery tools functionality with mocks.""" 304 | with patch.dict( 305 | os.environ, 306 | {"M3_BACKEND": "bigquery", "M3_PROJECT_ID": "test-project"}, 307 | clear=True, 308 | ): 309 | with patch("google.cloud.bigquery.Client") as mock_client: 310 | # Mock BigQuery client and query results 311 | mock_job = Mock() 312 | mock_df = Mock() 313 | mock_df.empty = False 314 | mock_df.to_string.return_value = "Mock BigQuery result" 315 | mock_df.__len__ = Mock(return_value=5) 316 | mock_job.to_dataframe.return_value = mock_df 317 | 318 | mock_client_instance = Mock() 319 | mock_client_instance.query.return_value = mock_job 320 | mock_client.return_value = mock_client_instance 321 | 322 | _init_backend() 323 | 324 | async with Client(mcp) as client: 325 | # Test execute_mimic_query tool 326 | result = await client.call_tool( 327 | "execute_mimic_query", 328 | { 329 | "sql_query": "SELECT COUNT(*) FROM `physionet-data.mimiciv_3_1_icu.icustays`" 330 | }, 331 | ) 332 | result_text = str(result) 333 | assert "Mock BigQuery result" in result_text 334 | 335 | # Test get_race_distribution tool 336 | result = await client.call_tool( 337 | "get_race_distribution", {"limit": 5} 338 | ) 339 | result_text = str(result) 340 | assert "Mock BigQuery result" in result_text 341 | 342 | # Verify BigQuery client was called 343 | mock_client.assert_called_once_with(project="test-project") 344 | assert mock_client_instance.query.called 345 | 346 | 347 | class TestServerIntegration: 348 | """Test overall server integration.""" 349 | 350 | def test_server_main_function_exists(self): 351 | """Test that the main function exists and is callable.""" 352 | from 
m3.mcp_server import main 353 | 354 | assert callable(main) 355 | 356 | def test_server_can_be_imported_as_module(self): 357 | """Test that the server can be imported as a module.""" 358 | import m3.mcp_server 359 | 360 | assert hasattr(m3.mcp_server, "mcp") 361 | assert hasattr(m3.mcp_server, "main") 362 | -------------------------------------------------------------------------------- /tests/test_oauth2_basic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic OAuth2 authentication tests. 3 | """ 4 | 5 | import os 6 | from unittest.mock import patch 7 | 8 | import pytest 9 | 10 | from m3.auth import ( 11 | OAuth2Config, 12 | init_oauth2, 13 | is_oauth2_enabled, 14 | require_oauth2, 15 | ) 16 | 17 | 18 | class TestOAuth2BasicConfig: 19 | """Test basic OAuth2 configuration.""" 20 | 21 | def test_oauth2_disabled_by_default(self): 22 | """Test that OAuth2 is disabled by default.""" 23 | with patch.dict(os.environ, {}, clear=True): 24 | config = OAuth2Config() 25 | assert not config.enabled 26 | 27 | def test_oauth2_enabled_configuration(self): 28 | """Test OAuth2 enabled configuration.""" 29 | env_vars = { 30 | "M3_OAUTH2_ENABLED": "true", 31 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 32 | "M3_OAUTH2_AUDIENCE": "m3-api", 33 | "M3_OAUTH2_REQUIRED_SCOPES": "read:mimic-data,write:mimic-data", 34 | } 35 | 36 | with patch.dict(os.environ, env_vars, clear=True): 37 | config = OAuth2Config() 38 | assert config.enabled 39 | assert config.issuer_url == "https://auth.example.com" 40 | assert config.audience == "m3-api" 41 | assert config.required_scopes == {"read:mimic-data", "write:mimic-data"} 42 | 43 | def test_oauth2_invalid_configuration_raises_error(self): 44 | """Test that invalid OAuth2 configuration raises an error.""" 45 | with patch.dict(os.environ, {"M3_OAUTH2_ENABLED": "true"}, clear=True): 46 | with pytest.raises(ValueError, match="M3_OAUTH2_ISSUER_URL is required"): 47 | OAuth2Config() 48 | 49 | def 
test_jwks_url_auto_discovery(self): 50 | """Test automatic JWKS URL discovery.""" 51 | env_vars = { 52 | "M3_OAUTH2_ENABLED": "true", 53 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 54 | "M3_OAUTH2_AUDIENCE": "m3-api", 55 | } 56 | 57 | with patch.dict(os.environ, env_vars, clear=True): 58 | config = OAuth2Config() 59 | assert config.jwks_url == "https://auth.example.com/.well-known/jwks.json" 60 | 61 | def test_scope_parsing(self): 62 | """Test scope parsing from environment variable.""" 63 | config = OAuth2Config() 64 | 65 | # Test comma-separated scopes 66 | scopes = config._parse_scopes("read:data, write:data, admin") 67 | assert scopes == {"read:data", "write:data", "admin"} 68 | 69 | # Test empty scopes 70 | scopes = config._parse_scopes("") 71 | assert scopes == set() 72 | 73 | 74 | class TestOAuth2BasicIntegration: 75 | """Test basic OAuth2 integration functions.""" 76 | 77 | def test_init_oauth2_disabled(self): 78 | """Test OAuth2 initialization when disabled.""" 79 | with patch.dict(os.environ, {}, clear=True): 80 | init_oauth2() 81 | assert not is_oauth2_enabled() 82 | 83 | def test_init_oauth2_enabled(self): 84 | """Test OAuth2 initialization when enabled.""" 85 | env_vars = { 86 | "M3_OAUTH2_ENABLED": "true", 87 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 88 | "M3_OAUTH2_AUDIENCE": "m3-api", 89 | } 90 | 91 | with patch.dict(os.environ, env_vars, clear=True): 92 | init_oauth2() 93 | assert is_oauth2_enabled() 94 | 95 | 96 | class TestOAuth2BasicDecorator: 97 | """Test basic OAuth2 decorator functionality.""" 98 | 99 | def setup_method(self): 100 | """Set up test fixtures.""" 101 | # Reset global state 102 | import m3.auth 103 | 104 | m3.auth._oauth2_config = None 105 | m3.auth._oauth2_validator = None 106 | 107 | def test_decorator_with_oauth2_disabled(self): 108 | """Test decorator behavior when OAuth2 is disabled.""" 109 | 110 | @require_oauth2 111 | def test_function(): 112 | return "success" 113 | 114 | with patch.dict(os.environ, 
{}, clear=True): 115 | init_oauth2() 116 | 117 | # Should allow access when OAuth2 is disabled 118 | result = test_function() 119 | assert result == "success" 120 | 121 | def test_decorator_with_missing_token(self): 122 | """Test decorator behavior with missing token.""" 123 | 124 | @require_oauth2 125 | def test_function(): 126 | return "success" 127 | 128 | env_vars = { 129 | "M3_OAUTH2_ENABLED": "true", 130 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 131 | "M3_OAUTH2_AUDIENCE": "m3-api", 132 | } 133 | 134 | with patch.dict(os.environ, env_vars, clear=True): 135 | init_oauth2() 136 | 137 | # Should return error when token is missing 138 | result = test_function() 139 | assert "Missing OAuth2 access token" in result 140 | 141 | def test_decorator_with_invalid_token_format(self): 142 | """Test decorator behavior with invalid token format.""" 143 | 144 | @require_oauth2 145 | def test_function(): 146 | return "success" 147 | 148 | env_vars = { 149 | "M3_OAUTH2_ENABLED": "true", 150 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 151 | "M3_OAUTH2_AUDIENCE": "m3-api", 152 | "M3_OAUTH2_TOKEN": "invalid-token", 153 | } 154 | 155 | with patch.dict(os.environ, env_vars, clear=True): 156 | init_oauth2() 157 | 158 | # Should return error with invalid token format 159 | result = test_function() 160 | assert "Invalid token format" in result 161 | 162 | def test_decorator_with_valid_jwt_format(self): 163 | """Test decorator behavior with valid JWT format.""" 164 | 165 | @require_oauth2 166 | def test_function(): 167 | return "success" 168 | 169 | # Valid JWT format (header.payload.signature) 170 | valid_jwt = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.signature" 171 | 172 | env_vars = { 173 | "M3_OAUTH2_ENABLED": "true", 174 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 175 | "M3_OAUTH2_AUDIENCE": "m3-api", 176 | "M3_OAUTH2_TOKEN": f"Bearer {valid_jwt}", 177 | } 178 | 179 | with 
patch.dict(os.environ, env_vars, clear=True): 180 | init_oauth2() 181 | 182 | # Should work with valid JWT format 183 | result = test_function() 184 | assert result == "success" 185 | 186 | def test_decorator_with_bearer_prefix_removal(self): 187 | """Test that Bearer prefix is correctly removed.""" 188 | 189 | @require_oauth2 190 | def test_function(): 191 | return "success" 192 | 193 | # Valid JWT format (header.payload.signature) 194 | valid_jwt = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.signature" 195 | 196 | env_vars = { 197 | "M3_OAUTH2_ENABLED": "true", 198 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 199 | "M3_OAUTH2_AUDIENCE": "m3-api", 200 | "M3_OAUTH2_TOKEN": f"Bearer {valid_jwt}", 201 | } 202 | 203 | with patch.dict(os.environ, env_vars, clear=True): 204 | init_oauth2() 205 | 206 | # Should work even with Bearer prefix 207 | result = test_function() 208 | assert result == "success" 209 | -------------------------------------------------------------------------------- /webapp/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /webapp/README.md: -------------------------------------------------------------------------------- 1 | # M3 Webapp README 2 | 3 | This file provides instructions on how to run and build the M3 web application. 
4 | 5 | ## Setup 6 | ```bash 7 | cd webapp # Navigate to webapp directory 8 | npm install # Install dependencies 9 | ``` 10 | 11 | ## Available Scripts 12 | 13 | In the project directory, you can run: 14 | 15 | ### `npm start` 16 | 17 | Runs the app in the development mode.\ 18 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser. 19 | 20 | The page will reload if you make edits. 21 | 22 | ### `npm run build` 23 | 24 | Builds the app for production to the `build` folder. 25 | -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "homepage": "https://rafiattrach.github.io/m3", 6 | "dependencies": { 7 | "@emailjs/browser": "^4.4.1", 8 | "@testing-library/dom": "^10.4.0", 9 | "@testing-library/jest-dom": "^6.6.3", 10 | "@testing-library/react": "^16.3.0", 11 | "@testing-library/user-event": "^13.5.0", 12 | "react": "^19.1.0", 13 | "react-dom": "^19.1.0", 14 | "react-router-dom": "^7.6.2", 15 | "react-scripts": "5.0.1", 16 | "web-vitals": "^2.1.4" 17 | }, 18 | "scripts": { 19 | "start": "react-scripts start", 20 | "build": "react-scripts build", 21 | "test": "react-scripts test", 22 | "eject": "react-scripts eject" 23 | }, 24 | "eslintConfig": { 25 | "extends": [ 26 | "react-app", 27 | "react-app/jest" 28 | ] 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /webapp/public/banner1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner1.png -------------------------------------------------------------------------------- /webapp/public/banner2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner2.png -------------------------------------------------------------------------------- /webapp/public/banner3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner3.png -------------------------------------------------------------------------------- /webapp/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/favicon.ico -------------------------------------------------------------------------------- /webapp/public/index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8" /> 5 | <link rel="icon" href="%PUBLIC_URL%/m3_logo.png" /> 6 | <meta name="viewport" content="width=device-width, initial-scale=1" /> 7 | <meta name="theme-color" content="#000000" /> 8 | <meta 9 | name="description" 10 | content="Web site created using create-react-app" 11 | /> 12 | <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" /> 13 | <!-- 14 | manifest.json provides metadata used when your web app is installed on a 15 | user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/ 16 | --> 17 | <link rel="manifest" href="%PUBLIC_URL%/manifest.json" /> 18 | <!-- 19 | Notice the use of %PUBLIC_URL% in the tags above. 
20 | It will be replaced with the URL of the `public` folder during the build. 21 | Only files inside the `public` folder can be referenced from the HTML. 22 | 23 | Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will 24 | work correctly both with client-side routing and a non-root public URL. 25 | Learn how to configure a non-root public URL by running `npm run build`. 26 | --> 27 | <title>M3 - MCP for EHRs 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /webapp/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/logo192.png -------------------------------------------------------------------------------- /webapp/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/logo512.png -------------------------------------------------------------------------------- /webapp/public/m3_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_architecture.png -------------------------------------------------------------------------------- /webapp/public/m3_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_logo.png -------------------------------------------------------------------------------- /webapp/public/m3_logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_logo_transparent.png -------------------------------------------------------------------------------- /webapp/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "M3", 3 | "name": "M3-MCP for EHRs", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 
12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /webapp/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_1.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_2.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_3.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_4.mp4 -------------------------------------------------------------------------------- /webapp/src/App.js: 
-------------------------------------------------------------------------------- 1 | import React, { useEffect } from 'react'; 2 | import './App.css'; 3 | import Header from './components/Header'; 4 | import Hero from './components/Hero'; 5 | import Contact from './components/Contact'; 6 | import Paper from './components/Paper'; 7 | import Demos from './components/Demos'; 8 | import Explanation from './components/Explanation'; 9 | import Features from './components/Features'; 10 | import Citation from './components/Citation'; 11 | import CTA from './components/CTA'; 12 | import Footer from './components/Footer'; 13 | import Installation from './components/Installation'; 14 | 15 | function App() { 16 | useEffect(() => { 17 | // Header scroll effect 18 | const handleScroll = () => { 19 | const header = document.querySelector('header'); 20 | if (header) { 21 | if (window.scrollY > 100) { 22 | header.style.background = 'rgba(255, 255, 255, 0.98)'; 23 | header.style.boxShadow = '0 2px 20px rgba(0, 0, 0, 0.1)'; 24 | } else { 25 | header.style.background = 'rgba(255, 255, 255, 0.95)'; 26 | header.style.boxShadow = 'none'; 27 | } 28 | } 29 | 30 | const scrolled = window.pageYOffset; 31 | const laptopMockup = document.querySelector('.laptop-mockup'); 32 | 33 | if (laptopMockup) { 34 | const rate = scrolled * 0.2; 35 | laptopMockup.style.transform = `translateY(${rate}px)`; 36 | } 37 | 38 | const ctaSection = document.querySelector('.cta-section'); 39 | if (ctaSection) { 40 | const rate = scrolled * 0.1; 41 | ctaSection.style.backgroundPosition = `center ${rate}px`; 42 | } 43 | }; 44 | 45 | window.addEventListener('scroll', handleScroll); 46 | 47 | // Intersection Observer for animations 48 | const observerOptions = { 49 | threshold: 0.1, 50 | rootMargin: '0px 0px -50px 0px' 51 | }; 52 | 53 | const observer = new IntersectionObserver((entries) => { 54 | entries.forEach(entry => { 55 | if (entry.isIntersecting) { 56 | entry.target.classList.add('visible'); 57 | } 58 | }); 59 
| }, observerOptions); 60 | 61 | document.querySelectorAll('.fade-in').forEach(el => { 62 | observer.observe(el); 63 | }); 64 | 65 | // Add interactive hover effects for demo cards 66 | document.querySelectorAll('.demo-card').forEach(card => { 67 | card.addEventListener('mouseenter', () => { 68 | card.style.transform = 'translateY(-8px) scale(1.02)'; 69 | }); 70 | 71 | card.addEventListener('mouseleave', () => { 72 | card.style.transform = 'translateY(0) scale(1)'; 73 | }); 74 | }); 75 | 76 | // Animate dashboard cards on scroll 77 | const animateDashboard = () => { 78 | const cards = document.querySelectorAll('.dashboard-card'); 79 | cards.forEach((card, index) => { 80 | setTimeout(() => { 81 | card.style.transform = 'translateY(0)'; 82 | card.style.opacity = '1'; 83 | }, index * 200); 84 | }); 85 | }; 86 | 87 | // Initialize dashboard animation 88 | setTimeout(animateDashboard, 1000); 89 | 90 | return () => { 91 | window.removeEventListener('scroll', handleScroll); 92 | // Clean up other event listeners if necessary 93 | }; 94 | }, []); 95 | 96 | return ( 97 |
98 |
99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 |
110 | ); 111 | } 112 | 113 | export default App; 114 | -------------------------------------------------------------------------------- /webapp/src/components/ArchitectureDiagram.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | const ArchitectureDiagram = () => { 4 | return ( 5 |
6 |
7 |
8 |

Architecture Overview

9 |

How the m3 Model Context Protocol connects AI models to MIMIC-IV healthcare data

10 |
11 | 12 |
13 | m3 Architecture Diagram 24 |
25 |
26 |
27 | ); 28 | }; 29 | 30 | export default ArchitectureDiagram; 31 | -------------------------------------------------------------------------------- /webapp/src/components/CTA.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | const CTA = () => { 4 | return ( 5 | <> 6 | 32 |
33 |
34 |
35 |

Contribute to our Open Source project

36 |

Help us build a better platform for everyone. We are looking for developers to contribute with their code and ideas.

37 | 38 | 39 | 40 | 41 | Contribute Now 42 | 43 |
44 |
45 |
46 | 47 | ); 48 | }; 49 | 50 | export default CTA; 51 | -------------------------------------------------------------------------------- /webapp/src/components/Citation.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | 3 | const Citation = () => { 4 | const [copiedFormat, setCopiedFormat] = useState(null); 5 | 6 | const citations = { 7 | apa: `Al Attrach, R., Moreira, P., Fani, R., Umeton, R., & Celi, L. A. (2025). Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis. arXiv preprint arXiv:2507.01053.`, 8 | mla: `Al Attrach, Rafi, et al. "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis." arXiv preprint arXiv:2507.01053 (2025).`, 9 | chicago: `Al Attrach, Rafi, Pedro Moreira, Rajna Fani, Renato Umeton, and Leo Anthony Celi. "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis." arXiv preprint arXiv:2507.01053 (2025).`, 10 | bibtex: `@misc{attrach2025conversationalllmssimplifysecure, 11 | title={Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis}, 12 | author={Rafi Al Attrach and Pedro Moreira and Rajna Fani and Renato Umeton and Leo Anthony Celi}, 13 | year={2025}, 14 | eprint={2507.01053}, 15 | archivePrefix={arXiv}, 16 | primaryClass={cs.IR}, 17 | url={https://arxiv.org/abs/2507.01053}, 18 | }` 19 | }; 20 | 21 | const handleCopy = (format) => { 22 | navigator.clipboard.writeText(citations[format]) 23 | .then(() => { 24 | setCopiedFormat(format); 25 | setTimeout(() => setCopiedFormat(null), 2000); 26 | }) 27 | .catch(err => { 28 | console.error('Could not copy text: ', err); 29 | }); 30 | }; 31 | 32 | return ( 33 |
34 |
35 |
36 |

Cite This Work

37 |

If you use m3 in your research, please cite our paper :)

38 |
39 | 40 |
41 |
42 |
43 |

BibTeX Format

44 | 50 |
51 |
52 |
{citations.bibtex}
53 |
54 |
55 |
56 |
57 |
58 | ); 59 | }; 60 | 61 | export default Citation; 62 | -------------------------------------------------------------------------------- /webapp/src/components/Contact.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import emailjs from '@emailjs/browser'; 3 | 4 | const Contact = () => { 5 | // EmailJS configuration with your actual credentials 6 | const EMAILJS_CONFIG = { 7 | serviceId: 'm3_contact_service', 8 | templateId: 'template_sn5rm19', 9 | publicKey: 'aUrTfsE6oJtpIe1ac' 10 | }; 11 | 12 | const [contactForm, setContactForm] = useState({ 13 | email: '', 14 | inquiryType: 'hospital', 15 | message: '' 16 | }); 17 | const [isSubmitting, setIsSubmitting] = useState(false); 18 | const [submitStatus, setSubmitStatus] = useState(null); 19 | 20 | const handleInputChange = (e) => { 21 | const { name, value } = e.target; 22 | setContactForm(prev => ({ 23 | ...prev, 24 | [name]: value 25 | })); 26 | }; 27 | 28 | const handleSubmit = async (e) => { 29 | e.preventDefault(); 30 | setIsSubmitting(true); 31 | setSubmitStatus(null); 32 | 33 | try { 34 | // Prepare template parameters 35 | const templateParams = { 36 | user_email: contactForm.email, 37 | user_name: contactForm.email.split('@')[0], // Extract name from email 38 | inquiry_type: contactForm.inquiryType, 39 | message: contactForm.message || 'No additional message provided', 40 | inquiry_type_label: contactForm.inquiryType === 'hospital' ? 'Hospital/EHR MCP Request' : 41 | contactForm.inquiryType === 'suggestions' ? 
'Suggestions & Feedback' : 'General Contact', 42 | timestamp: new Date().toLocaleString() 43 | }; 44 | 45 | // Send email using EmailJS 46 | const response = await emailjs.send( 47 | EMAILJS_CONFIG.serviceId, 48 | EMAILJS_CONFIG.templateId, 49 | templateParams, 50 | EMAILJS_CONFIG.publicKey 51 | ); 52 | 53 | console.log('Email sent successfully:', response); 54 | setSubmitStatus({ type: 'success', message: 'Message sent successfully! We\'ll get back to you soon.' }); 55 | setContactForm({ email: '', inquiryType: 'hospital', message: '' }); 56 | 57 | } catch (error) { 58 | console.error('Error sending email:', error); 59 | setSubmitStatus('error'); 60 | // Store the specific error message for display 61 | setSubmitStatus({ type: 'error', message: getErrorMessage(error) }); 62 | } finally { 63 | setIsSubmitting(false); 64 | } 65 | }; 66 | 67 | const getErrorMessage = (error) => { 68 | // Handle different types of errors 69 | if (error.message?.includes('rate limit') || error.status === 429) { 70 | return 'Too many emails sent recently. Please try again in a few minutes.'; 71 | } 72 | if (error.message?.includes('invalid email') || error.status === 400) { 73 | return 'Please check your email address and try again.'; 74 | } 75 | if (error.message?.includes('network') || !navigator.onLine) { 76 | return 'Network error. Please check your internet connection and try again.'; 77 | } 78 | if (error.status === 403) { 79 | return 'Service temporarily unavailable. Please try again later.'; 80 | } 81 | return 'Failed to send message. Please try again later.'; 82 | }; 83 | 84 | return ( 85 | <> 86 | 244 |
245 |
246 |
247 |

Let's Connect

248 |

Need an MCP for your hospital or EHR? Have suggestions? Want to collaborate? We'd love to hear from you!

249 |

250 | ⚡ Our team responds within 24 hours 251 |

252 |
253 | 254 |
255 |
256 |
257 | 258 | 268 |
269 | 270 |
271 | 272 | 283 |
284 |
285 | 286 |
287 | 288 |