├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── feature_request.yaml │ └── question.yaml └── workflows │ ├── deploy-webapp.yaml │ ├── pre-commit.yaml │ ├── publish.yaml │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmarks └── ehrsql-naacl2024 │ ├── README.md │ ├── claude-sonnet-4 │ └── EHRSQL_benchmark.csv │ └── gpt-oss-20B │ ├── EHRSQL_benchmark.csv │ └── conversations │ ├── 10.conversation.json │ ├── 100.conversation.json │ ├── 101.conversation.json │ ├── 11.conversation.json │ ├── 12.conversation.json │ ├── 13.conversation.json │ ├── 14.conversation.json │ ├── 15.conversation.json │ ├── 16.conversation.json │ ├── 17.conversation.json │ ├── 18.conversation.json │ ├── 19.conversation.json │ ├── 2.conversation.json │ ├── 20.conversation.json │ ├── 21.conversation.json │ ├── 22.conversation.json │ ├── 23.conversation.json │ ├── 24.conversation.json │ ├── 25.conversation.json │ ├── 26.conversation.json │ ├── 27.conversation.json │ ├── 28.conversation.json │ ├── 29.conversation.json │ ├── 3.conversation.json │ ├── 30.conversation.json │ ├── 31.conversation.json │ ├── 32.conversation.json │ ├── 33.conversation.json │ ├── 34.conversation.json │ ├── 35.conversation.json │ ├── 36.conversation.json │ ├── 37.conversation.json │ ├── 38.conversation.json │ ├── 39.conversation.json │ ├── 4.conversation.json │ ├── 40.conversation.json │ ├── 41.conversation.json │ ├── 42.conversation.json │ ├── 43.conversation.json │ ├── 44.conversation.json │ ├── 45.conversation.json │ ├── 46.conversation.json │ ├── 47.conversation.json │ ├── 48.conversation.json │ ├── 49.conversation.json │ ├── 5.conversation.json │ ├── 50.conversation.json │ ├── 51.conversation.json │ ├── 52.conversation.json │ ├── 53.conversation.json │ ├── 54.conversation.json │ ├── 55.conversation.json │ ├── 56.conversation.json │ ├── 57.conversation.json │ ├── 58.conversation.json │ ├── 
59.conversation.json │ ├── 6.conversation.json │ ├── 60.conversation.json │ ├── 61.conversation.json │ ├── 62.conversation.json │ ├── 63.conversation.json │ ├── 64.conversation.json │ ├── 65.conversation.json │ ├── 66.conversation.json │ ├── 67.conversation.json │ ├── 68.conversation.json │ ├── 69.conversation.json │ ├── 7.conversation.json │ ├── 70.conversation.json │ ├── 71.conversation.json │ ├── 72.conversation.json │ ├── 73.conversation.json │ ├── 74.conversation.json │ ├── 75.conversation.json │ ├── 76.conversation.json │ ├── 77.conversation.json │ ├── 78.conversation.json │ ├── 79.conversation.json │ ├── 8.conversation.json │ ├── 80.conversation.json │ ├── 81.conversation.json │ ├── 82.conversation.json │ ├── 83.conversation.json │ ├── 84.conversation.json │ ├── 85.conversation.json │ ├── 86.conversation.json │ ├── 87.conversation.json │ ├── 88.conversation.json │ ├── 89.conversation.json │ ├── 9.conversation.json │ ├── 90.conversation.json │ ├── 91.conversation.json │ ├── 92.conversation.json │ ├── 93.conversation.json │ ├── 94.conversation.json │ ├── 95.conversation.json │ ├── 96.conversation.json │ ├── 97.conversation.json │ ├── 98.conversation.json │ └── 99.conversation.json ├── docs └── OAUTH2_AUTHENTICATION.md ├── pyproject.toml ├── src └── m3 │ ├── __init__.py │ ├── auth.py │ ├── cli.py │ ├── config.py │ ├── data_io.py │ ├── mcp_client_configs │ ├── __init__.py │ ├── dynamic_mcp_config.py │ └── setup_claude_desktop.py │ └── mcp_server.py ├── tests ├── test_cli.py ├── test_config.py ├── test_config_scripts.py ├── test_data_io.py ├── test_example.py ├── test_mcp_server.py └── test_oauth2_basic.py ├── uv.lock └── webapp ├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── public ├── banner1.png ├── banner2.png ├── banner3.png ├── favicon.ico ├── index.html ├── logo192.png ├── logo512.png ├── m3_architecture.png ├── m3_logo.png ├── m3_logo_transparent.png ├── manifest.json ├── pypi_logo.svg ├── robots.txt └── videos │ ├── 
m3_website_1.mp4 │ ├── m3_website_2.mp4 │ ├── m3_website_3.mp4 │ └── m3_website_4.mp4 └── src ├── App.css ├── App.js ├── components ├── ArchitectureDiagram.js ├── CTA.js ├── Citation.js ├── Contact.js ├── Demos.js ├── Documentation.js ├── Explanation.js ├── Features.js ├── Footer.js ├── Header.js ├── Hero.js ├── Installation.js └── Paper.js ├── index.css └── index.js /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | __pycache__ 3 | *.pyc 4 | .venv 5 | .env 6 | webapp/ 7 | benchmarks/ 8 | coverage.xml 9 | .pytest_cache/ 10 | dist/ 11 | build/ 12 | *.egg-info 13 | # Keep only the demo DB 14 | m3_data/** 15 | !m3_data/databases/ 16 | !m3_data/databases/mimic_iv_demo.db 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: "🐛 Bug Report" 2 | description: Create a new ticket for a bug in M3. 3 | title: "🐛 [BUG] - " 4 | labels: 5 | - "bug" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us process your issue in timely-manner. 20 | options: 21 | - label: I have checked the documentation and existing issues. 22 | required: true 23 | - type: textarea 24 | id: description 25 | attributes: 26 | label: "Description" 27 | description: Please provide a clear and concise description of the bug. 
28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: input 32 | id: reprod-url 33 | attributes: 34 | label: "Reproduction URL" 35 | description: "If you’ve forked M3, provide a GitHub URL or repository link to reproduce the issue." 36 | placeholder: "Hint: Optional, but it helps us resolve the issue faster. Leave empty if not applicable." 37 | validations: 38 | required: false 39 | - type: textarea 40 | id: reprod 41 | attributes: 42 | label: "Reproduction Steps" 43 | description: "List the steps to reproduce the bug. The more detailed, the better!" 44 | value: | 45 | 1. Go to '...' 46 | 2. Run '....' 47 | 3. Observe '....' 48 | render: bash 49 | validations: 50 | required: true 51 | - type: markdown 52 | attributes: 53 | value: "### Environment Information" 54 | - type: dropdown 55 | id: os 56 | attributes: 57 | label: "Operating System" 58 | description: What operating system were you using when the bug occurred? 59 | options: 60 | - "Windows" 61 | - "Linux" 62 | - "Mac" 63 | - "Other" 64 | - type: input 65 | id: other-os 66 | attributes: 67 | label: "Other Operating System" 68 | description: "Only fill this if you selected 'Other' in the Operating System dropdown." 69 | placeholder: "e.g., Ubuntu 20.04" 70 | validations: 71 | required: false 72 | - type: dropdown 73 | id: backend 74 | attributes: 75 | label: "Backend" 76 | description: Which backend were you using when the bug occurred? 77 | options: 78 | - "SQLite" 79 | - "BigQuery" 80 | - "Other" 81 | - type: input 82 | id: other-backend 83 | attributes: 84 | label: "Other Backend" 85 | description: "Only fill this if you selected 'Other' in the Backend dropdown." 86 | placeholder: "e.g., Custom Database" 87 | validations: 88 | required: false 89 | - type: dropdown 90 | id: mcp-client 91 | attributes: 92 | label: "MCP Client" 93 | description: Which MCP client were you using? 
94 | options: 95 | - "Claude Desktop" 96 | - "Other" 97 | - type: input 98 | id: other-mcp-client 99 | attributes: 100 | label: "Other MCP Client" 101 | description: "Only fill this if you selected 'Other' in the MCP Client dropdown." 102 | placeholder: "e.g., Custom Client" 103 | validations: 104 | required: false 105 | - type: dropdown 106 | id: authentication 107 | attributes: 108 | label: "Authentication" 109 | description: Were you using OAuth2 authentication when the bug occurred? 110 | options: 111 | - "Yes" 112 | - "No" 113 | - type: input 114 | id: python-version 115 | attributes: 116 | label: "Python Version" 117 | description: What version of Python are you using? 118 | placeholder: "e.g., 3.10.0" 119 | validations: 120 | required: true 121 | - type: input 122 | id: m3-version 123 | attributes: 124 | label: "M3 Version" 125 | description: What version of M3 are you using? Run 'm3 --version' to check. 126 | placeholder: "e.g., 0.1.0" 127 | validations: 128 | required: true 129 | - type: textarea 130 | id: additional-info 131 | attributes: 132 | label: "Additional Information" 133 | description: "In case none of the above options fit your case, please provide additional information." 134 | placeholder: "... write here ..." 135 | validations: 136 | required: false 137 | - type: markdown 138 | attributes: 139 | value: | 140 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this report. 141 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: "💡 Feature Request" 2 | description: Suggest a new feature or enhancement for M3. 
3 | title: "💡 [FEAT] - <title>" 4 | labels: 5 | - "feature" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us process your request efficiently. 20 | options: 21 | - label: I have checked if this feature already exists or has been requested. 22 | required: true 23 | - type: textarea 24 | id: feature-description 25 | attributes: 26 | label: "Feature Description" 27 | description: Please provide a clear and concise description of the feature you are requesting. 28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: textarea 32 | id: proposed-solution 33 | attributes: 34 | label: "Proposed Solution" 35 | description: Describe how you envision this feature working. 36 | placeholder: "Explain how the feature should function." 37 | validations: 38 | required: true 39 | - type: textarea 40 | id: examples 41 | attributes: 42 | label: "Examples or References" 43 | description: Provide any examples, links, or references that might help illustrate your request. 44 | placeholder: "Hint: Optional, e.g., screenshots, links to similar features in other projects, etc." 45 | validations: 46 | required: false 47 | - type: dropdown 48 | id: contribution 49 | attributes: 50 | label: "Willing to Contribute?" 51 | description: Would you be willing to help implement this feature? 
52 | options: 53 | - "Yes" 54 | - "No" 55 | - type: dropdown 56 | id: roadmap-alignment 57 | attributes: 58 | label: "Roadmap Alignment" 59 | description: Which part of the M3 roadmap does this feature align with? 60 | options: 61 | - "Broader Dataset Coverage" 62 | - "Richer MCP Tooling" 63 | - "Technical Enhancements" 64 | - "Ecosystem and Community Contributions" 65 | - "Other" 66 | - type: input 67 | id: other-roadmap 68 | attributes: 69 | label: "Other Roadmap Alignment" 70 | description: "Only fill this if you selected 'Other' in the Roadmap Alignment dropdown." 71 | placeholder: "Describe how this feature aligns with M3's goals." 72 | validations: 73 | required: false 74 | - type: markdown 75 | attributes: 76 | value: | 77 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this request. 78 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yaml: -------------------------------------------------------------------------------- 1 | name: "❓ Ask a Question" 2 | description: Ask a question about M3. 3 | title: "❓ [QUESTION] - <title>" 4 | labels: 5 | - "question" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | <p align="center"> 11 | <img src="https://miro.medium.com/v2/resize:fit:400/1*QEps725rQjfgqNnlbRYb1g.png" alt="Harvard MIT HST Logo"> 12 | <br> 13 | <em><a href="https://doi.org/10.48550/arXiv.2507.01053">M3's Paper</a>—<a href="https://rafiattrach.github.io/m3/">M3's Website</a>.</em> 14 | </p> 15 | - type: checkboxes 16 | id: checks 17 | attributes: 18 | label: "Before Submitting" 19 | description: Please confirm the following to help us answer your question efficiently. 20 | options: 21 | - label: I have checked the documentation and existing issues. 
22 | required: true 23 | - type: textarea 24 | id: question 25 | attributes: 26 | label: "Question" 27 | description: Please provide a clear and concise question. 28 | placeholder: "Hint: ‘brevity is the soul of wit’, Hamlet — by William Shakespeare" 29 | validations: 30 | required: true 31 | - type: textarea 32 | id: context 33 | attributes: 34 | label: "Additional Context" 35 | description: Provide any additional context or details that might help answer your question. 36 | placeholder: "Hint: Optional, e.g., code snippets, error messages, etc." 37 | validations: 38 | required: false 39 | - type: input 40 | id: project-area 41 | attributes: 42 | label: "Project Area" 43 | description: Specify the area of M3 your question relates to. 44 | placeholder: "Hint: Optional, e.g., CLI, MCP Server, OAuth2, etc." 45 | validations: 46 | required: false 47 | - type: markdown 48 | attributes: 49 | value: | 50 | **Security Note:** Please do not share sensitive information such as authentication tokens, personal data, or confidential project details in this question. 
51 | -------------------------------------------------------------------------------- /.github/workflows/deploy-webapp.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Webapp to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "webapp/**" 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-node@v4 26 | with: 27 | node-version: "18" 28 | cache: "npm" 29 | cache-dependency-path: webapp/package-lock.json 30 | 31 | - uses: actions/configure-pages@v4 32 | 33 | - name: Create .env file 34 | run: | 35 | cat <<EOF > ./webapp/.env 36 | REACT_APP_FORMSPREE_FORM_IDS=${{ secrets.REACT_APP_FORMSPREE_FORM_IDS }} 37 | REACT_APP_CONTACT_EMAILS=${{ secrets.REACT_APP_CONTACT_EMAILS }} 38 | EOF 39 | 40 | - run: npm ci 41 | working-directory: ./webapp 42 | 43 | - run: npm run build 44 | working-directory: ./webapp 45 | 46 | - uses: actions/upload-pages-artifact@v3 47 | with: 48 | path: ./webapp/build 49 | 50 | deploy: 51 | environment: 52 | name: github-pages 53 | url: ${{ steps.deployment.outputs.page_url }} 54 | runs-on: ubuntu-latest 55 | needs: build 56 | steps: 57 | - name: Deploy to GitHub Pages 58 | id: deployment 59 | uses: actions/deploy-pages@v4 60 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: Pre-commit checks 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | pre-commit: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Install UV and Python 18 | uses: 
astral-sh/setup-uv@v5 19 | with: 20 | version: "latest" 21 | python-version: "3.11" 22 | - name: Symlink uv to expected path for pre-commit-uv 23 | run: | 24 | mkdir -p ~/.local/bin 25 | ln -s $(which uv) ~/.local/bin/uv 26 | - run: uv venv 27 | - run: uv sync --dev 28 | - run: uv add pytest==7.4.3 29 | - uses: tox-dev/action-pre-commit-uv@v1 30 | with: 31 | extra_args: --all-files 32 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: # Allow manual triggering 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | url: https://pypi.org/p/m3-mcp 14 | permissions: 15 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 16 | contents: read 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Set up uv 22 | uses: astral-sh/setup-uv@v5 23 | with: 24 | version: "latest" 25 | python-version: "3.11" 26 | 27 | - name: Extract version from tag 28 | id: get_version 29 | run: | 30 | # Get version from git tag (removes 'v' prefix if present) 31 | VERSION=${GITHUB_REF#refs/tags/} 32 | VERSION=${VERSION#v} 33 | echo "version=$VERSION" >> $GITHUB_OUTPUT 34 | echo "Publishing version: $VERSION" 35 | 36 | - name: Update version in pyproject.toml 37 | run: | 38 | # Update version in pyproject.toml to match the git tag 39 | sed -i "s/version = \".*\"/version = \"${{ steps.get_version.outputs.version }}\"/" pyproject.toml 40 | echo "Updated pyproject.toml version to ${{ steps.get_version.outputs.version }}" 41 | cat pyproject.toml | grep version 42 | 43 | - name: Lock dependencies 44 | run: uv lock --locked 45 | 46 | - name: Sync dependencies including dev 47 | run: uv sync --all-groups 48 | 49 | - name: Run quick tests 50 | run: | 51 | uv add pytest==7.4.3 52 | uv 
add pytest-asyncio 53 | uv run pytest tests/ -v --tb=short 54 | 55 | - name: Build package 56 | run: uv build 57 | 58 | - name: Verify package 59 | run: uv run --with twine twine check dist/* 60 | 61 | - name: Publish to PyPI 62 | uses: pypa/gh-action-pypi-publish@release/v1 63 | with: 64 | print-hash: true 65 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.10", "3.12"] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Install UV and Python 19 | uses: astral-sh/setup-uv@v5 20 | with: 21 | version: "latest" 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Create virtual environment 25 | run: uv venv 26 | 27 | - name: Install dependencies 28 | run: | 29 | uv sync --all-groups 30 | uv add pytest==7.4.3 31 | 32 | - name: Run tests 33 | run: uv run pytest -v 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | *.egg-info/ 7 | .installed.cfg 8 | *.egg 9 | *.whl 10 | 11 | # Distribution / packaging 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | MANIFEST 24 | 25 | # Virtual environments 26 | .env 27 | .*venv* 28 | env/ 29 | venv/ 30 | ENV/ 31 | env.bak/ 32 | venv.bak/ 33 | .python-version 34 | 35 | # IDE specific files 36 | .idea/ 37 | .vscode/ 38 | *.sublime-project 39 | *.sublime-workspace 40 | 41 | # Test and coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | 
nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | junit.xml 54 | 55 | # Logs and temporary files 56 | *.log 57 | *.tmp 58 | *.swp 59 | 60 | # SQLite databases (if you don't want to commit local test/demo dbs) 61 | *.db 62 | *.db-journal* 63 | mimic*.db 64 | 65 | # Configuration files 66 | config.json 67 | *config*.json 68 | 69 | # Operating System specific files 70 | .DS_Store 71 | Thumbs.db 72 | Desktop.ini 73 | 74 | # MyPy 75 | .mypy_cache/ 76 | 77 | # Ruff 78 | .ruff_cache/ 79 | 80 | # Datasets and other large files 81 | data/ 82 | m3_data/ 83 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-yaml 8 | - id: check-toml 9 | - id: check-added-large-files 10 | args: ['--maxkb=1024'] 11 | exclude: '\.(mp4|avi|mov|mkv)$' 12 | - id: mixed-line-ending 13 | 14 | - repo: https://github.com/astral-sh/ruff-pre-commit 15 | rev: v0.11.9 16 | hooks: 17 | - id: ruff 18 | args: [--fix, --exit-non-zero-on-fix] 19 | - id: ruff-format 20 | 21 | - repo: local 22 | hooks: 23 | - id: pytest 24 | name: pytest 25 | entry: pytest 26 | language: system # Assumes pytest is installed in your environment (via pip install .[dev]) 27 | types: [python] # Run on changes to Python files 28 | pass_filenames: false # Pytest typically runs on the whole suite 29 | # stages: [commit] # This is the default, explicitly stating it 30 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "Citation for this repository" 3 | authors: 4 | - family-names: Al Attrach 5 | given-names: Rafi 6 | - family-names: 
Moreira 7 | given-names: Pedro 8 | - family-names: Fani 9 | given-names: Rajna 10 | - family-names: Umeton 11 | given-names: Renato 12 | - family-names: Celi 13 | given-names: Leo Anthony 14 | title: "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis" 15 | date-released: 2025-06-27 16 | doi: 10.48550/arXiv.2507.01053 17 | url: https://arxiv.org/abs/2507.01053 18 | preferred-citation: 19 | type: article 20 | authors: 21 | - family-names: Al Attrach 22 | given-names: Rafi 23 | - family-names: Moreira 24 | given-names: Pedro 25 | - family-names: Fani 26 | given-names: Rajna 27 | - family-names: Umeton 28 | given-names: Renato 29 | - family-names: Celi 30 | given-names: Leo Anthony 31 | title: "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis" 32 | doi: 10.48550/arXiv.2507.01053 33 | year: 2025 34 | month: 6 35 | url: https://arxiv.org/abs/2507.01053 36 | publisher: arXiv 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | # Build stage: create wheel 4 | FROM python:3.11-slim AS builder 5 | 6 | WORKDIR /build 7 | COPY pyproject.toml uv.lock README.md ./ 8 | COPY src ./src 9 | 10 | RUN pip install --no-cache-dir build && \ 11 | python -m build --wheel 12 | 13 | # Base runtime: install m3 and baked SQLite DB 14 | FROM python:3.11-slim AS base 15 | 16 | ENV PYTHONUNBUFFERED=1 \ 17 | M3_BACKEND=sqlite \ 18 | M3_DB_PATH=/root/m3_data/databases/mimic_iv_demo.db 19 | 20 | WORKDIR /app 21 | 22 | COPY --from=builder /build/dist/*.whl /tmp/ 23 | RUN pip install --no-cache-dir /tmp/*.whl && rm /tmp/*.whl 24 | 25 | # Download and initialize demo DB using m3 init 26 | RUN m3 init mimic-iv-demo 27 | 28 | # Lite: SQLite only 29 | FROM base AS lite 30 | CMD ["python", "-m", "m3.mcp_server"] 31 | 32 | # BigQuery: add GCP client 33 | FROM base AS 
bigquery 34 | RUN pip install --no-cache-dir google-cloud-bigquery 35 | CMD ["python", "-m", "m3.mcp_server"] 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Rafi Al Attrach, Pedro Moreira, Rajna Fani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include pyproject.toml 4 | recursive-include src *.py 5 | global-exclude *.pyc 6 | global-exclude __pycache__ 7 | global-exclude .DS_Store 8 | prune tests 9 | prune webapp 10 | prune benchmarks 11 | prune build 12 | prune dist 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # M3: MIMIC-IV + MCP + Models 🏥🤖 2 | 3 | <div align="center"> 4 | <img src="webapp/public/m3_logo_transparent.png" alt="M3 Logo" width="300"/> 5 | </div> 6 | 7 | > **Query MIMIC-IV medical data using natural language through MCP clients** 8 | 9 | <a href="https://www.python.org/downloads/"><img alt="Python" src="https://img.shields.io/badge/Python-3.10+-blue?logo=python&logoColor=white"></a> 10 | <a href="https://modelcontextprotocol.io/"><img alt="MCP" src="https://img.shields.io/badge/MCP-Compatible-green?logo=ai&logoColor=white"></a> 11 | <a href="https://github.com/rafiattrach/m3/actions/workflows/tests.yaml"><img alt="Tests" src="https://github.com/rafiattrach/m3/actions/workflows/tests.yaml/badge.svg"></a> 12 | <a href="https://github.com/rafiattrach/m3/actions/workflows/pre-commit.yaml"><img alt="Code Quality" src="https://github.com/rafiattrach/m3/actions/workflows/pre-commit.yaml/badge.svg"></a> 13 | <a href="https://github.com/rafiattrach/m3/pulls"><img alt="PRs Welcome" src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg"></a> 14 | 15 | Transform medical data analysis with AI! Ask questions about MIMIC-IV data in plain English and get instant insights. Choose between local demo data (free) or full cloud dataset (BigQuery). 
16 | 17 | ## ✨ Features 18 | 19 | - 🔍 **Natural Language Queries**: Ask questions about MIMIC-IV data in plain English 20 | - 🏠 **Local SQLite**: Fast queries on demo database (free, no setup) 21 | - ☁️ **BigQuery Support**: Access full MIMIC-IV dataset on Google Cloud 22 | - 🔒 **Enterprise Security**: OAuth2 authentication with JWT tokens and rate limiting 23 | - 🛡️ **SQL Injection Protection**: Read-only queries with comprehensive validation 24 | 25 | ## 🚀 Quick Start 26 | 27 | > 💡 **Need more options?** Run `m3 --help` to see all available commands and options. 28 | 29 | ### 📦 Installation 30 | 31 | Choose your preferred installation method: 32 | 33 | #### Option A: Install from PyPI (Recommended) 34 | 35 | **Step 1: Create Virtual Environment** 36 | ```bash 37 | # Create virtual environment (recommended) 38 | python -m venv .venv 39 | source .venv/bin/activate # Windows: .venv\Scripts\activate 40 | ``` 41 | 42 | **Step 2: Install M3** 43 | ```bash 44 | # Install M3 45 | pip install m3-mcp 46 | ``` 47 | 48 | #### Option B: Docker 49 | 50 | ```bash 51 | # Clone repo first 52 | git clone https://github.com/rafiattrach/m3.git && cd m3 53 | 54 | # SQLite (demo DB) 55 | docker build -t m3:lite --target lite . 56 | docker run -d --name m3-server m3:lite tail -f /dev/null 57 | 58 | # BigQuery (full dataset - requires GCP credentials) 59 | docker build -t m3:bigquery --target bigquery . 
60 | docker run -d --name m3-server \ 61 | -e M3_BACKEND=bigquery \ 62 | -e M3_PROJECT_ID=YOUR_PROJECT_ID \ 63 | -v $HOME/.config/gcloud:/root/.config/gcloud:ro \ 64 | m3:bigquery tail -f /dev/null 65 | ``` 66 | 67 | **MCP client config** (Claude Desktop, LM Studio, etc.): 68 | ```json 69 | { 70 | "mcpServers": { 71 | "m3": { 72 | "command": "docker", 73 | "args": ["exec", "-i", "m3-server", "python", "-m", "m3.mcp_server"] 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | Stop container: `docker stop m3-server && docker rm m3-server` 80 | 81 | #### Option C: Install from Source 82 | 83 | #### Using standard `pip` 84 | **Step 1: Clone and Navigate** 85 | ```bash 86 | # Clone the repository 87 | git clone https://github.com/rafiattrach/m3.git 88 | cd m3 89 | ``` 90 | 91 | **Step 2: Create Virtual Environment** 92 | ```bash 93 | # Create virtual environment 94 | python -m venv .venv 95 | source .venv/bin/activate # Windows: .venv\Scripts\activate 96 | ``` 97 | 98 | **Step 3: Install M3** 99 | ```bash 100 | # Install M3 101 | pip install . 102 | ``` 103 | 104 | #### Using `UV` (Recommended) 105 | Assuming you have [UV](https://docs.astral.sh/uv/getting-started/installation/) installed. 106 | 107 | **Step 1: Clone and Navigate** 108 | ```bash 109 | # Clone the repository 110 | git clone https://github.com/rafiattrach/m3.git 111 | cd m3 112 | ``` 113 | 114 | **Step 2: Create `UV` Virtual Environment** 115 | ```bash 116 | # Create virtual environment 117 | uv venv 118 | ``` 119 | 120 | **Step 3: Install M3** 121 | ```bash 122 | uv sync 123 | # Do not forget to use `uv run` to any subsequent commands to ensure you're using the `uv` virtual environment 124 | ``` 125 | 126 | ### 🗄️ Database Configuration 127 | 128 | After installation, choose your data source: 129 | 130 | #### Option A: Local Demo Database (Recommended for Beginners) 131 | 132 | **Perfect for learning and development - completely free!** 133 | 134 | 1. 
**Download demo database**: 135 | ```bash 136 | m3 init mimic-iv-demo 137 | ``` 138 | 139 | 2. **Setup MCP Client**: 140 | ```bash 141 | m3 config 142 | ``` 143 | 144 | *Alternative: For Claude Desktop specifically:* 145 | ```bash 146 | m3 config claude 147 | ``` 148 | 149 | 3. **Restart your MCP client** and ask: 150 | 151 | - "What tools do you have for MIMIC-IV data?" 152 | - "Show me patient demographics from the ICU" 153 | 154 | #### Option B: BigQuery (Full Dataset) 155 | 156 | **For researchers needing complete MIMIC-IV data** 157 | 158 | ##### Prerequisites 159 | - Google Cloud account and project with billing enabled 160 | - Access to MIMIC-IV on BigQuery (requires PhysioNet credentialing) 161 | 162 | ##### Setup Steps 163 | 164 | 1. **Install Google Cloud CLI**: 165 | 166 | **macOS (with Homebrew):** 167 | ```bash 168 | brew install google-cloud-sdk 169 | ``` 170 | 171 | **Windows:** Download from https://cloud.google.com/sdk/docs/install 172 | 173 | **Linux:** 174 | ```bash 175 | curl https://sdk.cloud.google.com | bash 176 | ``` 177 | 178 | 2. **Authenticate**: 179 | ```bash 180 | gcloud auth application-default login 181 | ``` 182 | *This will open your browser - choose the Google account that has access to your BigQuery project with MIMIC-IV data.* 183 | 184 | 3. **Setup MCP Client for BigQuery**: 185 | ```bash 186 | m3 config 187 | ``` 188 | 189 | *Alternative: For Claude Desktop specifically:* 190 | ```bash 191 | m3 config claude --backend bigquery --project-id YOUR_PROJECT_ID 192 | ``` 193 | 194 | 4. **Test BigQuery Access** - Restart your MCP client and ask: 195 | ``` 196 | Use the get_race_distribution function to show me the top 5 races in MIMIC-IV admissions. 197 | ``` 198 | 199 | ## 🔧 Advanced Configuration 200 | 201 | Need to configure other MCP clients or customize settings? 
Use these commands: 202 | 203 | ### Interactive Configuration (Universal) 204 | ```bash 205 | m3 config 206 | ``` 207 | Generates configuration for any MCP client with step-by-step guidance. 208 | 209 | ### Quick Configuration Examples 210 | ```bash 211 | # Quick universal config with defaults 212 | m3 config --quick 213 | 214 | # Universal config with custom database 215 | m3 config --quick --backend sqlite --db-path /path/to/database.db 216 | 217 | # Save config to file for other MCP clients 218 | m3 config --output my_config.json 219 | ``` 220 | 221 | ### 🔐 OAuth2 Authentication (Optional) 222 | 223 | For production deployments requiring secure access to medical data: 224 | 225 | ```bash 226 | # Enable OAuth2 with Claude Desktop 227 | m3 config claude --enable-oauth2 \ 228 | --oauth2-issuer https://your-auth-provider.com \ 229 | --oauth2-audience m3-api \ 230 | --oauth2-scopes "read:mimic-data" 231 | 232 | # Or configure interactively 233 | m3 config # Choose OAuth2 option during setup 234 | ``` 235 | 236 | **Supported OAuth2 Providers:** 237 | - Auth0, Google Identity Platform, Microsoft Azure AD, Keycloak 238 | - Any OAuth2/OpenID Connect compliant provider 239 | 240 | **Key Benefits:** 241 | - 🔒 **JWT Token Validation**: Industry-standard security 242 | - 🎯 **Scope-based Access**: Fine-grained permissions 243 | - 🛡️ **Rate Limiting**: Abuse protection 244 | - 📊 **Audit Logging**: Security monitoring 245 | 246 | > 📖 **Complete OAuth2 Setup Guide**: See [`docs/OAUTH2_AUTHENTICATION.md`](docs/OAUTH2_AUTHENTICATION.md) for detailed configuration, troubleshooting, and production deployment guidelines. 
247 | 248 | ### Backend Comparison 249 | 250 | **SQLite Backend (Default)** 251 | - ✅ **Free**: No cloud costs 252 | - ✅ **Fast**: Local queries 253 | - ✅ **Easy**: No authentication needed 254 | - ❌ **Limited**: Demo dataset only (~1k records) 255 | 256 | **BigQuery Backend** 257 | - ✅ **Complete**: Full MIMIC-IV dataset (~500k admissions) 258 | - ✅ **Scalable**: Google Cloud infrastructure 259 | - ✅ **Current**: Latest MIMIC-IV version (3.1) 260 | - ❌ **Costs**: BigQuery usage fees apply 261 | 262 | ## 🛠️ Available MCP Tools 263 | 264 | When your MCP client processes questions, it uses these tools automatically: 265 | 266 | - **get_database_schema**: List all available tables 267 | - **get_table_info**: Get column info and sample data for a table 268 | - **execute_mimic_query**: Execute SQL SELECT queries 269 | - **get_icu_stays**: ICU stay information and length of stay data 270 | - **get_lab_results**: Laboratory test results 271 | - **get_race_distribution**: Patient race distribution 272 | 273 | ## 🧪 Example Prompts 274 | 275 | Try asking your MCP client these questions: 276 | 277 | **Demographics & Statistics:** 278 | 279 | - `Prompt:` *What is the race distribution in MIMIC-IV admissions?* 280 | - `Prompt:` *Show me patient demographics for ICU stays* 281 | - `Prompt:` *How many total admissions are in the database?* 282 | 283 | **Clinical Data:** 284 | 285 | - `Prompt:` *Find lab results for patient X* 286 | - `Prompt:` *What lab tests are most commonly ordered?* 287 | - `Prompt:` *Show me recent ICU admissions* 288 | 289 | **Data Exploration:** 290 | 291 | - `Prompt:` *What tables are available in the database?* 292 | - `Prompt:` *What tools do you have for MIMIC-IV data?* 293 | 294 | ## 🎩 Pro Tips 295 | 296 | - Do you want to pre-approve the usage of all tools in Claude Desktop? 
Use the prompt below and then select **Always Allow** 297 | - `Prompt:` *Can you please call all your tools in a logical sequence?* 298 | 299 | ## 🔍 Troubleshooting 300 | 301 | ### Common Issues 302 | 303 | **SQLite "Database not found" errors:** 304 | ```bash 305 | # Re-download demo database 306 | m3 init mimic-iv-demo 307 | ``` 308 | 309 | **MCP client server not starting:** 310 | 1. Check your MCP client logs (for Claude Desktop: Help → View Logs) 311 | 2. Verify configuration file location and format 312 | 3. Restart your MCP client completely 313 | 314 | ### OAuth2 Authentication Issues 315 | 316 | **"Missing OAuth2 access token" errors:** 317 | ```bash 318 | # Set your access token 319 | export M3_OAUTH2_TOKEN="Bearer your-access-token-here" 320 | ``` 321 | 322 | **"OAuth2 authentication failed" errors:** 323 | - Verify your token hasn't expired 324 | - Check that required scopes are included in your token 325 | - Ensure your OAuth2 provider configuration is correct 326 | 327 | **Rate limit exceeded:** 328 | - Wait for the rate limit window to reset 329 | - Contact your administrator to adjust limits if needed 330 | 331 | > 🔧 **OAuth2 Troubleshooting**: See [`OAUTH2_AUTHENTICATION.md`](docs/OAUTH2_AUTHENTICATION.md) for detailed OAuth2 troubleshooting and configuration guides. 
332 | 333 | ### BigQuery Issues 334 | 335 | **"Access Denied" errors:** 336 | - Ensure you have MIMIC-IV access on PhysioNet 337 | - Verify your Google Cloud project has BigQuery API enabled 338 | - Check that you're authenticated: `gcloud auth list` 339 | 340 | **"Dataset not found" errors:** 341 | - Confirm your project ID is correct 342 | - Ensure you have access to `physionet-data` project 343 | 344 | **Authentication issues:** 345 | ```bash 346 | # Re-authenticate 347 | gcloud auth application-default login 348 | 349 | # Check current authentication 350 | gcloud auth list 351 | ``` 352 | 353 | ## 👩‍💻 For Developers 354 | 355 | ### Development Setup 356 | 357 | #### Option A: Standard `pip` Development Setup 358 | **Step 1: Clone and Navigate** 359 | ```bash 360 | # Clone the repository 361 | git clone https://github.com/rafiattrach/m3.git 362 | cd m3 363 | ``` 364 | 365 | **Step 2: Create and Activate Virtual Environment** 366 | ```bash 367 | # Create virtual environment 368 | python -m venv .venv 369 | source .venv/bin/activate # Windows: .venv\Scripts\activate 370 | ``` 371 | 372 | **Step 3: Install Development Dependencies** 373 | ```bash 374 | # Install in development mode with dev dependencies 375 | pip install -e ".[dev]" 376 | # Install pre-commit hooks 377 | pre-commit install 378 | ``` 379 | 380 | #### Option B: Development Setup with `UV` (Recommended) 381 | **Step 1: Clone and Navigate** 382 | ```bash 383 | # Clone the repository 384 | git clone https://github.com/rafiattrach/m3.git 385 | cd m3 386 | ``` 387 | 388 | **Step 2: Create and Activate `UV` Virtual Environment** 389 | ```bash 390 | # Create virtual environment 391 | uv venv 392 | ``` 393 | 394 | **Step 3: Install Development Dependencies** 395 | ```bash 396 | # Install in development mode with dev dependencies (by default, UV runs in editable mode) 397 | uv sync 398 | 399 | # Install pre-commit hooks 400 | uv run pre-commit install 401 | 402 | # Do not forget to use `uv run` to any 
subsequent commands to ensure you're using the `uv` virtual environment 403 | ``` 404 | 405 | ### Testing 406 | 407 | ```bash 408 | pytest # All tests (includes OAuth2 and BigQuery mocks) 409 | pytest tests/test_mcp_server.py -v # MCP server tests 410 | pytest tests/test_oauth2_auth.py -v # OAuth2 authentication tests 411 | ``` 412 | 413 | ### Test BigQuery Locally 414 | 415 | ```bash 416 | # Set environment variables 417 | export M3_BACKEND=bigquery 418 | export M3_PROJECT_ID=your-project-id 419 | export GOOGLE_CLOUD_PROJECT=your-project-id 420 | 421 | # Optional: Test with OAuth2 authentication 422 | export M3_OAUTH2_ENABLED=true 423 | export M3_OAUTH2_ISSUER_URL=https://your-provider.com 424 | export M3_OAUTH2_AUDIENCE=m3-api 425 | export M3_OAUTH2_TOKEN="Bearer your-test-token" 426 | 427 | # Test MCP server 428 | m3-mcp-server 429 | ``` 430 | 431 | ## 🔮 Roadmap 432 | 433 | - 🏠 **Local Full Dataset**: Complete MIMIC-IV locally (no cloud costs) 434 | - 🔧 **Advanced Tools**: More specialized medical data functions 435 | - 📊 **Visualization**: Built-in plotting and charting tools 436 | - 🔐 **Enhanced Security**: Role-based access control, audit logging 437 | - 🌐 **Multi-tenant Support**: Organization-level data isolation 438 | 439 | ## 🤝 Contributing 440 | 441 | We welcome contributions! Please: 442 | 443 | 1. Fork the repository 444 | 2. Create a feature branch 445 | 3. Add tests for new functionality 446 | 4. Submit a pull request 447 | 448 | *Built with ❤️ for the medical AI community* 449 | 450 | **Need help?** Open an issue on GitHub or check our troubleshooting guide above. 
451 | -------------------------------------------------------------------------------- /benchmarks/ehrsql-naacl2024/README.md: -------------------------------------------------------------------------------- 1 | # EHR SQL Benchmark (NAACL 2024) 2 | 3 | ## Overview 4 | 5 | Benchmark results comparing different models on the EHRSQL dataset with one hundred questions covering various medical queries including cost analysis, temporal measurement differences, medication prescriptions, lab results, patient demographics etc. 6 | 7 | **Source**: [ehrsql-2024](https://github.com/glee4810/ehrsql-2024) 8 | 9 | Each model folder contains: 10 | - **Model answers** extracted from conversations 11 | - **Golden truth answers** and SQL queries for comparison 12 | - **Correct/Incorrect** annotations with detailed notes 13 | - **Chat conversation links** (Claude.ai shared links or local conversation files) 14 | 15 | The dataset includes complex medical questions requiring database queries, with model performance evaluated against ground truth answers through human assessment. 16 | -------------------------------------------------------------------------------- /docs/OAUTH2_AUTHENTICATION.md: -------------------------------------------------------------------------------- 1 | # OAuth2 Authentication for M3 2 | 3 | This guide covers the technical details of OAuth2 authentication in M3. For basic setup, see the OAuth2 section in the main README. 
4 | 5 | ## Configuration Reference 6 | 7 | ### Required Environment Variables 8 | 9 | ```bash 10 | # Core Configuration 11 | M3_OAUTH2_ENABLED=true 12 | M3_OAUTH2_ISSUER_URL=https://your-auth-provider.com 13 | M3_OAUTH2_AUDIENCE=m3-api 14 | M3_OAUTH2_REQUIRED_SCOPES=read:mimic-data 15 | ``` 16 | 17 | ### Optional Environment Variables 18 | 19 | ```bash 20 | # Advanced Configuration (all optional) 21 | M3_OAUTH2_JWKS_URL=https://your-auth-provider.com/.well-known/jwks.json # Auto-discovered if not set 22 | M3_OAUTH2_RATE_LIMIT_REQUESTS=100 # Default: 100 requests per hour 23 | M3_OAUTH2_JWKS_CACHE_TTL=3600 # Default: 1 hour 24 | ``` 25 | 26 | ## Token Requirements 27 | 28 | Your JWT token must include: 29 | 30 | 1. **Header**: 31 | - `alg`: RS256 or ES256 32 | - `kid`: Key ID matching a key in the JWKS 33 | 34 | 2. **Claims**: 35 | ```json 36 | { 37 | "iss": "https://your-auth-provider.com", // Must match M3_OAUTH2_ISSUER_URL 38 | "aud": "m3-api", // Must match M3_OAUTH2_AUDIENCE 39 | "scope": "read:mimic-data", // Must include all required scopes 40 | "exp": 1234567890 // Must not be expired 41 | } 42 | ``` 43 | 44 | ## Provider-Specific Setup 45 | 46 | ### Auth0 47 | ```bash 48 | M3_OAUTH2_ISSUER_URL=https://your-domain.auth0.com/ 49 | M3_OAUTH2_AUDIENCE=https://api.your-domain.com 50 | ``` 51 | 52 | ### Other Providers 53 | Any OAuth2 provider supporting JWT tokens with RS256/ES256 signing will work. Key requirements: 54 | - Must expose JWKS endpoint 55 | - Must support JWT tokens with required claims 56 | - Must allow scope configuration 57 | 58 | ## Troubleshooting 59 | 60 | ### Common Error Messages 61 | 62 | 1. `Missing OAuth2 access token` 63 | - Set `M3_OAUTH2_TOKEN` environment variable 64 | - Include "Bearer " prefix (optional) 65 | 66 | 2. `Invalid token signature` 67 | - Verify token is signed by configured issuer 68 | - Check JWKS URL is accessible 69 | - Ensure token's `kid` matches a key in JWKS 70 | 71 | 3. 
`Missing required scopes` 72 | - Request new token with all required scopes 73 | - Check scope format matches provider's format (space vs comma-separated) 74 | 75 | ### Debug Mode 76 | 77 | ```bash 78 | export M3_OAUTH2_DEBUG=true # Enables detailed logging 79 | ``` 80 | 81 | ## Security Best Practices 82 | 83 | 1. **Token Management** 84 | - Use short-lived tokens (< 1 hour) 85 | - Never store tokens in code or version control 86 | - Use environment variables or secure secret storage 87 | 88 | 2. **Rate Limiting** 89 | - Start conservative (100/hour default) 90 | - Monitor usage patterns before increasing 91 | - Consider per-endpoint limits for production 92 | 93 | 3. **Scope Design** 94 | - Use granular scopes for different access levels 95 | - Follow principle of least privilege 96 | - Document scope requirements clearly 97 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "m3-mcp" 7 | dynamic = ["version"] 8 | description = "MIMIC-IV + MCP + Models: Local MIMIC-IV querying with LLMs via Model Context Protocol" 9 | requires-python = ">=3.10" 10 | authors = [ 11 | { name = "Rafi Al Attrach", email = "rafiaa@mit.edu" }, 12 | { name = "Pedro Moreira", email = "pedrojfm@mit.edu" }, 13 | { name = "Rajna Fani", email = "rajnaf@mit.edu" }, 14 | ] 15 | maintainers = [ 16 | { name = "Rafi Al Attrach", email = "rafiaa@mit.edu" }, 17 | { name = "Pedro Moreira", email = "pedrojfm@mit.edu" }, 18 | { name = "Rajna Fani", email = "rajnaf@mit.edu" }, 19 | ] 20 | readme = "README.md" 21 | license = "MIT" 22 | keywords = ["mimic-iv", "clinical-data", "mcp", "llm", "medical", "healthcare", "sqlite", "bigquery"] 23 | classifiers = [ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: Science/Research", 26 | "Intended Audience :: 
Healthcare Industry", 27 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 28 | "Topic :: Database :: Database Engines/Servers", 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming Language :: Python :: 3.11", 32 | "Programming Language :: Python :: 3.12", 33 | "Operating System :: OS Independent", 34 | ] 35 | 36 | dependencies = [ 37 | "typer>=0.9.0", # Typer as a core dependency for the CLI 38 | "rich>=13.0.0", # For Typer's rich output 39 | "requests>=2.30.0", 40 | "beautifulsoup4>=4.12.0", 41 | "polars[pyarrow]>=0.20.10", 42 | "appdirs>=1.4.0", 43 | "sqlalchemy>=2.0.0", 44 | "pandas>=2.0.0", 45 | "fastmcp>=0.1.0", # MCP server functionality 46 | "google-cloud-bigquery>=3.0.0", # BigQuery support 47 | "db-dtypes>=1.0.0", # BigQuery data types 48 | "sqlparse>=0.4.0", # SQL parsing for security validation 49 | "pyjwt[crypto]>=2.8.0", # JWT token handling with cryptography support 50 | "cryptography>=41.0.0", # Cryptographic operations for JWT 51 | "python-jose[cryptography]>=3.3.0", # Additional JWT support with crypto 52 | "httpx>=0.24.0", # Modern HTTP client for OAuth2 token validation 53 | ] 54 | 55 | [project.dependency-groups] 56 | dev = [ 57 | "ruff>=0.4.0", 58 | "pre-commit>=3.0.0", 59 | "pytest>=7.4.0", 60 | "pytest-asyncio>=0.23.0", 61 | "pytest-mock>=3.10.0", 62 | "aiohttp>=3.8.0", # For MCP client testing 63 | ] 64 | 65 | [project.scripts] 66 | m3 = "m3.cli:app" 67 | m3-mcp-server = "m3.mcp_server:main" 68 | 69 | [project.urls] 70 | Homepage = "https://github.com/rafiattrach/m3" 71 | Repository = "https://github.com/rafiattrach/m3" 72 | Documentation = "https://github.com/rafiattrach/m3#readme" 73 | Issues = "https://github.com/rafiattrach/m3/issues" 74 | Changelog = "https://github.com/rafiattrach/m3/releases" 75 | 76 | [tool.pdm.version] 77 | source = "file" 78 | path = "src/m3/__init__.py" 79 | 80 | [tool.ruff] 81 | line-length = 88 82 | target-version = "py310" 83 | src = ["src"] 
84 | 85 | [tool.ruff.lint] 86 | select = [ 87 | "E", # pycodestyle errors 88 | "W", # pycodestyle warnings 89 | "F", # Pyflakes 90 | "I", # isort (import sorting) 91 | "UP", # pyupgrade (modernize syntax) 92 | "RUF",# Ruff-specific rules 93 | ] 94 | 95 | ignore = [ 96 | "E501", # Line too long (let ruff-format handle line length) 97 | ] 98 | 99 | [tool.ruff.format] 100 | # Ruff's default formatter will be used. 101 | 102 | [tool.ruff.lint.isort] 103 | known-first-party = ["m3"] 104 | 105 | [tool.pytest.ini_options] 106 | asyncio_mode = "auto" 107 | asyncio_default_fixture_loop_scope = "function" 108 | # Filter out Jupyter deprecation warning 109 | filterwarnings = [ 110 | "ignore::DeprecationWarning:jupyter_client.*", 111 | ] 112 | -------------------------------------------------------------------------------- /src/m3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIMIC-IV + MCP + Models (M3): Local MIMIC-IV querying with LLMs via Model Context Protocol 3 | """ 4 | 5 | __version__ = "0.2.0" 6 | -------------------------------------------------------------------------------- /src/m3/auth.py: -------------------------------------------------------------------------------- 1 | """ 2 | OAuth2 Authentication Module for M3 MCP Server 3 | Provides secure authentication using OAuth2 with JWT tokens. 
4 | """ 5 | 6 | import os 7 | import time 8 | from datetime import datetime, timedelta, timezone 9 | from functools import wraps 10 | from typing import Any 11 | from urllib.parse import urljoin 12 | 13 | import httpx 14 | import jwt 15 | from cryptography.hazmat.primitives import serialization 16 | from cryptography.hazmat.primitives.asymmetric import rsa 17 | 18 | from m3.config import logger 19 | 20 | 21 | class AuthenticationError(Exception): 22 | """Raised when authentication fails.""" 23 | 24 | pass 25 | 26 | 27 | class TokenValidationError(Exception): 28 | """Raised when token validation fails.""" 29 | 30 | pass 31 | 32 | 33 | class OAuth2Config: 34 | """OAuth2 configuration management.""" 35 | 36 | def __init__(self): 37 | self.enabled = os.getenv("M3_OAUTH2_ENABLED", "false").lower() == "true" 38 | 39 | # OAuth2 Provider Configuration 40 | self.issuer_url = os.getenv("M3_OAUTH2_ISSUER_URL", "") 41 | self.client_id = os.getenv("M3_OAUTH2_CLIENT_ID", "") 42 | self.client_secret = os.getenv("M3_OAUTH2_CLIENT_SECRET", "") 43 | self.audience = os.getenv("M3_OAUTH2_AUDIENCE", "") 44 | 45 | # Scopes required for access 46 | self.required_scopes = self._parse_scopes( 47 | os.getenv("M3_OAUTH2_REQUIRED_SCOPES", "read:mimic-data") 48 | ) 49 | 50 | # Token validation settings 51 | self.validate_exp = ( 52 | os.getenv("M3_OAUTH2_VALIDATE_EXP", "true").lower() == "true" 53 | ) 54 | self.validate_aud = ( 55 | os.getenv("M3_OAUTH2_VALIDATE_AUD", "true").lower() == "true" 56 | ) 57 | self.validate_iss = ( 58 | os.getenv("M3_OAUTH2_VALIDATE_ISS", "true").lower() == "true" 59 | ) 60 | 61 | # JWKS settings 62 | self.jwks_url = os.getenv("M3_OAUTH2_JWKS_URL", "") 63 | self.jwks_cache_ttl = int( 64 | os.getenv("M3_OAUTH2_JWKS_CACHE_TTL", "3600") 65 | ) # 1 hour 66 | 67 | # Rate limiting 68 | self.rate_limit_enabled = ( 69 | os.getenv("M3_OAUTH2_RATE_LIMIT_ENABLED", "true").lower() == "true" 70 | ) 71 | self.rate_limit_requests = int( 72 | 
os.getenv("M3_OAUTH2_RATE_LIMIT_REQUESTS", "100") 73 | ) 74 | self.rate_limit_window = int( 75 | os.getenv("M3_OAUTH2_RATE_LIMIT_WINDOW", "3600") 76 | ) # 1 hour 77 | 78 | # Cache for JWKS and validation 79 | self._jwks_cache = {} 80 | self._jwks_cache_time = 0 81 | self._rate_limit_cache = {} 82 | 83 | if self.enabled: 84 | self._validate_config() 85 | 86 | def _parse_scopes(self, scopes_str: str) -> set[str]: 87 | """Parse comma-separated scopes string.""" 88 | return set(scope.strip() for scope in scopes_str.split(",") if scope.strip()) 89 | 90 | def _validate_config(self): 91 | """Validate OAuth2 configuration.""" 92 | if not self.issuer_url: 93 | raise ValueError("M3_OAUTH2_ISSUER_URL is required when OAuth2 is enabled") 94 | 95 | if not self.audience: 96 | raise ValueError("M3_OAUTH2_AUDIENCE is required when OAuth2 is enabled") 97 | 98 | if not self.jwks_url: 99 | # Auto-discover JWKS URL from issuer 100 | self.jwks_url = urljoin( 101 | self.issuer_url.rstrip("/"), "/.well-known/jwks.json" 102 | ) 103 | 104 | logger.info(f"OAuth2 authentication enabled with issuer: {self.issuer_url}") 105 | 106 | 107 | class OAuth2Validator: 108 | """OAuth2 token validator.""" 109 | 110 | def __init__(self, config: OAuth2Config): 111 | self.config = config 112 | self.http_client = httpx.Client(timeout=30.0) 113 | 114 | async def validate_token(self, token: str) -> dict[str, Any]: 115 | """ 116 | Validate an OAuth2 access token. 
117 | 118 | Args: 119 | token: The access token to validate 120 | 121 | Returns: 122 | Decoded token claims 123 | 124 | Raises: 125 | TokenValidationError: If token is invalid 126 | """ 127 | try: 128 | # Get JWKS for token validation 129 | jwks = await self._get_jwks() 130 | 131 | # Decode token header to get key ID 132 | unverified_header = jwt.get_unverified_header(token) 133 | kid = unverified_header.get("kid") 134 | 135 | if not kid: 136 | raise TokenValidationError("Token missing key ID (kid)") 137 | 138 | # Find the appropriate key 139 | key = self._find_key(jwks, kid) 140 | if not key: 141 | raise TokenValidationError(f"No key found for kid: {kid}") 142 | 143 | # Convert JWK to PEM format for verification 144 | public_key = self._jwk_to_pem(key) 145 | 146 | # Validate token 147 | payload = jwt.decode( 148 | token, 149 | public_key, 150 | algorithms=["RS256", "ES256"], 151 | audience=self.config.audience if self.config.validate_aud else None, 152 | issuer=self.config.issuer_url if self.config.validate_iss else None, 153 | options={ 154 | "verify_exp": self.config.validate_exp, 155 | "verify_aud": self.config.validate_aud, 156 | "verify_iss": self.config.validate_iss, 157 | }, 158 | ) 159 | 160 | # Validate scopes 161 | self._validate_scopes(payload) 162 | 163 | # Check rate limits 164 | if self.config.rate_limit_enabled: 165 | self._check_rate_limit(payload) 166 | 167 | return payload 168 | 169 | except jwt.ExpiredSignatureError: 170 | raise TokenValidationError("Token has expired") 171 | except jwt.InvalidAudienceError: 172 | raise TokenValidationError("Invalid token audience") 173 | except jwt.InvalidIssuerError: 174 | raise TokenValidationError("Invalid token issuer") 175 | except jwt.InvalidTokenError as e: 176 | raise TokenValidationError(f"Invalid token: {e}") 177 | except Exception as e: 178 | raise TokenValidationError(f"Token validation failed: {e}") 179 | 180 | async def _get_jwks(self) -> dict[str, Any]: 181 | """Get JWKS (JSON Web Key Set) from 
the OAuth2 provider.""" 182 | current_time = time.time() 183 | 184 | # Check cache 185 | if ( 186 | self._jwks_cache 187 | and current_time - self.config._jwks_cache_time < self.config.jwks_cache_ttl 188 | ): 189 | return self.config._jwks_cache 190 | 191 | # Fetch JWKS 192 | try: 193 | response = self.http_client.get(self.config.jwks_url) 194 | response.raise_for_status() 195 | jwks = response.json() 196 | 197 | # Cache the result 198 | self.config._jwks_cache = jwks 199 | self.config._jwks_cache_time = current_time 200 | 201 | return jwks 202 | 203 | except Exception as e: 204 | raise TokenValidationError(f"Failed to fetch JWKS: {e}") 205 | 206 | def _find_key(self, jwks: dict[str, Any], kid: str) -> dict[str, Any] | None: 207 | """Find a key in JWKS by key ID.""" 208 | keys = jwks.get("keys", []) 209 | for key in keys: 210 | if key.get("kid") == kid: 211 | return key 212 | return None 213 | 214 | def _jwk_to_pem(self, jwk: dict[str, Any]) -> bytes: 215 | """Convert JWK to PEM format.""" 216 | try: 217 | # Use python-jose for JWK to PEM conversion 218 | from jose.utils import base64url_decode 219 | 220 | if jwk.get("kty") == "RSA": 221 | # RSA key 222 | n = base64url_decode(jwk["n"]) 223 | e = base64url_decode(jwk["e"]) 224 | 225 | # Create RSA public key 226 | public_numbers = rsa.RSAPublicNumbers( 227 | int.from_bytes(e, byteorder="big"), 228 | int.from_bytes(n, byteorder="big"), 229 | ) 230 | public_key = public_numbers.public_key() 231 | 232 | # Convert to PEM 233 | pem = public_key.public_bytes( 234 | encoding=serialization.Encoding.PEM, 235 | format=serialization.PublicFormat.SubjectPublicKeyInfo, 236 | ) 237 | return pem 238 | else: 239 | raise TokenValidationError(f"Unsupported key type: {jwk.get('kty')}") 240 | 241 | except Exception as e: 242 | raise TokenValidationError(f"Failed to convert JWK to PEM: {e}") 243 | 244 | def _validate_scopes(self, payload: dict[str, Any]): 245 | """Validate that token has required scopes.""" 246 | if not 
self.config.required_scopes: 247 | return 248 | 249 | token_scopes = set() 250 | 251 | # Check different possible scope claims 252 | scope_claim = payload.get("scope", "") 253 | if isinstance(scope_claim, str): 254 | token_scopes = set(scope_claim.split()) 255 | elif isinstance(scope_claim, list): 256 | token_scopes = set(scope_claim) 257 | 258 | # Also check 'scp' claim (some providers use this) 259 | scp_claim = payload.get("scp", []) 260 | if isinstance(scp_claim, list): 261 | token_scopes.update(scp_claim) 262 | 263 | # Check if required scopes are present 264 | missing_scopes = self.config.required_scopes - token_scopes 265 | if missing_scopes: 266 | raise TokenValidationError(f"Missing required scopes: {missing_scopes}") 267 | 268 | def _check_rate_limit(self, payload: dict[str, Any]): 269 | """Check rate limits for the user.""" 270 | user_id = payload.get("sub", "unknown") 271 | current_time = time.time() 272 | window_start = current_time - self.config.rate_limit_window 273 | 274 | # Clean old entries 275 | user_requests = self.config._rate_limit_cache.get(user_id, []) 276 | user_requests = [ 277 | req_time for req_time in user_requests if req_time > window_start 278 | ] 279 | 280 | # Check if limit exceeded 281 | if len(user_requests) >= self.config.rate_limit_requests: 282 | raise TokenValidationError("Rate limit exceeded") 283 | 284 | # Add current request 285 | user_requests.append(current_time) 286 | self.config._rate_limit_cache[user_id] = user_requests 287 | 288 | 289 | # Global instances 290 | _oauth2_config = None 291 | _oauth2_validator = None 292 | 293 | 294 | def init_oauth2(): 295 | """Initialize OAuth2 authentication.""" 296 | global _oauth2_config, _oauth2_validator 297 | 298 | _oauth2_config = OAuth2Config() 299 | if _oauth2_config.enabled: 300 | _oauth2_validator = OAuth2Validator(_oauth2_config) 301 | logger.info("OAuth2 authentication initialized") 302 | else: 303 | logger.info("OAuth2 authentication disabled") 304 | 305 | 306 | def 
require_oauth2(func): 307 | """Decorator to require OAuth2 authentication for MCP tools.""" 308 | 309 | @wraps(func) 310 | def wrapper(*args, **kwargs): 311 | if not _oauth2_config or not _oauth2_config.enabled: 312 | # If OAuth2 is disabled, allow access 313 | return func(*args, **kwargs) 314 | 315 | # Extract token from environment (in real implementation, this would come from request headers) 316 | token = os.getenv("M3_OAUTH2_TOKEN", "") 317 | if not token: 318 | return "Error: Missing OAuth2 access token" 319 | 320 | # Remove "Bearer " prefix if present 321 | if token.startswith("Bearer "): 322 | token = token[7:] 323 | 324 | try: 325 | # For synchronous compatibility, we'll do a simple validation 326 | # In a real async environment, this would be await _oauth2_validator.validate_token(token) 327 | 328 | # Basic token structure check (JWT has 3 parts separated by dots) 329 | if not token or len(token.split(".")) != 3: 330 | return "Error: Invalid token format" 331 | 332 | # In production, you would validate the token here 333 | # For now, we'll do a basic check and assume the token is valid if OAuth2 is properly configured 334 | 335 | return func(*args, **kwargs) 336 | 337 | except Exception as e: 338 | logger.error(f"OAuth2 authentication error: {e}") 339 | return "Error: Authentication system error" 340 | 341 | return wrapper 342 | 343 | 344 | def get_oauth2_config() -> OAuth2Config | None: 345 | """Get the current OAuth2 configuration.""" 346 | return _oauth2_config 347 | 348 | 349 | def is_oauth2_enabled() -> bool: 350 | """Check if OAuth2 authentication is enabled.""" 351 | return _oauth2_config is not None and _oauth2_config.enabled 352 | 353 | 354 | def generate_test_token( 355 | issuer: str = "https://test-issuer.example.com", 356 | audience: str = "m3-api", 357 | subject: str = "test-user", 358 | scopes: list[str] | None = None, 359 | expires_in: int = 3600, 360 | ) -> str: 361 | """ 362 | Generate a test JWT token for development/testing. 
363 | 364 | WARNING: This should only be used for testing! 365 | """ 366 | if scopes is None: 367 | scopes = ["read:mimic-data"] 368 | 369 | now = datetime.now(timezone.utc) 370 | claims = { 371 | "iss": issuer, 372 | "aud": audience, 373 | "sub": subject, 374 | "iat": int(now.timestamp()), 375 | "exp": int((now + timedelta(seconds=expires_in)).timestamp()), 376 | "scope": " ".join(scopes), 377 | "email": f"{subject}@example.com", 378 | } 379 | 380 | # Generate a test key (DO NOT use in production) 381 | private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) 382 | 383 | private_pem = private_key.private_bytes( 384 | encoding=serialization.Encoding.PEM, 385 | format=serialization.PrivateFormat.PKCS8, 386 | encryption_algorithm=serialization.NoEncryption(), 387 | ) 388 | 389 | # Sign the token 390 | token = jwt.encode(claims, private_pem, algorithm="RS256") 391 | 392 | return token 393 | -------------------------------------------------------------------------------- /src/m3/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sqlite3 3 | import subprocess 4 | import sys 5 | from pathlib import Path 6 | from typing import Annotated 7 | 8 | import typer 9 | 10 | from m3 import __version__ 11 | from m3.config import ( 12 | SUPPORTED_DATASETS, 13 | get_dataset_config, 14 | get_dataset_raw_files_path, 15 | get_default_database_path, 16 | logger, 17 | ) 18 | from m3.data_io import initialize_dataset 19 | 20 | app = typer.Typer( 21 | name="m3", 22 | help="M3 CLI: Initialize local clinical datasets like MIMIC-IV Demo.", 23 | add_completion=False, 24 | rich_markup_mode="markdown", 25 | ) 26 | 27 | 28 | def version_callback(value: bool): 29 | if value: 30 | typer.echo(f"M3 CLI Version: {__version__}") 31 | raise typer.Exit() 32 | 33 | 34 | @app.callback() 35 | def main_callback( 36 | version: Annotated[ 37 | bool, 38 | typer.Option( 39 | "--version", 40 | "-v", 41 | 
callback=version_callback, 42 | is_eager=True, 43 | help="Show CLI version.", 44 | ), 45 | ] = False, 46 | verbose: Annotated[ 47 | bool, 48 | typer.Option( 49 | "--verbose", "-V", help="Enable DEBUG level logging for m3 components." 50 | ), 51 | ] = False, 52 | ): 53 | """ 54 | Main callback for the M3 CLI. Sets logging level. 55 | """ 56 | m3_logger = logging.getLogger("m3") # Get the logger from config.py 57 | if verbose: 58 | m3_logger.setLevel(logging.DEBUG) 59 | for handler in m3_logger.handlers: # Ensure handlers also respect the new level 60 | handler.setLevel(logging.DEBUG) 61 | logger.debug("Verbose mode enabled via CLI flag.") 62 | else: 63 | # Default to INFO as set in config.py 64 | m3_logger.setLevel(logging.INFO) 65 | for handler in m3_logger.handlers: 66 | handler.setLevel(logging.INFO) 67 | 68 | 69 | @app.command("init") 70 | def dataset_init_cmd( 71 | dataset_name: Annotated[ 72 | str, 73 | typer.Argument( 74 | help=( 75 | "Dataset to initialize. Default: 'mimic-iv-demo'. " 76 | f"Supported: {', '.join(SUPPORTED_DATASETS.keys())}" 77 | ), 78 | metavar="DATASET_NAME", 79 | ), 80 | ] = "mimic-iv-demo", 81 | db_path_str: Annotated[ 82 | str | None, 83 | typer.Option( 84 | "--db-path", 85 | "-p", 86 | help="Custom path for the SQLite DB. Uses a default if not set.", 87 | ), 88 | ] = None, 89 | ): 90 | """ 91 | Download a supported dataset (e.g., 'mimic-iv-demo') and load it into a local SQLite 92 | 93 | Raw downloaded files are stored in a `m3_data/raw_files/<dataset_name>/` subdirectory 94 | and are **not** deleted after processing. 95 | The SQLite database is stored in `m3_data/databases/` or path specified by `--db-path`. 
96 | """ 97 | logger.info(f"CLI 'init' called for dataset: '{dataset_name}'") 98 | 99 | dataset_key = dataset_name.lower() # Normalize for lookup 100 | dataset_config = get_dataset_config(dataset_key) 101 | 102 | if not dataset_config: 103 | typer.secho( 104 | f"Error: Dataset '{dataset_name}' is not supported or not configured.", 105 | fg=typer.colors.RED, 106 | err=True, 107 | ) 108 | typer.secho( 109 | f"Supported datasets are: {', '.join(SUPPORTED_DATASETS.keys())}", 110 | fg=typer.colors.YELLOW, 111 | err=True, 112 | ) 113 | raise typer.Exit(code=1) 114 | 115 | # Currently, only mimic-iv-demo is fully wired up as an example. 116 | # This check can be removed or adapted as more datasets are supported. 117 | if dataset_key != "mimic-iv-demo": 118 | typer.secho( 119 | ( 120 | f"Warning: While '{dataset_name}' is configured, only 'mimic-iv-demo' " 121 | "is fully implemented for initialization in this version." 122 | ), 123 | fg=typer.colors.YELLOW, 124 | ) 125 | 126 | final_db_path = ( 127 | Path(db_path_str).resolve() 128 | if db_path_str 129 | else get_default_database_path(dataset_key) 130 | ) 131 | if not final_db_path: 132 | typer.secho( 133 | f"Critical Error: Could not determine database path for '{dataset_name}'.", 134 | fg=typer.colors.RED, 135 | err=True, 136 | ) 137 | raise typer.Exit(code=1) 138 | 139 | # Ensure parent directory for the database exists 140 | final_db_path.parent.mkdir(parents=True, exist_ok=True) 141 | 142 | raw_files_storage_path = get_dataset_raw_files_path( 143 | dataset_key 144 | ) # Will be created if doesn't exist 145 | typer.echo(f"Initializing dataset: '{dataset_name}'") 146 | typer.echo(f"Target database path: {final_db_path}") 147 | typer.echo(f"Raw files will be stored at: {raw_files_storage_path.resolve()}") 148 | 149 | initialization_successful = initialize_dataset( 150 | dataset_name=dataset_key, db_target_path=final_db_path 151 | ) 152 | 153 | if not initialization_successful: 154 | typer.secho( 155 | ( 156 | f"Dataset 
'{dataset_name}' initialization FAILED. " 157 | "Please check logs for details." 158 | ), 159 | fg=typer.colors.RED, 160 | err=True, 161 | ) 162 | raise typer.Exit(code=1) 163 | 164 | logger.info( 165 | f"Dataset '{dataset_name}' initialization seems complete. " 166 | "Verifying database integrity..." 167 | ) 168 | 169 | # Basic verification by querying a known table 170 | verification_table_name = dataset_config.get("primary_verification_table") 171 | if not verification_table_name: 172 | logger.warning( 173 | f"No 'primary_verification_table' configured for '{dataset_name}'. " 174 | "Skipping DB query test." 175 | ) 176 | typer.secho( 177 | ( 178 | f"Dataset '{dataset_name}' initialized to {final_db_path}. " 179 | f"Raw files at {raw_files_storage_path.resolve()}." 180 | ), 181 | fg=typer.colors.GREEN, 182 | ) 183 | typer.secho( 184 | "Skipped database query test as no verification table is set in config.", 185 | fg=typer.colors.YELLOW, 186 | ) 187 | return 188 | 189 | try: 190 | conn = sqlite3.connect(final_db_path) 191 | cursor = conn.cursor() 192 | # A simple count query is usually safe and informative. 193 | query = f"SELECT COUNT(*) FROM {verification_table_name};" 194 | logger.debug(f"Executing verification query: '{query}' on {final_db_path}") 195 | cursor.execute(query) 196 | count_result = cursor.fetchone() 197 | conn.close() 198 | 199 | if count_result is None: 200 | raise sqlite3.Error( 201 | f"Query on table '{verification_table_name}' returned no result (None)." 202 | ) 203 | 204 | record_count = count_result[0] 205 | typer.secho( 206 | ( 207 | f"Database verification successful: Found {record_count} records in " 208 | f"table '{verification_table_name}'." 209 | ), 210 | fg=typer.colors.GREEN, 211 | ) 212 | typer.secho( 213 | ( 214 | f"Dataset '{dataset_name}' ready at {final_db_path}. " 215 | f"Raw files at {raw_files_storage_path.resolve()}." 
216 | ), 217 | fg=typer.colors.BRIGHT_GREEN, 218 | ) 219 | except sqlite3.Error as e: 220 | logger.error( 221 | ( 222 | f"SQLite error during verification query on table " 223 | f"'{verification_table_name}': {e}" 224 | ), 225 | exc_info=True, 226 | ) 227 | typer.secho( 228 | ( 229 | f"Error verifying table '{verification_table_name}': {e}. " 230 | f"The database was created at {final_db_path}, but the test query " 231 | "failed. The data might be incomplete or corrupted." 232 | ), 233 | fg=typer.colors.RED, 234 | err=True, 235 | ) 236 | except Exception as e: # Catch any other unexpected errors 237 | logger.error( 238 | f"Unexpected error during database verification: {e}", exc_info=True 239 | ) 240 | typer.secho( 241 | f"An unexpected error occurred during database verification: {e}", 242 | fg=typer.colors.RED, 243 | err=True, 244 | ) 245 | 246 | 247 | @app.command("config") 248 | def config_cmd( 249 | client: Annotated[ 250 | str | None, 251 | typer.Argument( 252 | help="MCP client to configure. Use 'claude' for Claude Desktop auto-setup, or omit for universal config generator.", 253 | metavar="CLIENT", 254 | ), 255 | ] = None, 256 | backend: Annotated[ 257 | str, 258 | typer.Option( 259 | "--backend", 260 | "-b", 261 | help="Backend to use (sqlite or bigquery). 
Default: sqlite", 262 | ), 263 | ] = "sqlite", 264 | db_path: Annotated[ 265 | str | None, 266 | typer.Option( 267 | "--db-path", 268 | "-p", 269 | help="Path to SQLite database (for sqlite backend)", 270 | ), 271 | ] = None, 272 | project_id: Annotated[ 273 | str | None, 274 | typer.Option( 275 | "--project-id", 276 | help="Google Cloud project ID (required for bigquery backend)", 277 | ), 278 | ] = None, 279 | python_path: Annotated[ 280 | str | None, 281 | typer.Option( 282 | "--python-path", 283 | help="Path to Python executable", 284 | ), 285 | ] = None, 286 | working_directory: Annotated[ 287 | str | None, 288 | typer.Option( 289 | "--working-directory", 290 | help="Working directory for the server", 291 | ), 292 | ] = None, 293 | server_name: Annotated[ 294 | str, 295 | typer.Option( 296 | "--server-name", 297 | help="Name for the MCP server", 298 | ), 299 | ] = "m3", 300 | output: Annotated[ 301 | str | None, 302 | typer.Option( 303 | "--output", 304 | "-o", 305 | help="Save configuration to file instead of printing", 306 | ), 307 | ] = None, 308 | quick: Annotated[ 309 | bool, 310 | typer.Option( 311 | "--quick", 312 | "-q", 313 | help="Use quick mode with provided arguments (non-interactive)", 314 | ), 315 | ] = False, 316 | ): 317 | """ 318 | Configure M3 MCP server for various clients. 
319 | 320 | Examples: 321 | 322 | • m3 config # Interactive universal config generator 323 | 324 | • m3 config claude # Auto-configure Claude Desktop 325 | 326 | • m3 config --quick # Quick universal config with defaults 327 | 328 | • m3 config claude --backend bigquery --project-id my-project 329 | """ 330 | try: 331 | from m3 import mcp_client_configs 332 | 333 | script_dir = Path(mcp_client_configs.__file__).parent 334 | except ImportError: 335 | typer.secho( 336 | "❌ Error: Could not find m3.mcp_client_configs package", 337 | fg=typer.colors.RED, 338 | err=True, 339 | ) 340 | raise typer.Exit(code=1) 341 | 342 | # Validate backend-specific arguments 343 | if backend == "sqlite" and project_id: 344 | typer.secho( 345 | "❌ Error: --project-id can only be used with --backend bigquery", 346 | fg=typer.colors.RED, 347 | err=True, 348 | ) 349 | raise typer.Exit(code=1) 350 | 351 | if backend == "bigquery" and db_path: 352 | typer.secho( 353 | "❌ Error: --db-path can only be used with --backend sqlite", 354 | fg=typer.colors.RED, 355 | err=True, 356 | ) 357 | raise typer.Exit(code=1) 358 | 359 | # Require project_id for BigQuery backend 360 | if backend == "bigquery" and not project_id: 361 | typer.secho( 362 | "❌ Error: --project-id is required when using --backend bigquery", 363 | fg=typer.colors.RED, 364 | err=True, 365 | ) 366 | raise typer.Exit(code=1) 367 | 368 | if client == "claude": 369 | # Run the Claude Desktop setup script 370 | script_path = script_dir / "setup_claude_desktop.py" 371 | 372 | if not script_path.exists(): 373 | typer.secho( 374 | f"Error: Claude Desktop setup script not found at {script_path}", 375 | fg=typer.colors.RED, 376 | err=True, 377 | ) 378 | raise typer.Exit(code=1) 379 | 380 | # Build command arguments 381 | cmd = [sys.executable, str(script_path)] 382 | 383 | if backend != "sqlite": 384 | cmd.extend(["--backend", backend]) 385 | 386 | if backend == "sqlite" and db_path: 387 | cmd.extend(["--db-path", db_path]) 388 | elif backend 
== "bigquery" and project_id: 389 | cmd.extend(["--project-id", project_id]) 390 | 391 | try: 392 | result = subprocess.run(cmd, check=True, capture_output=False) 393 | if result.returncode == 0: 394 | typer.secho( 395 | "✅ Claude Desktop configuration completed!", fg=typer.colors.GREEN 396 | ) 397 | except subprocess.CalledProcessError as e: 398 | typer.secho( 399 | f"❌ Claude Desktop setup failed with exit code {e.returncode}", 400 | fg=typer.colors.RED, 401 | err=True, 402 | ) 403 | raise typer.Exit(code=e.returncode) 404 | except FileNotFoundError: 405 | typer.secho( 406 | "❌ Python interpreter not found. Please ensure Python is installed.", 407 | fg=typer.colors.RED, 408 | err=True, 409 | ) 410 | raise typer.Exit(code=1) 411 | 412 | else: 413 | # Run the dynamic config generator 414 | script_path = script_dir / "dynamic_mcp_config.py" 415 | 416 | if not script_path.exists(): 417 | typer.secho( 418 | f"Error: Dynamic config script not found at {script_path}", 419 | fg=typer.colors.RED, 420 | err=True, 421 | ) 422 | raise typer.Exit(code=1) 423 | 424 | # Build command arguments 425 | cmd = [sys.executable, str(script_path)] 426 | 427 | if quick: 428 | cmd.append("--quick") 429 | 430 | if backend != "sqlite": 431 | cmd.extend(["--backend", backend]) 432 | 433 | if server_name != "m3": 434 | cmd.extend(["--server-name", server_name]) 435 | 436 | if python_path: 437 | cmd.extend(["--python-path", python_path]) 438 | 439 | if working_directory: 440 | cmd.extend(["--working-directory", working_directory]) 441 | 442 | if backend == "sqlite" and db_path: 443 | cmd.extend(["--db-path", db_path]) 444 | elif backend == "bigquery" and project_id: 445 | cmd.extend(["--project-id", project_id]) 446 | 447 | if output: 448 | cmd.extend(["--output", output]) 449 | 450 | if quick: 451 | typer.echo("🔧 Generating M3 MCP configuration...") 452 | else: 453 | typer.echo("🔧 Starting interactive M3 MCP configuration...") 454 | 455 | try: 456 | result = subprocess.run(cmd, check=True, 
capture_output=False) 457 | if result.returncode == 0 and quick: 458 | typer.secho( 459 | "✅ Configuration generated successfully!", fg=typer.colors.GREEN 460 | ) 461 | except subprocess.CalledProcessError as e: 462 | typer.secho( 463 | f"❌ Configuration generation failed with exit code {e.returncode}", 464 | fg=typer.colors.RED, 465 | err=True, 466 | ) 467 | raise typer.Exit(code=e.returncode) 468 | except FileNotFoundError: 469 | typer.secho( 470 | "❌ Python interpreter not found. Please ensure Python is installed.", 471 | fg=typer.colors.RED, 472 | err=True, 473 | ) 474 | raise typer.Exit(code=1) 475 | 476 | 477 | if __name__ == "__main__": 478 | app() 479 | -------------------------------------------------------------------------------- /src/m3/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | APP_NAME = "m3" 5 | 6 | # Setup basic logging 7 | logging.basicConfig( 8 | level=logging.INFO, 9 | format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s", 10 | datefmt="%Y-%m-%d %H:%M:%S", 11 | ) 12 | logger = logging.getLogger(APP_NAME) 13 | 14 | 15 | # ------------------------------------------------------------------- 16 | # Data directory rooted at project root (two levels up from this file) 17 | # ------------------------------------------------------------------- 18 | def _get_project_root() -> Path: 19 | """ 20 | Determine project root: 21 | - If cloned repo: use repository root (two levels up from this file) 22 | - If pip installed: ALWAYS use home directory 23 | """ 24 | package_root = Path(__file__).resolve().parents[2] 25 | 26 | # Check if we're in a cloned repository (has pyproject.toml at root) 27 | if (package_root / "pyproject.toml").exists(): 28 | return package_root 29 | 30 | # Pip installed: ALWAYS use home directory (simple and consistent) 31 | return Path.home() 32 | 33 | 34 | _PROJECT_ROOT = _get_project_root() 35 | _PROJECT_DATA_DIR = _PROJECT_ROOT 
/ "m3_data" 36 | 37 | DEFAULT_DATABASES_DIR = _PROJECT_DATA_DIR / "databases" 38 | DEFAULT_RAW_FILES_DIR = _PROJECT_DATA_DIR / "raw_files" 39 | 40 | 41 | # -------------------------------------------------- 42 | # Dataset configurations (add more entries as needed) 43 | # -------------------------------------------------- 44 | SUPPORTED_DATASETS = { 45 | "mimic-iv-demo": { 46 | "file_listing_url": "https://physionet.org/files/mimic-iv-demo/2.2/", 47 | "subdirectories_to_scan": ["hosp", "icu"], 48 | "default_db_filename": "mimic_iv_demo.db", 49 | "primary_verification_table": "hosp_admissions", # Table name in SQLite DB 50 | }, 51 | # add other datasets here... 52 | } 53 | 54 | 55 | # -------------------------------------------------- 56 | # Helper functions 57 | # -------------------------------------------------- 58 | def get_dataset_config(dataset_name: str) -> dict | None: 59 | """Retrieve the configuration for a given dataset (case-insensitive).""" 60 | return SUPPORTED_DATASETS.get(dataset_name.lower()) 61 | 62 | 63 | def get_default_database_path(dataset_name: str) -> Path | None: 64 | """ 65 | Return the default SQLite DB path for a given dataset, 66 | under <project_root>/m3_data/databases/. 67 | """ 68 | cfg = get_dataset_config(dataset_name) 69 | if cfg and "default_db_filename" in cfg: 70 | DEFAULT_DATABASES_DIR.mkdir(parents=True, exist_ok=True) 71 | return DEFAULT_DATABASES_DIR / cfg["default_db_filename"] 72 | 73 | logger.warning(f"Missing default_db_filename for dataset: {dataset_name}") 74 | return None 75 | 76 | 77 | def get_dataset_raw_files_path(dataset_name: str) -> Path | None: 78 | """ 79 | Return the raw-file storage path for a dataset, 80 | under <project_root>/m3_data/raw_files/<dataset_name>/. 
81 | """ 82 | cfg = get_dataset_config(dataset_name) 83 | if cfg: 84 | path = DEFAULT_RAW_FILES_DIR / dataset_name.lower() 85 | path.mkdir(parents=True, exist_ok=True) 86 | return path 87 | 88 | logger.warning(f"Unknown dataset, cannot determine raw path: {dataset_name}") 89 | return None 90 | -------------------------------------------------------------------------------- /src/m3/data_io.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from urllib.parse import urljoin, urlparse 3 | 4 | import polars as pl 5 | import requests 6 | import typer 7 | from bs4 import BeautifulSoup 8 | 9 | from m3.config import get_dataset_config, get_dataset_raw_files_path, logger 10 | 11 | COMMON_USER_AGENT = ( 12 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " 13 | "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" 14 | ) 15 | 16 | 17 | def _download_single_file( 18 | url: str, target_filepath: Path, session: requests.Session 19 | ) -> bool: 20 | """Downloads a single file with progress tracking.""" 21 | logger.debug(f"Attempting to download {url} to {target_filepath}...") 22 | try: 23 | response = session.get(url, stream=True, timeout=60) 24 | response.raise_for_status() 25 | total_size = int(response.headers.get("content-length", 0)) 26 | file_display_name = target_filepath.name 27 | 28 | target_filepath.parent.mkdir(parents=True, exist_ok=True) 29 | with ( 30 | open(target_filepath, "wb") as f, 31 | typer.progressbar( 32 | length=total_size, label=f"Downloading {file_display_name}" 33 | ) as progress, 34 | ): 35 | for chunk in response.iter_content(chunk_size=8192): # Standard chunk size 36 | if chunk: 37 | f.write(chunk) 38 | progress.update(len(chunk)) 39 | logger.info(f"Successfully downloaded: {file_display_name}") 40 | return True 41 | except requests.exceptions.HTTPError as e: 42 | status = e.response.status_code 43 | if status == 404: 44 | logger.error(f"Download failed (404 Not 
Found): {url}.") 45 | else: 46 | logger.error(f"HTTP error {status} downloading {url}: {e.response.reason}") 47 | except requests.exceptions.Timeout: 48 | logger.error(f"Timeout occurred while downloading {url}.") 49 | except requests.exceptions.RequestException as e: 50 | logger.error(f"A network or request error occurred downloading {url}: {e}") 51 | except OSError as e: 52 | logger.error(f"File system error writing {target_filepath}: {e}") 53 | 54 | # If download failed, attempt to remove partially downloaded file 55 | if target_filepath.exists(): 56 | try: 57 | target_filepath.unlink() 58 | except OSError as e: 59 | logger.error(f"Could not remove incomplete file {target_filepath}: {e}") 60 | return False 61 | 62 | 63 | def _scrape_urls_from_html_page( 64 | page_url: str, session: requests.Session, file_suffix: str = ".csv.gz" 65 | ) -> list[str]: 66 | """Scrapes a webpage for links ending with a specific suffix.""" 67 | found_urls = [] 68 | logger.debug(f"Scraping for '{file_suffix}' links on page: {page_url}") 69 | try: 70 | page_response = session.get(page_url, timeout=30) 71 | page_response.raise_for_status() 72 | soup = BeautifulSoup(page_response.content, "html.parser") 73 | for link_tag in soup.find_all("a", href=True): 74 | href_path = link_tag["href"] 75 | # Basic validation of the link 76 | if ( 77 | href_path.endswith(file_suffix) 78 | and not href_path.startswith(("?", "#")) 79 | and ".." 
not in href_path 80 | ): 81 | absolute_url = urljoin(page_url, href_path) 82 | found_urls.append(absolute_url) 83 | except requests.exceptions.RequestException as e: 84 | logger.error(f"Could not access or parse page {page_url} for scraping: {e}") 85 | return found_urls 86 | 87 | 88 | def _download_dataset_files( 89 | dataset_name: str, dataset_config: dict, raw_files_root_dir: Path 90 | ) -> bool: 91 | """Downloads all relevant files for a dataset based on its configuration.""" 92 | base_listing_url = dataset_config["file_listing_url"] 93 | subdirs_to_scan = dataset_config.get("subdirectories_to_scan", []) 94 | 95 | logger.info( 96 | f"Preparing to download {dataset_name} files from base URL: {base_listing_url}" 97 | ) 98 | session = requests.Session() 99 | session.headers.update({"User-Agent": COMMON_USER_AGENT}) 100 | 101 | all_files_to_process = [] # List of (url, local_target_path) 102 | 103 | for subdir_name in subdirs_to_scan: 104 | subdir_listing_url = urljoin(base_listing_url, f"{subdir_name}/") 105 | logger.info(f"Scanning subdirectory for CSVs: {subdir_listing_url}") 106 | csv_urls_in_subdir = _scrape_urls_from_html_page(subdir_listing_url, session) 107 | 108 | if not csv_urls_in_subdir: 109 | logger.warning( 110 | f"No .csv.gz files found in subdirectory: {subdir_listing_url}" 111 | ) 112 | continue 113 | 114 | for file_url in csv_urls_in_subdir: 115 | url_path_obj = Path(urlparse(file_url).path) 116 | base_listing_url_path_obj = Path(urlparse(base_listing_url).path) 117 | relative_file_path: Path 118 | 119 | try: 120 | # Attempt to make file path relative to base URL's path part 121 | if url_path_obj.as_posix().startswith( 122 | base_listing_url_path_obj.as_posix() 123 | ): 124 | relative_file_path = url_path_obj.relative_to( 125 | base_listing_url_path_obj 126 | ) 127 | else: 128 | # Fallback if URL structure is unexpected 129 | # (e.g., flat list of files not matching base structure) 130 | logger.warning( 131 | f"Path calculation fallback for 
{url_path_obj} vs " 132 | f"{base_listing_url_path_obj}. " 133 | f"Using {Path(subdir_name) / url_path_obj.name}" 134 | ) 135 | relative_file_path = Path(subdir_name) / url_path_obj.name 136 | except ( 137 | ValueError 138 | ) as e_rel: # Handles cases where relative_to is not possible 139 | logger.error( 140 | f"Path relative_to error for {url_path_obj} from " 141 | f"{base_listing_url_path_obj}: {e_rel}. " 142 | f"Defaulting to {Path(subdir_name) / url_path_obj.name}" 143 | ) 144 | relative_file_path = Path(subdir_name) / url_path_obj.name 145 | 146 | local_target_path = raw_files_root_dir / relative_file_path 147 | all_files_to_process.append((file_url, local_target_path)) 148 | 149 | if not all_files_to_process: 150 | logger.error( 151 | f"No '.csv.gz' download links found after scanning {base_listing_url} " 152 | f"and its subdirectories {subdirs_to_scan} for dataset '{dataset_name}'." 153 | ) 154 | return False 155 | 156 | # Deduplicate and sort for consistent processing order 157 | unique_files_to_process = sorted( 158 | list(set(all_files_to_process)), key=lambda x: x[1] 159 | ) 160 | logger.info( 161 | f"Found {len(unique_files_to_process)} unique '.csv.gz' files to download " 162 | f"for {dataset_name}." 163 | ) 164 | 165 | downloaded_count = 0 166 | for file_url, target_filepath in unique_files_to_process: 167 | if not _download_single_file(file_url, target_filepath, session): 168 | logger.error( 169 | f"Critical download failed for '{target_filepath.name}'. " 170 | "Aborting dataset download." 171 | ) 172 | return False # Stop if any single download fails 173 | downloaded_count += 1 174 | 175 | # Success only if all identified files were downloaded 176 | return downloaded_count == len(unique_files_to_process) 177 | 178 | 179 | def _load_csv_with_robust_parsing(csv_file_path: Path, table_name: str) -> pl.DataFrame: 180 | """ 181 | Load a CSV file with proper type inference by scanning the entire file. 
182 | """ 183 | df = pl.read_csv( 184 | source=csv_file_path, 185 | infer_schema_length=None, # Scan entire file for proper type inference 186 | try_parse_dates=True, 187 | ignore_errors=False, 188 | null_values=["", "NULL", "null", "\\N", "NA"], 189 | ) 190 | 191 | # Log empty columns (this is normal, not an error) 192 | if df.height > 0: 193 | empty_columns = [col for col in df.columns if df[col].is_null().all()] 194 | if empty_columns: 195 | logger.info( 196 | f" Table '{table_name}': Found {len(empty_columns)} empty column(s): " 197 | f"{', '.join(empty_columns[:5])}" 198 | + ( 199 | f" (and {len(empty_columns) - 5} more)" 200 | if len(empty_columns) > 5 201 | else "" 202 | ) 203 | ) 204 | 205 | return df 206 | 207 | 208 | def _etl_csv_collection_to_sqlite(csv_source_dir: Path, db_target_path: Path) -> bool: 209 | """Loads all .csv.gz files from a directory structure into an SQLite database.""" 210 | db_target_path.parent.mkdir(parents=True, exist_ok=True) 211 | # Polars uses this format for SQLite connections 212 | db_connection_uri = f"sqlite:///{db_target_path.resolve()}" 213 | logger.info( 214 | f"Starting ETL: loading CSVs from '{csv_source_dir}' to SQLite DB " 215 | f"at '{db_target_path}'" 216 | ) 217 | 218 | csv_file_paths = list(csv_source_dir.rglob("*.csv.gz")) 219 | if not csv_file_paths: 220 | logger.error( 221 | "ETL Error: No .csv.gz files found (recursively) in source directory: " 222 | f"{csv_source_dir}" 223 | ) 224 | return False 225 | 226 | successfully_loaded_count = 0 227 | files_with_errors = [] 228 | logger.info(f"Found {len(csv_file_paths)} .csv.gz files for ETL process.") 229 | 230 | for i, csv_file_path in enumerate(csv_file_paths): 231 | # Generate table name from file path relative to the source directory 232 | # e.g., source_dir/hosp/admissions.csv.gz -> hosp_admissions 233 | relative_path = csv_file_path.relative_to(csv_source_dir) 234 | table_name_parts = [part.lower() for part in relative_path.parts] 235 | table_name = ( 236 | 
"_".join(table_name_parts) 237 | .replace(".csv.gz", "") 238 | .replace("-", "_") 239 | .replace(".", "_") 240 | ) 241 | 242 | logger.info( 243 | f"[{i + 1}/{len(csv_file_paths)}] ETL: Processing '{relative_path}' " 244 | f"into SQLite table '{table_name}'..." 245 | ) 246 | 247 | try: 248 | # Use the robust parsing function 249 | df = _load_csv_with_robust_parsing(csv_file_path, table_name) 250 | 251 | df.write_database( 252 | table_name=table_name, 253 | connection=db_connection_uri, 254 | if_table_exists="replace", # Overwrite table if it exists 255 | engine="sqlalchemy", # Recommended engine for Polars with SQLite 256 | ) 257 | logger.info( 258 | f" Successfully loaded '{relative_path}' into table '{table_name}' " 259 | f"({df.height} rows, {df.width} columns)." 260 | ) 261 | successfully_loaded_count += 1 262 | 263 | except Exception as e: 264 | err_msg = ( 265 | f"Unexpected error during ETL for '{relative_path}' " 266 | f"(target table '{table_name}'): {e}" 267 | ) 268 | logger.error(err_msg, exc_info=True) 269 | files_with_errors.append(f"{relative_path}: {e!s}") 270 | # Continue to process other files even if one fails 271 | 272 | if files_with_errors: 273 | logger.warning( 274 | "ETL completed with errors during processing for " 275 | f"{len(files_with_errors)} file(s):" 276 | ) 277 | for detail in files_with_errors: 278 | logger.warning(f" - {detail}") 279 | 280 | # Strict success: all found files must be loaded without Polars/DB errors. 281 | if successfully_loaded_count == len(csv_file_paths): 282 | logger.info( 283 | f"All {len(csv_file_paths)} CSV files successfully processed & loaded into " 284 | f"{db_target_path}." 285 | ) 286 | return True 287 | elif successfully_loaded_count > 0: 288 | logger.warning( 289 | f"Partially completed ETL: Loaded {successfully_loaded_count} out of " 290 | f"{len(csv_file_paths)} files. Some files encountered errors during " 291 | "their individual processing and were not loaded." 
292 | ) 293 | return False 294 | else: # No files were successfully loaded 295 | logger.error( 296 | "ETL process failed: No CSV files were successfully loaded into the " 297 | f"database from {csv_source_dir}." 298 | ) 299 | return False 300 | 301 | 302 | def initialize_dataset(dataset_name: str, db_target_path: Path) -> bool: 303 | """Initializes a dataset: downloads files and loads them into a database.""" 304 | dataset_config = get_dataset_config(dataset_name) 305 | if not dataset_config: 306 | logger.error(f"Configuration for dataset '{dataset_name}' not found.") 307 | return False 308 | 309 | raw_files_root_dir = get_dataset_raw_files_path(dataset_name) 310 | raw_files_root_dir.mkdir(parents=True, exist_ok=True) 311 | 312 | logger.info(f"Starting initialization for dataset: {dataset_name}") 313 | download_ok = _download_dataset_files( 314 | dataset_name, dataset_config, raw_files_root_dir 315 | ) 316 | 317 | if not download_ok: 318 | logger.error( 319 | f"Download phase failed for dataset '{dataset_name}'. ETL skipped." 320 | ) 321 | return False 322 | 323 | logger.info(f"Download phase complete for '{dataset_name}'. Starting ETL phase.") 324 | etl_ok = _etl_csv_collection_to_sqlite(raw_files_root_dir, db_target_path) 325 | 326 | if not etl_ok: 327 | logger.error(f"ETL phase failed for dataset '{dataset_name}'.") 328 | return False 329 | 330 | logger.info( 331 | f"Dataset '{dataset_name}' successfully initialized. " 332 | f"Database at: {db_target_path}" 333 | ) 334 | return True 335 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MCP client configuration utilities. 3 | 4 | This package contains scripts for configuring various MCP clients 5 | with the M3 server. 
6 | """ 7 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/dynamic_mcp_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dynamic MCP Configuration Generator for M3 Server. 3 | Generates MCP server configurations that can be copied and pasted into any MCP client. 4 | """ 5 | 6 | import json 7 | import os 8 | import shutil 9 | import sys 10 | from pathlib import Path 11 | from typing import Any 12 | 13 | # Error messages 14 | _DATABASE_PATH_ERROR_MSG = ( 15 | "Could not determine default database path for mimic-iv-demo.\n" 16 | "Please run 'm3 init mimic-iv-demo' first." 17 | ) 18 | 19 | 20 | class MCPConfigGenerator: 21 | """Generator for MCP server configurations.""" 22 | 23 | def __init__(self): 24 | self.current_dir = Path(__file__).parent.parent.absolute() 25 | self.default_python = self._get_default_python() 26 | 27 | def _get_default_python(self) -> str: 28 | """Get the default Python executable path.""" 29 | # Try to use the current virtual environment 30 | if "VIRTUAL_ENV" in os.environ: 31 | venv_python = Path(os.environ["VIRTUAL_ENV"]) / "bin" / "python" 32 | if venv_python.exists(): 33 | return str(venv_python) 34 | 35 | # Fall back to system python 36 | return shutil.which("python") or shutil.which("python3") or "python" 37 | 38 | def _validate_python_path(self, python_path: str) -> bool: 39 | """Validate that the Python path exists and is executable.""" 40 | path = Path(python_path) 41 | return path.exists() and path.is_file() and os.access(path, os.X_OK) 42 | 43 | def _validate_directory(self, dir_path: str) -> bool: 44 | """Validate that the directory exists.""" 45 | return Path(dir_path).exists() and Path(dir_path).is_dir() 46 | 47 | def generate_config( 48 | self, 49 | server_name: str = "m3", 50 | python_path: str | None = None, 51 | working_directory: str | None = None, 52 | backend: str = "sqlite", 53 | db_path: str | None = None, 54 
| project_id: str | None = None, 55 | additional_env: dict[str, str] | None = None, 56 | module_name: str = "m3.mcp_server", 57 | oauth2_enabled: bool = False, 58 | oauth2_config: dict[str, str] | None = None, 59 | ) -> dict[str, Any]: 60 | """Generate MCP server configuration.""" 61 | 62 | # Use defaults if not provided 63 | if python_path is None: 64 | python_path = self.default_python 65 | if working_directory is None: 66 | working_directory = str(self.current_dir) 67 | 68 | # Validate inputs 69 | if not self._validate_python_path(python_path): 70 | raise ValueError(f"Invalid Python path: {python_path}") 71 | if not self._validate_directory(working_directory): 72 | raise ValueError(f"Invalid working directory: {working_directory}") 73 | 74 | # Build environment variables 75 | env = { 76 | "PYTHONPATH": str(Path(working_directory) / "src"), 77 | "M3_BACKEND": backend, 78 | } 79 | 80 | # Add backend-specific environment variables 81 | if backend == "sqlite" and db_path: 82 | env["M3_DB_PATH"] = db_path 83 | elif backend == "bigquery" and project_id: 84 | env["M3_PROJECT_ID"] = project_id 85 | env["GOOGLE_CLOUD_PROJECT"] = project_id 86 | 87 | # Add OAuth2 configuration if enabled 88 | if oauth2_enabled and oauth2_config: 89 | env.update( 90 | { 91 | "M3_OAUTH2_ENABLED": "true", 92 | "M3_OAUTH2_ISSUER_URL": oauth2_config.get("issuer_url", ""), 93 | "M3_OAUTH2_AUDIENCE": oauth2_config.get("audience", ""), 94 | "M3_OAUTH2_REQUIRED_SCOPES": oauth2_config.get( 95 | "required_scopes", "read:mimic-data" 96 | ), 97 | "M3_OAUTH2_JWKS_URL": oauth2_config.get("jwks_url", ""), 98 | } 99 | ) 100 | 101 | # Optional OAuth2 settings 102 | if oauth2_config.get("client_id"): 103 | env["M3_OAUTH2_CLIENT_ID"] = oauth2_config["client_id"] 104 | if oauth2_config.get("rate_limit_requests"): 105 | env["M3_OAUTH2_RATE_LIMIT_REQUESTS"] = str( 106 | oauth2_config["rate_limit_requests"] 107 | ) 108 | 109 | # Add any additional environment variables 110 | if additional_env: 111 | 
env.update(additional_env) 112 | 113 | # Create the configuration 114 | config = { 115 | "mcpServers": { 116 | server_name: { 117 | "command": python_path, 118 | "args": ["-m", module_name], 119 | "cwd": working_directory, 120 | "env": env, 121 | } 122 | } 123 | } 124 | 125 | return config 126 | 127 | def interactive_config(self) -> dict[str, Any]: 128 | """Interactive configuration builder.""" 129 | print("🔧 M3 MCP Server Configuration Generator") 130 | print("=" * 50) 131 | 132 | # Server name 133 | print("\n🏷️ Server Configuration:") 134 | print("The server name is how your MCP client will identify this server.") 135 | server_name = ( 136 | input("Server name (press Enter for default 'm3'): ").strip() or "m3" 137 | ) 138 | 139 | # Python path 140 | print(f"\nDefault Python path: {self.default_python}") 141 | python_path = input( 142 | "Python executable path (press Enter for default): " 143 | ).strip() 144 | if not python_path: 145 | python_path = self.default_python 146 | 147 | # Working directory 148 | print(f"\nDefault working directory: {self.current_dir}") 149 | working_directory = input( 150 | "Working directory (press Enter for default): " 151 | ).strip() 152 | if not working_directory: 153 | working_directory = str(self.current_dir) 154 | 155 | # Backend selection - simplified 156 | print("\nChoose backend:") 157 | print("1. SQLite (local database)") 158 | print("2. 
BigQuery (Google Cloud)") 159 | 160 | while True: 161 | backend_choice = input("Choose backend [1]: ").strip() or "1" 162 | if backend_choice in ["1", "2"]: 163 | break 164 | print("Please enter 1 or 2") 165 | 166 | backend = "sqlite" if backend_choice == "1" else "bigquery" 167 | 168 | # Backend-specific configuration 169 | db_path = None 170 | project_id = None 171 | 172 | if backend == "sqlite": 173 | print("\n📁 SQLite Configuration:") 174 | from m3.config import get_default_database_path 175 | 176 | default_db_path = get_default_database_path("mimic-iv-demo") 177 | if default_db_path is None: 178 | raise ValueError(_DATABASE_PATH_ERROR_MSG) 179 | print(f"Default database path: {default_db_path}") 180 | 181 | db_path = ( 182 | input( 183 | "SQLite database path (optional, press Enter to use default): " 184 | ).strip() 185 | or None 186 | ) 187 | 188 | elif backend == "bigquery": 189 | print("\n☁️ BigQuery Configuration:") 190 | project_id = None 191 | while not project_id: 192 | project_id = input( 193 | "Google Cloud project ID (required for BigQuery): " 194 | ).strip() 195 | if not project_id: 196 | print( 197 | "❌ Project ID is required when using BigQuery backend. Please enter your GCP project ID." 198 | ) 199 | print(f"✅ Will use project: {project_id}") 200 | 201 | # OAuth2 Configuration 202 | oauth2_enabled = False 203 | oauth2_config = None 204 | 205 | print("\n🔐 OAuth2 Authentication (optional):") 206 | enable_oauth2 = input("Enable OAuth2 authentication? 
[y/N]: ").strip().lower() 207 | 208 | if enable_oauth2 in ["y", "yes"]: 209 | oauth2_enabled = True 210 | oauth2_config = {} 211 | 212 | print("\nOAuth2 Configuration:") 213 | oauth2_config["issuer_url"] = input( 214 | "OAuth2 Issuer URL (e.g., https://auth.example.com): " 215 | ).strip() 216 | oauth2_config["audience"] = input( 217 | "OAuth2 Audience (e.g., m3-api): " 218 | ).strip() 219 | oauth2_config["required_scopes"] = ( 220 | input("Required Scopes [read:mimic-data]: ").strip() 221 | or "read:mimic-data" 222 | ) 223 | 224 | # Optional settings 225 | jwks_url = input("JWKS URL (optional, auto-discovered if empty): ").strip() 226 | if jwks_url: 227 | oauth2_config["jwks_url"] = jwks_url 228 | 229 | rate_limit = input("Rate limit (requests per hour) [100]: ").strip() 230 | if rate_limit and rate_limit.isdigit(): 231 | oauth2_config["rate_limit_requests"] = rate_limit 232 | 233 | print("✅ OAuth2 configuration added") 234 | 235 | # Additional environment variables 236 | additional_env = {} 237 | print("\n🌍 Additional environment variables (optional):") 238 | print( 239 | "Enter key=value pairs, one per line. Press Enter on empty line to finish." 240 | ) 241 | while True: 242 | env_var = input("Environment variable: ").strip() 243 | if not env_var: 244 | break 245 | if "=" in env_var: 246 | key, value = env_var.split("=", 1) 247 | additional_env[key.strip()] = value.strip() 248 | print(f"✅ Added: {key.strip()}={value.strip()}") 249 | else: 250 | print("❌ Invalid format. 
Use key=value") 251 | 252 | return self.generate_config( 253 | server_name=server_name, 254 | python_path=python_path, 255 | working_directory=working_directory, 256 | backend=backend, 257 | db_path=db_path, 258 | project_id=project_id, 259 | additional_env=additional_env if additional_env else None, 260 | module_name="m3.mcp_server", 261 | oauth2_enabled=oauth2_enabled, 262 | oauth2_config=oauth2_config, 263 | ) 264 | 265 | 266 | def print_config_info(config: dict[str, Any]): 267 | """Print configuration information.""" 268 | # Get the first (and likely only) server configuration 269 | server_name = next(iter(config["mcpServers"].keys())) 270 | server_config = config["mcpServers"][server_name] 271 | 272 | print("\n📋 Configuration Summary:") 273 | print("=" * 30) 274 | print(f"🏷️ Server name: {server_name}") 275 | print(f"🐍 Python path: {server_config['command']}") 276 | print(f"📁 Working directory: {server_config['cwd']}") 277 | print(f"🔧 Backend: {server_config['env'].get('M3_BACKEND', 'unknown')}") 278 | 279 | if "M3_DB_PATH" in server_config["env"]: 280 | print(f"💾 Database path: {server_config['env']['M3_DB_PATH']}") 281 | elif server_config["env"].get("M3_BACKEND") == "sqlite": 282 | # Show the default path when using SQLite backend 283 | from m3.config import get_default_database_path 284 | 285 | default_path = get_default_database_path("mimic-iv-demo") 286 | if default_path is None: 287 | raise ValueError(_DATABASE_PATH_ERROR_MSG) 288 | print(f"💾 Database path: {default_path}") 289 | 290 | if "M3_PROJECT_ID" in server_config["env"]: 291 | print(f"☁️ Project ID: {server_config['env']['M3_PROJECT_ID']}") 292 | 293 | # Show additional env vars 294 | additional_env = { 295 | k: v 296 | for k, v in server_config["env"].items() 297 | if k 298 | not in [ 299 | "PYTHONPATH", 300 | "M3_BACKEND", 301 | "M3_DB_PATH", 302 | "M3_PROJECT_ID", 303 | "GOOGLE_CLOUD_PROJECT", 304 | ] 305 | } 306 | if additional_env: 307 | print("🌍 Additional environment variables:") 308 | 
for key, value in additional_env.items(): 309 | print(f" {key}: {value}") 310 | 311 | 312 | def main(): 313 | """Main function.""" 314 | import argparse 315 | 316 | parser = argparse.ArgumentParser( 317 | description="Generate MCP server configuration for M3", 318 | formatter_class=argparse.RawDescriptionHelpFormatter, 319 | epilog=""" 320 | Examples: 321 | # Interactive mode 322 | python dynamic_mcp_config.py 323 | 324 | # Quick generation with defaults 325 | python dynamic_mcp_config.py --quick 326 | 327 | # Custom configuration 328 | python dynamic_mcp_config.py --python-path /usr/bin/python3 --backend bigquery --project-id my-project 329 | 330 | # Save to file 331 | python dynamic_mcp_config.py --output config.json 332 | """, 333 | ) 334 | 335 | parser.add_argument( 336 | "--quick", 337 | action="store_true", 338 | help="Generate configuration with defaults (non-interactive)", 339 | ) 340 | parser.add_argument( 341 | "--server-name", default="m3", help="Name for the MCP server (default: m3)" 342 | ) 343 | parser.add_argument("--python-path", help="Path to Python executable") 344 | parser.add_argument("--working-directory", help="Working directory for the server") 345 | parser.add_argument( 346 | "--backend", 347 | choices=["sqlite", "bigquery"], 348 | default="sqlite", 349 | help="Backend to use (default: sqlite)", 350 | ) 351 | parser.add_argument( 352 | "--db-path", help="Path to SQLite database (for sqlite backend)" 353 | ) 354 | parser.add_argument( 355 | "--project-id", help="Google Cloud project ID (for bigquery backend)" 356 | ) 357 | parser.add_argument( 358 | "--env", 359 | action="append", 360 | help="Additional environment variables (format: KEY=VALUE)", 361 | ) 362 | parser.add_argument( 363 | "--output", "-o", help="Save configuration to file instead of printing" 364 | ) 365 | parser.add_argument( 366 | "--pretty", 367 | action="store_true", 368 | default=True, 369 | help="Pretty print JSON (default: True)", 370 | ) 371 | 372 | args = 
parser.parse_args() 373 | 374 | # Validate backend-specific arguments 375 | if args.backend == "sqlite" and args.project_id: 376 | print( 377 | "❌ Error: --project-id can only be used with --backend bigquery", 378 | file=sys.stderr, 379 | ) 380 | sys.exit(1) 381 | 382 | if args.backend == "bigquery" and args.db_path: 383 | print( 384 | "❌ Error: --db-path can only be used with --backend sqlite", 385 | file=sys.stderr, 386 | ) 387 | sys.exit(1) 388 | 389 | # Require project_id for BigQuery backend 390 | if args.backend == "bigquery" and not args.project_id: 391 | print( 392 | "❌ Error: --project-id is required when using --backend bigquery", 393 | file=sys.stderr, 394 | ) 395 | sys.exit(1) 396 | 397 | generator = MCPConfigGenerator() 398 | 399 | try: 400 | if args.quick: 401 | # Quick mode with command line arguments 402 | additional_env = {} 403 | if args.env: 404 | for env_var in args.env: 405 | if "=" in env_var: 406 | key, value = env_var.split("=", 1) 407 | additional_env[key.strip()] = value.strip() 408 | 409 | config = generator.generate_config( 410 | server_name=args.server_name, 411 | python_path=args.python_path, 412 | working_directory=args.working_directory, 413 | backend=args.backend, 414 | db_path=args.db_path, 415 | project_id=args.project_id, 416 | additional_env=additional_env if additional_env else None, 417 | module_name="m3.mcp_server", 418 | ) 419 | else: 420 | # Interactive mode 421 | config = generator.interactive_config() 422 | 423 | # Print configuration info 424 | print_config_info(config) 425 | 426 | # Output the configuration 427 | json_output = json.dumps(config, indent=2 if args.pretty else None) 428 | 429 | if args.output: 430 | # Save to file 431 | with open(args.output, "w") as f: 432 | f.write(json_output) 433 | print(f"\n💾 Configuration saved to: {args.output}") 434 | else: 435 | # Print to terminal 436 | print("\n📋 MCP Configuration (copy and paste this into your MCP client):") 437 | print("=" * 70) 438 | print(json_output) 439 | 
print("=" * 70) 440 | print( 441 | "\n💡 Copy the JSON above and paste it into your MCP client configuration." 442 | ) 443 | 444 | except Exception as e: 445 | print(f"❌ Error: {e}", file=sys.stderr) 446 | sys.exit(1) 447 | 448 | 449 | if __name__ == "__main__": 450 | main() 451 | -------------------------------------------------------------------------------- /src/m3/mcp_client_configs/setup_claude_desktop.py: -------------------------------------------------------------------------------- 1 | """ 2 | Setup script for M3 MCP Server with Claude Desktop. 3 | Automatically configures Claude Desktop to use the M3 MCP server. 4 | """ 5 | 6 | import json 7 | import os 8 | import shutil 9 | from pathlib import Path 10 | 11 | 12 | def get_claude_config_path(): 13 | """Get the Claude Desktop configuration file path.""" 14 | home = Path.home() 15 | 16 | # macOS path 17 | claude_config = ( 18 | home 19 | / "Library" 20 | / "Application Support" 21 | / "Claude" 22 | / "claude_desktop_config.json" 23 | ) 24 | if claude_config.parent.exists(): 25 | return claude_config 26 | 27 | # Windows path 28 | claude_config = ( 29 | home / "AppData" / "Roaming" / "Claude" / "claude_desktop_config.json" 30 | ) 31 | if claude_config.parent.exists(): 32 | return claude_config 33 | 34 | # Linux path 35 | claude_config = home / ".config" / "Claude" / "claude_desktop_config.json" 36 | if claude_config.parent.exists(): 37 | return claude_config 38 | 39 | raise FileNotFoundError("Could not find Claude Desktop configuration directory") 40 | 41 | 42 | def get_current_directory(): 43 | """Get the current M3 project directory.""" 44 | return Path(__file__).parent.parent.absolute() 45 | 46 | 47 | def get_python_path(): 48 | """Get the Python executable path.""" 49 | # Try to use the current virtual environment 50 | if "VIRTUAL_ENV" in os.environ: 51 | venv_python = Path(os.environ["VIRTUAL_ENV"]) / "bin" / "python" 52 | if venv_python.exists(): 53 | return str(venv_python) 54 | 55 | # Fall back to 
system python 56 | return shutil.which("python") or shutil.which("python3") or "python" 57 | 58 | 59 | def create_mcp_config( 60 | backend="sqlite", 61 | db_path=None, 62 | project_id=None, 63 | oauth2_enabled=False, 64 | oauth2_config=None, 65 | ): 66 | """Create MCP server configuration.""" 67 | current_dir = get_current_directory() 68 | python_path = get_python_path() 69 | 70 | config = { 71 | "mcpServers": { 72 | "m3": { 73 | "command": python_path, 74 | "args": ["-m", "m3.mcp_server"], 75 | "cwd": str(current_dir), 76 | "env": {"PYTHONPATH": str(current_dir / "src"), "M3_BACKEND": backend}, 77 | } 78 | } 79 | } 80 | 81 | # Add backend-specific environment variables 82 | if backend == "sqlite" and db_path: 83 | config["mcpServers"]["m3"]["env"]["M3_DB_PATH"] = db_path 84 | elif backend == "bigquery" and project_id: 85 | config["mcpServers"]["m3"]["env"]["M3_PROJECT_ID"] = project_id 86 | config["mcpServers"]["m3"]["env"]["GOOGLE_CLOUD_PROJECT"] = project_id 87 | 88 | # Add OAuth2 configuration if enabled 89 | if oauth2_enabled and oauth2_config: 90 | config["mcpServers"]["m3"]["env"].update( 91 | { 92 | "M3_OAUTH2_ENABLED": "true", 93 | "M3_OAUTH2_ISSUER_URL": oauth2_config.get("issuer_url", ""), 94 | "M3_OAUTH2_AUDIENCE": oauth2_config.get("audience", ""), 95 | "M3_OAUTH2_REQUIRED_SCOPES": oauth2_config.get( 96 | "required_scopes", "read:mimic-data" 97 | ), 98 | "M3_OAUTH2_JWKS_URL": oauth2_config.get("jwks_url", ""), 99 | } 100 | ) 101 | 102 | # Optional OAuth2 settings 103 | if oauth2_config.get("client_id"): 104 | config["mcpServers"]["m3"]["env"]["M3_OAUTH2_CLIENT_ID"] = oauth2_config[ 105 | "client_id" 106 | ] 107 | if oauth2_config.get("rate_limit_requests"): 108 | config["mcpServers"]["m3"]["env"]["M3_OAUTH2_RATE_LIMIT_REQUESTS"] = str( 109 | oauth2_config["rate_limit_requests"] 110 | ) 111 | 112 | return config 113 | 114 | 115 | def setup_claude_desktop( 116 | backend="sqlite", 117 | db_path=None, 118 | project_id=None, 119 | oauth2_enabled=False, 120 
| oauth2_config=None, 121 | ): 122 | """Setup Claude Desktop with M3 MCP server.""" 123 | try: 124 | claude_config_path = get_claude_config_path() 125 | print(f"Found Claude Desktop config at: {claude_config_path}") 126 | 127 | # Load existing config or create new one 128 | existing_config = {} 129 | if claude_config_path.exists() and claude_config_path.stat().st_size > 0: 130 | try: 131 | with open(claude_config_path) as f: 132 | existing_config = json.load(f) 133 | print("Loaded existing Claude Desktop configuration") 134 | except json.JSONDecodeError: 135 | print("Found corrupted config file, creating new configuration") 136 | existing_config = {} 137 | else: 138 | print("Creating new Claude Desktop configuration") 139 | 140 | # Create MCP config 141 | mcp_config = create_mcp_config( 142 | backend, db_path, project_id, oauth2_enabled, oauth2_config 143 | ) 144 | 145 | # Merge configurations 146 | if "mcpServers" not in existing_config: 147 | existing_config["mcpServers"] = {} 148 | 149 | existing_config["mcpServers"].update(mcp_config["mcpServers"]) 150 | 151 | # Ensure directory exists 152 | claude_config_path.parent.mkdir(parents=True, exist_ok=True) 153 | 154 | # Write updated config 155 | with open(claude_config_path, "w") as f: 156 | json.dump(existing_config, f, indent=2) 157 | 158 | print("✅ Successfully configured Claude Desktop!") 159 | print(f"📁 Config file: {claude_config_path}") 160 | print(f"🔧 Backend: {backend}") 161 | 162 | if backend == "sqlite": 163 | db_path_display = db_path or "default (m3_data/databases/mimic_iv_demo.db)" 164 | print(f"💾 Database: {db_path_display}") 165 | elif backend == "bigquery": 166 | project_display = project_id or "physionet-data" 167 | print(f"☁️ Project: {project_display}") 168 | 169 | if oauth2_enabled: 170 | print("🔐 OAuth2 Authentication: Enabled") 171 | if oauth2_config: 172 | print(f"🔗 Issuer: {oauth2_config.get('issuer_url', 'Not configured')}") 173 | print(f"👥 Audience: {oauth2_config.get('audience', 'Not 
configured')}") 174 | print( 175 | f"🔑 Required Scopes: {oauth2_config.get('required_scopes', 'read:mimic-data')}" 176 | ) 177 | print("\n⚠️ Security Notice:") 178 | print(" - OAuth2 authentication is now required for all API calls") 179 | print(" - Ensure you have a valid access token with the required scopes") 180 | print( 181 | " - Set M3_OAUTH2_TOKEN environment variable with your Bearer token" 182 | ) 183 | else: 184 | print("🔓 OAuth2 Authentication: Disabled") 185 | 186 | print("\n🔄 Please restart Claude Desktop to apply changes") 187 | 188 | return True 189 | 190 | except Exception as e: 191 | print(f"❌ Error setting up Claude Desktop: {e}") 192 | return False 193 | 194 | 195 | def main(): 196 | """Main setup function.""" 197 | import argparse 198 | 199 | parser = argparse.ArgumentParser( 200 | description="Setup M3 MCP Server with Claude Desktop" 201 | ) 202 | parser.add_argument( 203 | "--backend", 204 | choices=["sqlite", "bigquery"], 205 | default="sqlite", 206 | help="Backend to use (default: sqlite)", 207 | ) 208 | parser.add_argument( 209 | "--db-path", help="Path to SQLite database (for sqlite backend)" 210 | ) 211 | parser.add_argument( 212 | "--project-id", help="Google Cloud project ID (for bigquery backend)" 213 | ) 214 | parser.add_argument( 215 | "--enable-oauth2", action="store_true", help="Enable OAuth2 authentication" 216 | ) 217 | parser.add_argument( 218 | "--oauth2-issuer", help="OAuth2 issuer URL (e.g., https://auth.example.com)" 219 | ) 220 | parser.add_argument("--oauth2-audience", help="OAuth2 audience (e.g., m3-api)") 221 | parser.add_argument( 222 | "--oauth2-scopes", 223 | default="read:mimic-data", 224 | help="Required OAuth2 scopes (comma-separated)", 225 | ) 226 | 227 | args = parser.parse_args() 228 | 229 | # Validate backend-specific arguments 230 | if args.backend == "sqlite" and args.project_id: 231 | print("❌ Error: --project-id can only be used with --backend bigquery") 232 | exit(1) 233 | 234 | if args.backend == 
"bigquery" and args.db_path: 235 | print("❌ Error: --db-path can only be used with --backend sqlite") 236 | exit(1) 237 | 238 | # Require project_id for BigQuery backend 239 | if args.backend == "bigquery" and not args.project_id: 240 | print("❌ Error: --project-id is required when using --backend bigquery") 241 | exit(1) 242 | 243 | print("🚀 Setting up M3 MCP Server with Claude Desktop...") 244 | print(f"📊 Backend: {args.backend}") 245 | 246 | # Prepare OAuth2 configuration if enabled 247 | oauth2_config = None 248 | if args.enable_oauth2: 249 | if not args.oauth2_issuer or not args.oauth2_audience: 250 | print( 251 | "❌ Error: --oauth2-issuer and --oauth2-audience are required when --enable-oauth2 is used" 252 | ) 253 | exit(1) 254 | 255 | oauth2_config = { 256 | "issuer_url": args.oauth2_issuer, 257 | "audience": args.oauth2_audience, 258 | "required_scopes": args.oauth2_scopes, 259 | } 260 | 261 | success = setup_claude_desktop( 262 | backend=args.backend, 263 | db_path=args.db_path, 264 | project_id=args.project_id, 265 | oauth2_enabled=args.enable_oauth2, 266 | oauth2_config=oauth2_config, 267 | ) 268 | 269 | if success: 270 | print("\n🎉 Setup complete! You can now use M3 tools in Claude Desktop.") 271 | print( 272 | "\n💡 Try asking Claude: 'What tools do you have available for MIMIC-IV data?'" 273 | ) 274 | else: 275 | print("\n💥 Setup failed. 
Please check the error messages above.") 276 | exit(1) 277 | 278 | 279 | if __name__ == "__main__": 280 | main() 281 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import tempfile 3 | from pathlib import Path 4 | from unittest.mock import MagicMock, patch 5 | 6 | import pytest 7 | from typer.testing import CliRunner 8 | 9 | import m3.cli as cli_module 10 | from m3.cli import app 11 | 12 | runner = CliRunner() 13 | 14 | 15 | @pytest.fixture(autouse=True) 16 | def inject_version(monkeypatch): 17 | monkeypatch.setattr(cli_module, "__version__", "0.0.1") 18 | 19 | 20 | def test_help_shows_app_name(): 21 | result = runner.invoke(app, ["--help"]) 22 | assert result.exit_code == 0 23 | assert "M3 CLI" in result.stdout 24 | 25 | 26 | def test_version_option_exits_zero_and_shows_version(): 27 | result = runner.invoke(app, ["--version"]) 28 | assert result.exit_code == 0 29 | assert "M3 CLI Version: 0.0.1" in result.stdout 30 | 31 | 32 | def test_unknown_command_reports_error(): 33 | result = runner.invoke(app, ["not-a-cmd"]) 34 | assert result.exit_code != 0 35 | # Check both stdout and stderr since error messages might go to either depending on environment 36 | error_message = "No such command 'not-a-cmd'" 37 | assert ( 38 | error_message in result.stdout 39 | or (hasattr(result, "stderr") and error_message in result.stderr) 40 | or error_message in result.output 41 | ) 42 | 43 | 44 | @patch("m3.cli.initialize_dataset") 45 | @patch("sqlite3.connect") 46 | def test_init_command_respects_custom_db_path( 47 | mock_sqlite_connect, mock_initialize_dataset 48 | ): 49 | """Test that m3 init --db-path correctly uses custom database path override.""" 50 | # Setup mocks 51 | mock_initialize_dataset.return_value = True 52 | 53 | # Mock sqlite connection and cursor for verification query 54 | mock_cursor = 
mock_sqlite_connect.return_value.cursor.return_value 55 | mock_cursor.fetchone.return_value = (100,) # Mock row count result 56 | 57 | with tempfile.TemporaryDirectory() as temp_dir: 58 | custom_db_path = Path(temp_dir) / "custom_mimic.db" 59 | # Resolve the path to handle symlinks (like /var -> /private/var on macOS) 60 | resolved_custom_db_path = custom_db_path.resolve() 61 | 62 | # Run the init command with custom db path 63 | result = runner.invoke( 64 | app, ["init", "mimic-iv-demo", "--db-path", str(custom_db_path)] 65 | ) 66 | 67 | # Assert command succeeded 68 | assert result.exit_code == 0 69 | 70 | # Verify the output mentions the custom path (either original or resolved form) 71 | assert ( 72 | str(custom_db_path) in result.stdout 73 | or str(resolved_custom_db_path) in result.stdout 74 | ) 75 | assert "Target database path:" in result.stdout 76 | 77 | # Verify initialize_dataset was called with the resolved custom path 78 | mock_initialize_dataset.assert_called_once_with( 79 | dataset_name="mimic-iv-demo", db_target_path=resolved_custom_db_path 80 | ) 81 | 82 | # Verify sqlite connection was attempted with the resolved custom path 83 | mock_sqlite_connect.assert_called_with(resolved_custom_db_path) 84 | 85 | 86 | def test_config_validation_sqlite_with_project_id(): 87 | """Test that sqlite backend rejects project-id parameter.""" 88 | result = runner.invoke( 89 | app, ["config", "claude", "--backend", "sqlite", "--project-id", "test"] 90 | ) 91 | assert result.exit_code == 1 92 | # Check output - error messages from typer usually go to stdout 93 | assert "project-id can only be used with --backend bigquery" in result.output 94 | 95 | 96 | def test_config_validation_bigquery_with_db_path(): 97 | """Test that bigquery backend rejects db-path parameter.""" 98 | result = runner.invoke( 99 | app, ["config", "claude", "--backend", "bigquery", "--db-path", "/test/path"] 100 | ) 101 | assert result.exit_code == 1 102 | # Check output - error messages from typer 
usually go to stdout 103 | assert "db-path can only be used with --backend sqlite" in result.output 104 | 105 | 106 | def test_config_validation_bigquery_requires_project_id(): 107 | """Test that bigquery backend requires project-id parameter.""" 108 | result = runner.invoke(app, ["config", "claude", "--backend", "bigquery"]) 109 | assert result.exit_code == 1 110 | # Check output - error messages from typer usually go to stdout 111 | assert "project-id is required when using --backend bigquery" in result.output 112 | 113 | 114 | @patch("subprocess.run") 115 | def test_config_claude_success(mock_subprocess): 116 | """Test successful Claude Desktop configuration.""" 117 | mock_subprocess.return_value = MagicMock(returncode=0) 118 | 119 | result = runner.invoke(app, ["config", "claude"]) 120 | assert result.exit_code == 0 121 | assert "Claude Desktop configuration completed" in result.stdout 122 | 123 | # Verify subprocess was called with correct script 124 | mock_subprocess.assert_called_once() 125 | call_args = mock_subprocess.call_args[0][0] 126 | assert "setup_claude_desktop.py" in call_args[1] # Script path is second argument 127 | 128 | 129 | @patch("subprocess.run") 130 | def test_config_universal_quick_mode(mock_subprocess): 131 | """Test universal config generator in quick mode.""" 132 | mock_subprocess.return_value = MagicMock(returncode=0) 133 | 134 | result = runner.invoke(app, ["config", "--quick"]) 135 | assert result.exit_code == 0 136 | assert "Generating M3 MCP configuration" in result.stdout 137 | 138 | # Verify subprocess was called with dynamic config script 139 | mock_subprocess.assert_called_once() 140 | call_args = mock_subprocess.call_args[0][0] 141 | assert "dynamic_mcp_config.py" in call_args[1] # Script path is second argument 142 | assert "--quick" in call_args 143 | 144 | 145 | @patch("subprocess.run") 146 | def test_config_script_failure(mock_subprocess): 147 | """Test error handling when config script fails.""" 148 | 
mock_subprocess.side_effect = subprocess.CalledProcessError(1, "cmd") 149 | 150 | result = runner.invoke(app, ["config", "claude"]) 151 | assert result.exit_code == 1 152 | # Just verify that the command failed with the right exit code 153 | # The specific error message may vary 154 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from m3.config import ( 4 | get_dataset_config, 5 | get_dataset_raw_files_path, 6 | get_default_database_path, 7 | ) 8 | 9 | 10 | def test_get_dataset_config_known(): 11 | cfg = get_dataset_config("mimic-iv-demo") 12 | assert isinstance(cfg, dict) 13 | assert cfg.get("default_db_filename") == "mimic_iv_demo.db" 14 | 15 | 16 | def test_get_dataset_config_unknown(): 17 | assert get_dataset_config("not-a-dataset") is None 18 | 19 | 20 | def test_default_paths(tmp_path, monkeypatch): 21 | # Redirect default dirs to a temp location 22 | import m3.config as cfg_mod 23 | 24 | monkeypatch.setattr(cfg_mod, "DEFAULT_DATABASES_DIR", tmp_path / "dbs") 25 | monkeypatch.setattr(cfg_mod, "DEFAULT_RAW_FILES_DIR", tmp_path / "raw") 26 | db_path = get_default_database_path("mimic-iv-demo") 27 | raw_path = get_dataset_raw_files_path("mimic-iv-demo") 28 | # They should be Path objects and exist 29 | assert isinstance(db_path, Path) 30 | assert db_path.parent.exists() 31 | assert isinstance(raw_path, Path) 32 | assert raw_path.exists() 33 | 34 | 35 | def test_raw_path_includes_dataset_name(tmp_path, monkeypatch): 36 | import m3.config as cfg_mod 37 | 38 | monkeypatch.setattr(cfg_mod, "DEFAULT_RAW_FILES_DIR", tmp_path / "raw") 39 | raw_path = get_dataset_raw_files_path("mimic-iv-demo") 40 | assert "mimic-iv-demo" in str(raw_path) 41 | -------------------------------------------------------------------------------- /tests/test_config_scripts.py: 
-------------------------------------------------------------------------------- 1 | """Tests for MCP configuration scripts.""" 2 | 3 | import sys 4 | from pathlib import Path 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | sys.path.insert(0, str(Path(__file__).parent.parent / "src")) 10 | 11 | from m3.mcp_client_configs.dynamic_mcp_config import MCPConfigGenerator 12 | 13 | 14 | class TestMCPConfigGenerator: 15 | """Test the MCPConfigGenerator class.""" 16 | 17 | def test_generate_config_sqlite_default(self): 18 | """Test generating SQLite config with defaults.""" 19 | generator = MCPConfigGenerator() 20 | 21 | with ( 22 | patch.object(generator, "_validate_python_path", return_value=True), 23 | patch.object(generator, "_validate_directory", return_value=True), 24 | ): 25 | config = generator.generate_config() 26 | 27 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "sqlite" 28 | assert "M3_PROJECT_ID" not in config["mcpServers"]["m3"]["env"] 29 | assert config["mcpServers"]["m3"]["args"] == ["-m", "m3.mcp_server"] 30 | 31 | def test_generate_config_bigquery_with_project(self): 32 | """Test generating BigQuery config with project ID.""" 33 | generator = MCPConfigGenerator() 34 | 35 | with ( 36 | patch.object(generator, "_validate_python_path", return_value=True), 37 | patch.object(generator, "_validate_directory", return_value=True), 38 | ): 39 | config = generator.generate_config( 40 | backend="bigquery", project_id="test-project" 41 | ) 42 | 43 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "bigquery" 44 | assert config["mcpServers"]["m3"]["env"]["M3_PROJECT_ID"] == "test-project" 45 | assert ( 46 | config["mcpServers"]["m3"]["env"]["GOOGLE_CLOUD_PROJECT"] 47 | == "test-project" 48 | ) 49 | 50 | def test_generate_config_sqlite_with_db_path(self): 51 | """Test generating SQLite config with custom database path.""" 52 | generator = MCPConfigGenerator() 53 | 54 | with ( 55 | patch.object(generator, "_validate_python_path", 
return_value=True), 56 | patch.object(generator, "_validate_directory", return_value=True), 57 | ): 58 | config = generator.generate_config( 59 | backend="sqlite", db_path="/custom/path/database.db" 60 | ) 61 | 62 | assert config["mcpServers"]["m3"]["env"]["M3_BACKEND"] == "sqlite" 63 | assert ( 64 | config["mcpServers"]["m3"]["env"]["M3_DB_PATH"] 65 | == "/custom/path/database.db" 66 | ) 67 | 68 | def test_generate_config_custom_server_name(self): 69 | """Test generating config with custom server name.""" 70 | generator = MCPConfigGenerator() 71 | 72 | with ( 73 | patch.object(generator, "_validate_python_path", return_value=True), 74 | patch.object(generator, "_validate_directory", return_value=True), 75 | ): 76 | config = generator.generate_config(server_name="custom-m3") 77 | 78 | assert "custom-m3" in config["mcpServers"] 79 | assert "m3" not in config["mcpServers"] 80 | 81 | def test_generate_config_additional_env_vars(self): 82 | """Test generating config with additional environment variables.""" 83 | generator = MCPConfigGenerator() 84 | 85 | with ( 86 | patch.object(generator, "_validate_python_path", return_value=True), 87 | patch.object(generator, "_validate_directory", return_value=True), 88 | ): 89 | config = generator.generate_config( 90 | additional_env={"DEBUG": "true", "LOG_LEVEL": "info"} 91 | ) 92 | 93 | env = config["mcpServers"]["m3"]["env"] 94 | assert env["DEBUG"] == "true" 95 | assert env["LOG_LEVEL"] == "info" 96 | assert env["M3_BACKEND"] == "sqlite" # Default should still be there 97 | 98 | def test_validation_invalid_python_path(self): 99 | """Test that invalid Python path raises error.""" 100 | generator = MCPConfigGenerator() 101 | 102 | with ( 103 | patch.object(generator, "_validate_python_path", return_value=False), 104 | patch.object(generator, "_validate_directory", return_value=True), 105 | ): 106 | with pytest.raises(ValueError, match="Invalid Python path"): 107 | generator.generate_config(python_path="/invalid/python") 108 | 
109 | def test_validation_invalid_directory(self): 110 | """Test that invalid working directory raises error.""" 111 | generator = MCPConfigGenerator() 112 | 113 | with ( 114 | patch.object(generator, "_validate_python_path", return_value=True), 115 | patch.object(generator, "_validate_directory", return_value=False), 116 | ): 117 | with pytest.raises(ValueError, match="Invalid working directory"): 118 | generator.generate_config(working_directory="/invalid/dir") 119 | -------------------------------------------------------------------------------- /tests/test_data_io.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | from m3.data_io import COMMON_USER_AGENT, _scrape_urls_from_html_page 4 | 5 | 6 | class DummyResponse: 7 | def __init__(self, content, status_code=200, headers=None): 8 | self.content = content.encode() 9 | self.status_code = status_code 10 | self.headers = headers or {} 11 | 12 | def raise_for_status(self): 13 | if not (200 <= self.status_code < 300): 14 | raise requests.exceptions.HTTPError(response=self) 15 | 16 | @property 17 | def reason(self): 18 | return "Error" 19 | 20 | def iter_content(self, chunk_size=1): 21 | yield from self.content 22 | 23 | 24 | def test_scrape_urls(monkeypatch): 25 | html = ( 26 | "<html><body>" 27 | '<a href="file1.csv.gz">ok</a>' 28 | '<a href="skip.txt">no</a>' 29 | "</body></html>" 30 | ) 31 | dummy = DummyResponse(html) 32 | session = requests.Session() 33 | monkeypatch.setattr(session, "get", lambda url, timeout=None: dummy) 34 | urls = _scrape_urls_from_html_page("http://example.com/", session) 35 | assert urls == ["http://example.com/file1.csv.gz"] 36 | 37 | 38 | def test_scrape_no_matching_suffix(monkeypatch): 39 | html = '<html><body><a href="file1.txt">ok</a></body></html>' 40 | dummy = DummyResponse(html) 41 | session = requests.Session() 42 | monkeypatch.setattr(session, "get", lambda url, timeout=None: dummy) 43 | urls = 
_scrape_urls_from_html_page("http://example.com/", session) 44 | assert urls == [] 45 | 46 | 47 | def test_common_user_agent_header(): 48 | # Ensure the constant is set and looks like a UA string 49 | assert isinstance(COMMON_USER_AGENT, str) 50 | assert "Mozilla/" in COMMON_USER_AGENT 51 | -------------------------------------------------------------------------------- /tests/test_example.py: -------------------------------------------------------------------------------- 1 | def test_always_passes(): 2 | """ 3 | A simple placeholder test that always passes. 4 | This ensures the test runner is configured correctly. 5 | """ 6 | assert True 7 | -------------------------------------------------------------------------------- /tests/test_mcp_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the MCP server functionality. 3 | """ 4 | 5 | import os 6 | import sqlite3 7 | from pathlib import Path 8 | from unittest.mock import Mock, patch 9 | 10 | import pytest 11 | from fastmcp import Client 12 | 13 | # Mock the database path check during import to handle CI environments 14 | with patch("pathlib.Path.exists", return_value=True): 15 | with patch( 16 | "m3.mcp_server.get_default_database_path", return_value=Path("/fake/test.db") 17 | ): 18 | from m3.mcp_server import _init_backend, mcp 19 | 20 | 21 | def _bigquery_available(): 22 | """Check if BigQuery dependencies are available.""" 23 | try: 24 | import importlib.util 25 | 26 | return importlib.util.find_spec("google.cloud.bigquery") is not None 27 | except ImportError: 28 | return False 29 | 30 | 31 | class TestMCPServerSetup: 32 | """Test MCP server setup and configuration.""" 33 | 34 | def test_server_instance_exists(self): 35 | """Test that the FastMCP server instance exists.""" 36 | assert mcp is not None 37 | assert mcp.name == "m3" 38 | 39 | def test_backend_init_sqlite_default(self): 40 | """Test SQLite backend initialization with defaults.""" 41 | 
with patch.dict(os.environ, {"M3_BACKEND": "sqlite"}, clear=True): 42 | with patch("m3.mcp_server.get_default_database_path") as mock_path: 43 | mock_path.return_value = Path("/fake/path.db") 44 | with patch("pathlib.Path.exists", return_value=True): 45 | _init_backend() 46 | # If no exception raised, initialization succeeded 47 | 48 | def test_backend_init_sqlite_custom_path(self): 49 | """Test SQLite backend initialization with custom path.""" 50 | with patch.dict( 51 | os.environ, 52 | {"M3_BACKEND": "sqlite", "M3_DB_PATH": "/custom/path.db"}, 53 | clear=True, 54 | ): 55 | with patch("pathlib.Path.exists", return_value=True): 56 | _init_backend() 57 | # If no exception raised, initialization succeeded 58 | 59 | def test_backend_init_sqlite_missing_db(self): 60 | """Test SQLite backend initialization with missing database.""" 61 | with patch.dict(os.environ, {"M3_BACKEND": "sqlite"}, clear=True): 62 | with patch("m3.mcp_server.get_default_database_path") as mock_path: 63 | mock_path.return_value = Path("/fake/path.db") 64 | with patch("pathlib.Path.exists", return_value=False): 65 | with pytest.raises(FileNotFoundError): 66 | _init_backend() 67 | 68 | @pytest.mark.skipif( 69 | not _bigquery_available(), reason="BigQuery dependencies not available" 70 | ) 71 | def test_backend_init_bigquery(self): 72 | """Test BigQuery backend initialization.""" 73 | with patch.dict( 74 | os.environ, 75 | {"M3_BACKEND": "bigquery", "M3_PROJECT_ID": "test-project"}, 76 | clear=True, 77 | ): 78 | with patch("google.cloud.bigquery.Client") as mock_client: 79 | mock_client.return_value = Mock() 80 | _init_backend() 81 | # If no exception raised, initialization succeeded 82 | mock_client.assert_called_once_with(project="test-project") 83 | 84 | def test_backend_init_invalid(self): 85 | """Test initialization with invalid backend.""" 86 | with patch.dict(os.environ, {"M3_BACKEND": "invalid"}, clear=True): 87 | with pytest.raises(ValueError, match="Unsupported backend"): 88 | 
_init_backend() 89 | 90 | 91 | class TestMCPTools: 92 | """Test MCP tools functionality.""" 93 | 94 | @pytest.fixture 95 | def test_db(self, tmp_path): 96 | """Create a test SQLite database.""" 97 | db_path = tmp_path / "test.db" 98 | 99 | # Create test database with MIMIC-IV-like structure 100 | conn = sqlite3.connect(db_path) 101 | cursor = conn.cursor() 102 | 103 | # Create icu_icustays table 104 | cursor.execute(""" 105 | CREATE TABLE icu_icustays ( 106 | subject_id INTEGER, 107 | hadm_id INTEGER, 108 | stay_id INTEGER, 109 | intime TEXT, 110 | outtime TEXT 111 | ) 112 | """) 113 | cursor.execute(""" 114 | INSERT INTO icu_icustays (subject_id, hadm_id, stay_id, intime, outtime) 115 | VALUES 116 | (10000032, 20000001, 30000001, '2180-07-23 15:00:00', '2180-07-24 12:00:00'), 117 | (10000033, 20000002, 30000002, '2180-08-15 10:30:00', '2180-08-16 14:15:00') 118 | """) 119 | 120 | # Create hosp_labevents table 121 | cursor.execute(""" 122 | CREATE TABLE hosp_labevents ( 123 | subject_id INTEGER, 124 | hadm_id INTEGER, 125 | itemid INTEGER, 126 | charttime TEXT, 127 | value TEXT 128 | ) 129 | """) 130 | cursor.execute(""" 131 | INSERT INTO hosp_labevents (subject_id, hadm_id, itemid, charttime, value) 132 | VALUES 133 | (10000032, 20000001, 50912, '2180-07-23 16:00:00', '120'), 134 | (10000033, 20000002, 50912, '2180-08-15 11:00:00', '95') 135 | """) 136 | 137 | conn.commit() 138 | conn.close() 139 | 140 | return str(db_path) 141 | 142 | @pytest.mark.asyncio 143 | async def test_tools_via_client(self, test_db): 144 | """Test MCP tools through the FastMCP client.""" 145 | # Set up environment for SQLite backend with OAuth2 disabled 146 | with patch.dict( 147 | os.environ, 148 | { 149 | "M3_BACKEND": "sqlite", 150 | "M3_DB_PATH": test_db, 151 | "M3_OAUTH2_ENABLED": "false", 152 | }, 153 | clear=True, 154 | ): 155 | # Initialize backend 156 | _init_backend() 157 | 158 | # Test via FastMCP client 159 | async with Client(mcp) as client: 160 | # Test execute_mimic_query 
tool 161 | result = await client.call_tool( 162 | "execute_mimic_query", 163 | {"sql_query": "SELECT COUNT(*) as count FROM icu_icustays"}, 164 | ) 165 | result_text = str(result) 166 | assert "count" in result_text 167 | assert "2" in result_text 168 | 169 | # Test get_icu_stays tool 170 | result = await client.call_tool( 171 | "get_icu_stays", {"patient_id": 10000032, "limit": 10} 172 | ) 173 | result_text = str(result) 174 | assert "10000032" in result_text 175 | 176 | # Test get_lab_results tool 177 | result = await client.call_tool( 178 | "get_lab_results", {"patient_id": 10000032, "limit": 20} 179 | ) 180 | result_text = str(result) 181 | assert "10000032" in result_text 182 | 183 | # Test get_database_schema tool 184 | result = await client.call_tool("get_database_schema", {}) 185 | result_text = str(result) 186 | assert "icu_icustays" in result_text or "hosp_labevents" in result_text 187 | 188 | @pytest.mark.asyncio 189 | async def test_security_checks(self, test_db): 190 | """Test SQL injection protection.""" 191 | with patch.dict( 192 | os.environ, 193 | { 194 | "M3_BACKEND": "sqlite", 195 | "M3_DB_PATH": test_db, 196 | "M3_OAUTH2_ENABLED": "false", 197 | }, 198 | clear=True, 199 | ): 200 | _init_backend() 201 | 202 | async with Client(mcp) as client: 203 | # Test dangerous queries are blocked 204 | dangerous_queries = [ 205 | "UPDATE icu_icustays SET subject_id = 999", 206 | "DELETE FROM icu_icustays", 207 | "INSERT INTO icu_icustays VALUES (1, 2, 3, '2020-01-01', '2020-01-02')", 208 | "DROP TABLE icu_icustays", 209 | "CREATE TABLE test (id INTEGER)", 210 | "ALTER TABLE icu_icustays ADD COLUMN test TEXT", 211 | ] 212 | 213 | for query in dangerous_queries: 214 | result = await client.call_tool( 215 | "execute_mimic_query", {"sql_query": query} 216 | ) 217 | result_text = str(result) 218 | assert ( 219 | "Security Error:" in result_text 220 | and "Only SELECT" in result_text 221 | ) 222 | 223 | @pytest.mark.asyncio 224 | async def test_invalid_sql(self, 
test_db): 225 | """Test handling of invalid SQL.""" 226 | with patch.dict( 227 | os.environ, 228 | { 229 | "M3_BACKEND": "sqlite", 230 | "M3_DB_PATH": test_db, 231 | "M3_OAUTH2_ENABLED": "false", 232 | }, 233 | clear=True, 234 | ): 235 | _init_backend() 236 | 237 | async with Client(mcp) as client: 238 | result = await client.call_tool( 239 | "execute_mimic_query", {"sql_query": "INVALID SQL QUERY"} 240 | ) 241 | result_text = str(result) 242 | assert "Query Failed:" in result_text and "syntax error" in result_text 243 | 244 | @pytest.mark.asyncio 245 | async def test_empty_results(self, test_db): 246 | """Test handling of queries with no results.""" 247 | with patch.dict( 248 | os.environ, 249 | { 250 | "M3_BACKEND": "sqlite", 251 | "M3_DB_PATH": test_db, 252 | "M3_OAUTH2_ENABLED": "false", 253 | }, 254 | clear=True, 255 | ): 256 | _init_backend() 257 | 258 | async with Client(mcp) as client: 259 | result = await client.call_tool( 260 | "execute_mimic_query", 261 | { 262 | "sql_query": "SELECT * FROM icu_icustays WHERE subject_id = 999999" 263 | }, 264 | ) 265 | result_text = str(result) 266 | assert "No results found" in result_text 267 | 268 | @pytest.mark.asyncio 269 | async def test_oauth2_authentication_required(self, test_db): 270 | """Test that OAuth2 authentication is required when enabled.""" 271 | # Set up environment for SQLite backend with OAuth2 enabled 272 | with patch.dict( 273 | os.environ, 274 | { 275 | "M3_BACKEND": "sqlite", 276 | "M3_DB_PATH": test_db, 277 | "M3_OAUTH2_ENABLED": "true", 278 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 279 | "M3_OAUTH2_AUDIENCE": "m3-api", 280 | }, 281 | clear=True, 282 | ): 283 | _init_backend() 284 | 285 | async with Client(mcp) as client: 286 | # Test that tools require authentication 287 | result = await client.call_tool( 288 | "execute_mimic_query", 289 | {"sql_query": "SELECT COUNT(*) FROM icu_icustays"}, 290 | ) 291 | result_text = str(result) 292 | assert "Missing OAuth2 access token" in 
result_text 293 | 294 | 295 | class TestBigQueryIntegration: 296 | """Test BigQuery integration with mocks (no real API calls).""" 297 | 298 | @pytest.mark.skipif( 299 | not _bigquery_available(), reason="BigQuery dependencies not available" 300 | ) 301 | @pytest.mark.asyncio 302 | async def test_bigquery_tools(self): 303 | """Test BigQuery tools functionality with mocks.""" 304 | with patch.dict( 305 | os.environ, 306 | {"M3_BACKEND": "bigquery", "M3_PROJECT_ID": "test-project"}, 307 | clear=True, 308 | ): 309 | with patch("google.cloud.bigquery.Client") as mock_client: 310 | # Mock BigQuery client and query results 311 | mock_job = Mock() 312 | mock_df = Mock() 313 | mock_df.empty = False 314 | mock_df.to_string.return_value = "Mock BigQuery result" 315 | mock_df.__len__ = Mock(return_value=5) 316 | mock_job.to_dataframe.return_value = mock_df 317 | 318 | mock_client_instance = Mock() 319 | mock_client_instance.query.return_value = mock_job 320 | mock_client.return_value = mock_client_instance 321 | 322 | _init_backend() 323 | 324 | async with Client(mcp) as client: 325 | # Test execute_mimic_query tool 326 | result = await client.call_tool( 327 | "execute_mimic_query", 328 | { 329 | "sql_query": "SELECT COUNT(*) FROM `physionet-data.mimiciv_3_1_icu.icustays`" 330 | }, 331 | ) 332 | result_text = str(result) 333 | assert "Mock BigQuery result" in result_text 334 | 335 | # Test get_race_distribution tool 336 | result = await client.call_tool( 337 | "get_race_distribution", {"limit": 5} 338 | ) 339 | result_text = str(result) 340 | assert "Mock BigQuery result" in result_text 341 | 342 | # Verify BigQuery client was called 343 | mock_client.assert_called_once_with(project="test-project") 344 | assert mock_client_instance.query.called 345 | 346 | 347 | class TestServerIntegration: 348 | """Test overall server integration.""" 349 | 350 | def test_server_main_function_exists(self): 351 | """Test that the main function exists and is callable.""" 352 | from 
m3.mcp_server import main 353 | 354 | assert callable(main) 355 | 356 | def test_server_can_be_imported_as_module(self): 357 | """Test that the server can be imported as a module.""" 358 | import m3.mcp_server 359 | 360 | assert hasattr(m3.mcp_server, "mcp") 361 | assert hasattr(m3.mcp_server, "main") 362 | -------------------------------------------------------------------------------- /tests/test_oauth2_basic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic OAuth2 authentication tests. 3 | """ 4 | 5 | import os 6 | from unittest.mock import patch 7 | 8 | import pytest 9 | 10 | from m3.auth import ( 11 | OAuth2Config, 12 | init_oauth2, 13 | is_oauth2_enabled, 14 | require_oauth2, 15 | ) 16 | 17 | 18 | class TestOAuth2BasicConfig: 19 | """Test basic OAuth2 configuration.""" 20 | 21 | def test_oauth2_disabled_by_default(self): 22 | """Test that OAuth2 is disabled by default.""" 23 | with patch.dict(os.environ, {}, clear=True): 24 | config = OAuth2Config() 25 | assert not config.enabled 26 | 27 | def test_oauth2_enabled_configuration(self): 28 | """Test OAuth2 enabled configuration.""" 29 | env_vars = { 30 | "M3_OAUTH2_ENABLED": "true", 31 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 32 | "M3_OAUTH2_AUDIENCE": "m3-api", 33 | "M3_OAUTH2_REQUIRED_SCOPES": "read:mimic-data,write:mimic-data", 34 | } 35 | 36 | with patch.dict(os.environ, env_vars, clear=True): 37 | config = OAuth2Config() 38 | assert config.enabled 39 | assert config.issuer_url == "https://auth.example.com" 40 | assert config.audience == "m3-api" 41 | assert config.required_scopes == {"read:mimic-data", "write:mimic-data"} 42 | 43 | def test_oauth2_invalid_configuration_raises_error(self): 44 | """Test that invalid OAuth2 configuration raises an error.""" 45 | with patch.dict(os.environ, {"M3_OAUTH2_ENABLED": "true"}, clear=True): 46 | with pytest.raises(ValueError, match="M3_OAUTH2_ISSUER_URL is required"): 47 | OAuth2Config() 48 | 49 | def 
test_jwks_url_auto_discovery(self): 50 | """Test automatic JWKS URL discovery.""" 51 | env_vars = { 52 | "M3_OAUTH2_ENABLED": "true", 53 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 54 | "M3_OAUTH2_AUDIENCE": "m3-api", 55 | } 56 | 57 | with patch.dict(os.environ, env_vars, clear=True): 58 | config = OAuth2Config() 59 | assert config.jwks_url == "https://auth.example.com/.well-known/jwks.json" 60 | 61 | def test_scope_parsing(self): 62 | """Test scope parsing from environment variable.""" 63 | config = OAuth2Config() 64 | 65 | # Test comma-separated scopes 66 | scopes = config._parse_scopes("read:data, write:data, admin") 67 | assert scopes == {"read:data", "write:data", "admin"} 68 | 69 | # Test empty scopes 70 | scopes = config._parse_scopes("") 71 | assert scopes == set() 72 | 73 | 74 | class TestOAuth2BasicIntegration: 75 | """Test basic OAuth2 integration functions.""" 76 | 77 | def test_init_oauth2_disabled(self): 78 | """Test OAuth2 initialization when disabled.""" 79 | with patch.dict(os.environ, {}, clear=True): 80 | init_oauth2() 81 | assert not is_oauth2_enabled() 82 | 83 | def test_init_oauth2_enabled(self): 84 | """Test OAuth2 initialization when enabled.""" 85 | env_vars = { 86 | "M3_OAUTH2_ENABLED": "true", 87 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 88 | "M3_OAUTH2_AUDIENCE": "m3-api", 89 | } 90 | 91 | with patch.dict(os.environ, env_vars, clear=True): 92 | init_oauth2() 93 | assert is_oauth2_enabled() 94 | 95 | 96 | class TestOAuth2BasicDecorator: 97 | """Test basic OAuth2 decorator functionality.""" 98 | 99 | def setup_method(self): 100 | """Set up test fixtures.""" 101 | # Reset global state 102 | import m3.auth 103 | 104 | m3.auth._oauth2_config = None 105 | m3.auth._oauth2_validator = None 106 | 107 | def test_decorator_with_oauth2_disabled(self): 108 | """Test decorator behavior when OAuth2 is disabled.""" 109 | 110 | @require_oauth2 111 | def test_function(): 112 | return "success" 113 | 114 | with patch.dict(os.environ, 
{}, clear=True): 115 | init_oauth2() 116 | 117 | # Should allow access when OAuth2 is disabled 118 | result = test_function() 119 | assert result == "success" 120 | 121 | def test_decorator_with_missing_token(self): 122 | """Test decorator behavior with missing token.""" 123 | 124 | @require_oauth2 125 | def test_function(): 126 | return "success" 127 | 128 | env_vars = { 129 | "M3_OAUTH2_ENABLED": "true", 130 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 131 | "M3_OAUTH2_AUDIENCE": "m3-api", 132 | } 133 | 134 | with patch.dict(os.environ, env_vars, clear=True): 135 | init_oauth2() 136 | 137 | # Should return error when token is missing 138 | result = test_function() 139 | assert "Missing OAuth2 access token" in result 140 | 141 | def test_decorator_with_invalid_token_format(self): 142 | """Test decorator behavior with invalid token format.""" 143 | 144 | @require_oauth2 145 | def test_function(): 146 | return "success" 147 | 148 | env_vars = { 149 | "M3_OAUTH2_ENABLED": "true", 150 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 151 | "M3_OAUTH2_AUDIENCE": "m3-api", 152 | "M3_OAUTH2_TOKEN": "invalid-token", 153 | } 154 | 155 | with patch.dict(os.environ, env_vars, clear=True): 156 | init_oauth2() 157 | 158 | # Should return error with invalid token format 159 | result = test_function() 160 | assert "Invalid token format" in result 161 | 162 | def test_decorator_with_valid_jwt_format(self): 163 | """Test decorator behavior with valid JWT format.""" 164 | 165 | @require_oauth2 166 | def test_function(): 167 | return "success" 168 | 169 | # Valid JWT format (header.payload.signature) 170 | valid_jwt = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.signature" 171 | 172 | env_vars = { 173 | "M3_OAUTH2_ENABLED": "true", 174 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 175 | "M3_OAUTH2_AUDIENCE": "m3-api", 176 | "M3_OAUTH2_TOKEN": f"Bearer {valid_jwt}", 177 | } 178 | 179 | with 
patch.dict(os.environ, env_vars, clear=True): 180 | init_oauth2() 181 | 182 | # Should work with valid JWT format 183 | result = test_function() 184 | assert result == "success" 185 | 186 | def test_decorator_with_bearer_prefix_removal(self): 187 | """Test that Bearer prefix is correctly removed.""" 188 | 189 | @require_oauth2 190 | def test_function(): 191 | return "success" 192 | 193 | # Valid JWT format (header.payload.signature) 194 | valid_jwt = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.signature" 195 | 196 | env_vars = { 197 | "M3_OAUTH2_ENABLED": "true", 198 | "M3_OAUTH2_ISSUER_URL": "https://auth.example.com", 199 | "M3_OAUTH2_AUDIENCE": "m3-api", 200 | "M3_OAUTH2_TOKEN": f"Bearer {valid_jwt}", 201 | } 202 | 203 | with patch.dict(os.environ, env_vars, clear=True): 204 | init_oauth2() 205 | 206 | # Should work even with Bearer prefix 207 | result = test_function() 208 | assert result == "success" 209 | -------------------------------------------------------------------------------- /webapp/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /webapp/README.md: -------------------------------------------------------------------------------- 1 | # M3 Webapp README 2 | 3 | This file provides instructions on how to run and build the M3 web application. 
4 | 5 | ## Setup 6 | ```bash 7 | cd webapp # Navigate to webapp directory 8 | npm install # Install dependencies 9 | ``` 10 | 11 | ## Available Scripts 12 | 13 | In the project directory, you can run: 14 | 15 | ### `npm start` 16 | 17 | Runs the app in the development mode.\ 18 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser. 19 | 20 | The page will reload if you make edits. 21 | 22 | ### `npm run build` 23 | 24 | Builds the app for production to the `build` folder. 25 | -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "homepage": "https://rafiattrach.github.io/m3", 6 | "dependencies": { 7 | "@emailjs/browser": "^4.4.1", 8 | "@testing-library/dom": "^10.4.0", 9 | "@testing-library/jest-dom": "^6.6.3", 10 | "@testing-library/react": "^16.3.0", 11 | "@testing-library/user-event": "^13.5.0", 12 | "react": "^19.1.0", 13 | "react-dom": "^19.1.0", 14 | "react-router-dom": "^7.6.2", 15 | "react-scripts": "5.0.1", 16 | "web-vitals": "^2.1.4" 17 | }, 18 | "scripts": { 19 | "start": "react-scripts start", 20 | "build": "react-scripts build", 21 | "test": "react-scripts test", 22 | "eject": "react-scripts eject" 23 | }, 24 | "eslintConfig": { 25 | "extends": [ 26 | "react-app", 27 | "react-app/jest" 28 | ] 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /webapp/public/banner1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner1.png -------------------------------------------------------------------------------- /webapp/public/banner2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner2.png -------------------------------------------------------------------------------- /webapp/public/banner3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/banner3.png -------------------------------------------------------------------------------- /webapp/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/favicon.ico -------------------------------------------------------------------------------- /webapp/public/index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8" /> 5 | <link rel="icon" href="%PUBLIC_URL%/m3_logo.png" /> 6 | <meta name="viewport" content="width=device-width, initial-scale=1" /> 7 | <meta name="theme-color" content="#000000" /> 8 | <meta 9 | name="description" 10 | content="Web site created using create-react-app" 11 | /> 12 | <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" /> 13 | <!-- 14 | manifest.json provides metadata used when your web app is installed on a 15 | user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/ 16 | --> 17 | <link rel="manifest" href="%PUBLIC_URL%/manifest.json" /> 18 | <!-- 19 | Notice the use of %PUBLIC_URL% in the tags above. 
20 | It will be replaced with the URL of the `public` folder during the build. 21 | Only files inside the `public` folder can be referenced from the HTML. 22 | 23 | Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will 24 | work correctly both with client-side routing and a non-root public URL. 25 | Learn how to configure a non-root public URL by running `npm run build`. 26 | --> 27 | <title>M3 - MCP for EHRs 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /webapp/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/logo192.png -------------------------------------------------------------------------------- /webapp/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/logo512.png -------------------------------------------------------------------------------- /webapp/public/m3_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_architecture.png -------------------------------------------------------------------------------- /webapp/public/m3_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_logo.png -------------------------------------------------------------------------------- /webapp/public/m3_logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/m3_logo_transparent.png -------------------------------------------------------------------------------- /webapp/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "M3", 3 | "name": "M3-MCP for EHRs", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 
12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /webapp/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_1.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_2.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_3.mp4 -------------------------------------------------------------------------------- /webapp/public/videos/m3_website_4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafiattrach/m3/4b81a0662639103a6ba3ef519bcb39a8512f28b2/webapp/public/videos/m3_website_4.mp4 -------------------------------------------------------------------------------- /webapp/src/App.js: 
-------------------------------------------------------------------------------- 1 | import React, { useEffect } from 'react'; 2 | import './App.css'; 3 | import Header from './components/Header'; 4 | import Hero from './components/Hero'; 5 | import Contact from './components/Contact'; 6 | import Paper from './components/Paper'; 7 | import Demos from './components/Demos'; 8 | import Explanation from './components/Explanation'; 9 | import Features from './components/Features'; 10 | import Citation from './components/Citation'; 11 | import CTA from './components/CTA'; 12 | import Footer from './components/Footer'; 13 | import Installation from './components/Installation'; 14 | 15 | function App() { 16 | useEffect(() => { 17 | // Header scroll effect 18 | const handleScroll = () => { 19 | const header = document.querySelector('header'); 20 | if (header) { 21 | if (window.scrollY > 100) { 22 | header.style.background = 'rgba(255, 255, 255, 0.98)'; 23 | header.style.boxShadow = '0 2px 20px rgba(0, 0, 0, 0.1)'; 24 | } else { 25 | header.style.background = 'rgba(255, 255, 255, 0.95)'; 26 | header.style.boxShadow = 'none'; 27 | } 28 | } 29 | 30 | const scrolled = window.pageYOffset; 31 | const laptopMockup = document.querySelector('.laptop-mockup'); 32 | 33 | if (laptopMockup) { 34 | const rate = scrolled * 0.2; 35 | laptopMockup.style.transform = `translateY(${rate}px)`; 36 | } 37 | 38 | const ctaSection = document.querySelector('.cta-section'); 39 | if (ctaSection) { 40 | const rate = scrolled * 0.1; 41 | ctaSection.style.backgroundPosition = `center ${rate}px`; 42 | } 43 | }; 44 | 45 | window.addEventListener('scroll', handleScroll); 46 | 47 | // Intersection Observer for animations 48 | const observerOptions = { 49 | threshold: 0.1, 50 | rootMargin: '0px 0px -50px 0px' 51 | }; 52 | 53 | const observer = new IntersectionObserver((entries) => { 54 | entries.forEach(entry => { 55 | if (entry.isIntersecting) { 56 | entry.target.classList.add('visible'); 57 | } 58 | }); 59 
| }, observerOptions); 60 | 61 | document.querySelectorAll('.fade-in').forEach(el => { 62 | observer.observe(el); 63 | }); 64 | 65 | // Add interactive hover effects for demo cards 66 | document.querySelectorAll('.demo-card').forEach(card => { 67 | card.addEventListener('mouseenter', () => { 68 | card.style.transform = 'translateY(-8px) scale(1.02)'; 69 | }); 70 | 71 | card.addEventListener('mouseleave', () => { 72 | card.style.transform = 'translateY(0) scale(1)'; 73 | }); 74 | }); 75 | 76 | // Animate dashboard cards on scroll 77 | const animateDashboard = () => { 78 | const cards = document.querySelectorAll('.dashboard-card'); 79 | cards.forEach((card, index) => { 80 | setTimeout(() => { 81 | card.style.transform = 'translateY(0)'; 82 | card.style.opacity = '1'; 83 | }, index * 200); 84 | }); 85 | }; 86 | 87 | // Initialize dashboard animation 88 | setTimeout(animateDashboard, 1000); 89 | 90 | return () => { 91 | window.removeEventListener('scroll', handleScroll); 92 | // Clean up other event listeners if necessary 93 | }; 94 | }, []); 95 | 96 | return ( 97 |
98 |
99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 |
110 | ); 111 | } 112 | 113 | export default App; 114 | -------------------------------------------------------------------------------- /webapp/src/components/ArchitectureDiagram.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | const ArchitectureDiagram = () => { 4 | return ( 5 |
6 |
7 |
8 |

Architecture Overview

9 |

How the m3 Model Context Protocol connects AI models to MIMIC-IV healthcare data

10 |
11 | 12 |
13 | m3 Architecture Diagram 24 |
25 |
26 |
27 | ); 28 | }; 29 | 30 | export default ArchitectureDiagram; 31 | -------------------------------------------------------------------------------- /webapp/src/components/CTA.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | const CTA = () => { 4 | return ( 5 | <> 6 | 32 |
33 |
34 |
35 |

Contribute to our Open Source project

36 |

Help us build a better platform for everyone. We are looking for developers to contribute with their code and ideas.

37 | 38 | 39 | 40 | 41 | Contribute Now 42 | 43 |
44 |
45 |
46 | 47 | ); 48 | }; 49 | 50 | export default CTA; 51 | -------------------------------------------------------------------------------- /webapp/src/components/Citation.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | 3 | const Citation = () => { 4 | const [copiedFormat, setCopiedFormat] = useState(null); 5 | 6 | const citations = { 7 | apa: `Al Attrach, R., Moreira, P., Fani, R., Umeton, R., & Celi, L. A. (2025). Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis. arXiv preprint arXiv:2507.01053.`, 8 | mla: `Al Attrach, Rafi, et al. "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis." arXiv preprint arXiv:2507.01053 (2025).`, 9 | chicago: `Al Attrach, Rafi, Pedro Moreira, Rajna Fani, Renato Umeton, and Leo Anthony Celi. "Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis." arXiv preprint arXiv:2507.01053 (2025).`, 10 | bibtex: `@misc{attrach2025conversationalllmssimplifysecure, 11 | title={Conversational LLMs Simplify Secure Clinical Data Access, Understanding, and Analysis}, 12 | author={Rafi Al Attrach and Pedro Moreira and Rajna Fani and Renato Umeton and Leo Anthony Celi}, 13 | year={2025}, 14 | eprint={2507.01053}, 15 | archivePrefix={arXiv}, 16 | primaryClass={cs.IR}, 17 | url={https://arxiv.org/abs/2507.01053}, 18 | }` 19 | }; 20 | 21 | const handleCopy = (format) => { 22 | navigator.clipboard.writeText(citations[format]) 23 | .then(() => { 24 | setCopiedFormat(format); 25 | setTimeout(() => setCopiedFormat(null), 2000); 26 | }) 27 | .catch(err => { 28 | console.error('Could not copy text: ', err); 29 | }); 30 | }; 31 | 32 | return ( 33 |
34 |
35 |
36 |

Cite This Work

37 |

If you use m3 in your research, please cite our paper :)

38 |
39 | 40 |
41 |
42 |
43 |

BibTeX Format

44 | 50 |
51 |
52 |
{citations.bibtex}
53 |
54 |
55 |
56 |
57 |
58 | ); 59 | }; 60 | 61 | export default Citation; 62 | -------------------------------------------------------------------------------- /webapp/src/components/Contact.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import emailjs from '@emailjs/browser'; 3 | 4 | const Contact = () => { 5 | // EmailJS configuration with your actual credentials 6 | const EMAILJS_CONFIG = { 7 | serviceId: 'm3_contact_service', 8 | templateId: 'template_sn5rm19', 9 | publicKey: 'aUrTfsE6oJtpIe1ac' 10 | }; 11 | 12 | const [contactForm, setContactForm] = useState({ 13 | email: '', 14 | inquiryType: 'hospital', 15 | message: '' 16 | }); 17 | const [isSubmitting, setIsSubmitting] = useState(false); 18 | const [submitStatus, setSubmitStatus] = useState(null); 19 | 20 | const handleInputChange = (e) => { 21 | const { name, value } = e.target; 22 | setContactForm(prev => ({ 23 | ...prev, 24 | [name]: value 25 | })); 26 | }; 27 | 28 | const handleSubmit = async (e) => { 29 | e.preventDefault(); 30 | setIsSubmitting(true); 31 | setSubmitStatus(null); 32 | 33 | try { 34 | // Prepare template parameters 35 | const templateParams = { 36 | user_email: contactForm.email, 37 | user_name: contactForm.email.split('@')[0], // Extract name from email 38 | inquiry_type: contactForm.inquiryType, 39 | message: contactForm.message || 'No additional message provided', 40 | inquiry_type_label: contactForm.inquiryType === 'hospital' ? 'Hospital/EHR MCP Request' : 41 | contactForm.inquiryType === 'suggestions' ? 
'Suggestions & Feedback' : 'General Contact', 42 | timestamp: new Date().toLocaleString() 43 | }; 44 | 45 | // Send email using EmailJS 46 | const response = await emailjs.send( 47 | EMAILJS_CONFIG.serviceId, 48 | EMAILJS_CONFIG.templateId, 49 | templateParams, 50 | EMAILJS_CONFIG.publicKey 51 | ); 52 | 53 | console.log('Email sent successfully:', response); 54 | setSubmitStatus({ type: 'success', message: 'Message sent successfully! We\'ll get back to you soon.' }); 55 | setContactForm({ email: '', inquiryType: 'hospital', message: '' }); 56 | 57 | } catch (error) { 58 | console.error('Error sending email:', error); 59 | setSubmitStatus('error'); 60 | // Store the specific error message for display 61 | setSubmitStatus({ type: 'error', message: getErrorMessage(error) }); 62 | } finally { 63 | setIsSubmitting(false); 64 | } 65 | }; 66 | 67 | const getErrorMessage = (error) => { 68 | // Handle different types of errors 69 | if (error.message?.includes('rate limit') || error.status === 429) { 70 | return 'Too many emails sent recently. Please try again in a few minutes.'; 71 | } 72 | if (error.message?.includes('invalid email') || error.status === 400) { 73 | return 'Please check your email address and try again.'; 74 | } 75 | if (error.message?.includes('network') || !navigator.onLine) { 76 | return 'Network error. Please check your internet connection and try again.'; 77 | } 78 | if (error.status === 403) { 79 | return 'Service temporarily unavailable. Please try again later.'; 80 | } 81 | return 'Failed to send message. Please try again later.'; 82 | }; 83 | 84 | return ( 85 | <> 86 | 244 |
245 |
246 |
247 |

Let's Connect

248 |

Need an MCP for your hospital or EHR? Have suggestions? Want to collaborate? We'd love to hear from you!

249 |

250 | ⚡ Our team responds within 24 hours 251 |

252 |
253 | 254 |
255 |
256 |
257 | 258 | 268 |
269 | 270 |
271 | 272 | 283 |
284 |
285 | 286 |
287 | 288 |