├── .python-version ├── source ├── video │ ├── usage_03.mp4 │ └── usage_03.gif ├── img │ ├── macOS_cli.png │ ├── usage_01.png │ ├── usage_02.png │ └── logo_with_name.jpg ├── reports │ ├── 2025 │ │ └── 0921 │ │ │ └── stata_mcp_a_research_report_on_ai_assisted_empirical_research.md │ ├── ai_coding_ability │ │ ├── ability.png │ │ └── main.ipynb │ └── README.md ├── docs │ ├── Usages │ │ ├── config.json │ │ ├── ClaudeCode │ │ │ ├── 00_intro.md │ │ │ └── 01_install.md │ │ ├── agent_as │ │ │ └── agent_as_tool.md │ │ ├── Questions.md │ │ ├── Evaluation.md │ │ └── Advanced.md │ ├── JupyterStata.md │ ├── Rights │ │ └── CLA.md │ ├── ChinaUsers │ │ └── uv.md │ ├── ProblemCatalog.md │ ├── Difference.md │ ├── Rules │ │ └── git_std_rule.md │ └── README │ │ ├── cn │ │ └── README.md │ │ └── sp │ │ └── README.md ├── example │ └── data │ │ └── README.md └── agent_examples │ ├── langchain │ └── main.py │ ├── task_prompt │ ├── agent_langchain.py │ └── agent_openai.py │ ├── README.md │ └── openai │ └── main.py ├── src └── stata_mcp │ ├── core │ ├── __init__.py │ ├── types │ │ ├── _error.py │ │ └── __init__.py │ ├── stata │ │ ├── stata_do │ │ │ ├── __init__.py │ │ │ └── do.py │ │ ├── builtin_tools │ │ │ ├── __init__.py │ │ │ ├── ado_install │ │ │ │ ├── __init__.py │ │ │ │ ├── net_install.py │ │ │ │ ├── ssc_install.py │ │ │ │ ├── github_install.py │ │ │ │ └── base.py │ │ │ └── stata_help.py │ │ ├── stata_finder │ │ │ ├── __init__.py │ │ │ ├── linux.py │ │ │ ├── macos.py │ │ │ ├── finder.py │ │ │ └── windows.py │ │ ├── stata_controller │ │ │ ├── __init__.py │ │ │ └── controller.py │ │ └── __init__.py │ ├── claude_proj │ │ ├── __init__.py │ │ ├── cwd_cfg.py │ │ ├── proj.py │ │ └── claude_cfg.py │ └── data_info │ │ ├── __init__.py │ │ ├── dta.py │ │ └── csv.py │ ├── utils │ ├── Installer │ │ ├── __init__.py │ │ └── installer.py │ ├── __init__.py │ └── Prompt │ │ ├── __init__.py │ │ └── string.py │ ├── cli │ ├── __init__.py │ └── _cli.py │ ├── evaluate │ ├── __init__.py │ ├── _model.py │ ├── 
advice.py │ └── score_it.py │ ├── __init__.py │ ├── sandbox │ ├── __init__.py │ ├── core │ │ ├── execute.py │ │ ├── __init__.py │ │ ├── result_processor.py │ │ └── sandbox_manager.py │ ├── README.md │ └── jupyter_manager │ │ ├── kernel_pool.py │ │ ├── __init__.py │ │ └── kernel_manager.py │ └── agent_as │ ├── agent_as_rag │ ├── __init__.py │ ├── _tools.py │ ├── handoff.py │ └── _base.py │ ├── __init__.py │ ├── agent_as_tool │ ├── __init__.py │ ├── any_as_tools.py │ └── stata_agent.py │ ├── agent_base.py │ ├── set_model.py │ └── repl_agents.py ├── main.py ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request_cn.md │ ├── general_issue_cn.md │ ├── bug_report_cn.md │ ├── feature_request.md │ ├── general_issue.md │ └── bug_report.md ├── labels.yml ├── workflows │ ├── labels.yml │ ├── python-package.yml │ └── lint.yml └── dependabot.yml ├── CITATION.cff ├── .gitignore ├── pyproject.toml ├── CONTRIBUTING.md ├── SECURITY.md └── CLAUDE.md /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /source/video/usage_03.mp4: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stata_mcp/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/stata_mcp/core/types/_error.py: -------------------------------------------------------------------------------- 1 | class StataMCPError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /source/img/macOS_cli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/img/macOS_cli.png 
-------------------------------------------------------------------------------- /source/img/usage_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/img/usage_01.png -------------------------------------------------------------------------------- /source/img/usage_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/img/usage_02.png -------------------------------------------------------------------------------- /source/video/usage_03.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/video/usage_03.gif -------------------------------------------------------------------------------- /source/img/logo_with_name.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/img/logo_with_name.jpg -------------------------------------------------------------------------------- /src/stata_mcp/utils/Installer/__init__.py: -------------------------------------------------------------------------------- 1 | from .installer import Installer 2 | 3 | __all__ = ["Installer"] 4 | -------------------------------------------------------------------------------- /src/stata_mcp/cli/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cli import main as main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/stata_do/__init__.py: -------------------------------------------------------------------------------- 1 | from .do import StataDo 2 | 3 | __all__ = [ 4 | "StataDo" 5 | ] 6 | 
-------------------------------------------------------------------------------- /src/stata_mcp/core/types/__init__.py: -------------------------------------------------------------------------------- 1 | from ._error import StataMCPError 2 | 3 | __all__ = [ 4 | "StataMCPError" 5 | ] 6 | -------------------------------------------------------------------------------- /src/stata_mcp/core/claude_proj/__init__.py: -------------------------------------------------------------------------------- 1 | from .proj import ClaudeProject 2 | 3 | __all__ = [ 4 | "ClaudeProject", 5 | ] 6 | -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/builtin_tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .stata_help import StataHelp 2 | 3 | __all__ = [ 4 | "StataHelp", 5 | ] 6 | -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/stata_finder/__init__.py: -------------------------------------------------------------------------------- 1 | from .finder import StataFinder 2 | 3 | __all__ = [ 4 | "StataFinder", 5 | ] 6 | -------------------------------------------------------------------------------- /source/reports/ai_coding_ability/ability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SepineTam/stata-mcp/HEAD/source/reports/ai_coding_ability/ability.png -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/stata_controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .controller import StataController 2 | 3 | __all__ = [ 4 | "StataController", 5 | ] 6 | -------------------------------------------------------------------------------- /src/stata_mcp/core/data_info/__init__.py:
-------------------------------------------------------------------------------- 1 | from .csv import CsvDataInfo 2 | from .dta import DtaDataInfo 3 | 4 | __all__ = [ 5 | "CsvDataInfo", 6 | "DtaDataInfo", 7 | ] 8 | -------------------------------------------------------------------------------- /src/stata_mcp/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent_runner import AgentRunner 2 | from .score_it import ScoreModel 3 | 4 | __all__ = [ 5 | "ScoreModel", 6 | "AgentRunner" 7 | ] 8 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from stata_mcp.mcp_servers import stata_mcp as mcp 2 | 3 | 4 | def main(transport: str = "stdio"): 5 | mcp.run(transport=transport) 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /src/stata_mcp/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | __version__ = version("stata-mcp") 4 | __author__ = "Song Tan " 5 | 6 | 7 | if __name__ == "__main__": 8 | print(f"Hello Stata-MCP@version{__version__}") 9 | -------------------------------------------------------------------------------- /source/docs/Usages/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "mcpServers": { 3 | "stata-mcp": { 4 | "command": "uvx", 5 | "args": [ 6 | "stata-mcp" 7 | ], 8 | "env": { 9 | "stata_cli": "stata-mp" 10 | } 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/__init__.py: -------------------------------------------------------------------------------- 1 | from .stata_controller import StataController 2 | from .stata_do import StataDo 3 | from .stata_finder 
import StataFinder 4 | 5 | __all__ = [ 6 | "StataFinder", 7 | "StataController", 8 | "StataDo" 9 | ] 10 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : __init__.py 9 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/core/execute.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : execute.py 9 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/agent_as_rag/__init__.py: -------------------------------------------------------------------------------- 1 | from ._base import KnowledgeBase 2 | from ._tools import FetchFromDocs 3 | from .handoff import HandoffAgent 4 | 5 | __all__ = [ 6 | "FetchFromDocs", 7 | "KnowledgeBase", 8 | "HandoffAgent", 9 | ] 10 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : __init__.py 9 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/README.md: -------------------------------------------------------------------------------- 1 | # Sand Box / 沙盒 2 | 3 | 相关功能将于 v2.1.0系列上新,届时将通过沙盒的形式运行以Session为单位的Stata运行环境。 4 | 5 | Related features will be introduced in the v2.1.0 series, which will run Stata runtime environments on a per-session basis through sandboxing. 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/builtin_tools/ado_install/__init__.py: -------------------------------------------------------------------------------- 1 | from .github_install import GITHUB_Install 2 | from .net_install import NET_Install 3 | from .ssc_install import SSC_Install 4 | 5 | __all__ = [ 6 | "GITHUB_Install", 7 | "NET_Install", 8 | "SSC_Install" 9 | ] 10 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent_as_rag import HandoffAgent, KnowledgeBase 2 | from .agent_as_tool import StataAgent 3 | from .repl_agents import REPLAgent 4 | from .set_model import set_model 5 | 6 | __all__ = [ 7 | "set_model", 8 | "REPLAgent", 9 | "StataAgent", 10 | "KnowledgeBase", 11 | "HandoffAgent", 12 | ] 13 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/jupyter_manager/kernel_pool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : kernel_pool.py 9 | 10 | class KernelPool: 11 | def __init__(self): 12 | pass 13 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/core/result_processor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : result_processor.py 9 | 10 | class ResultProcessor: 11 | def __init__(self): 12 | pass 13 | -------------------------------------------------------------------------------- /source/reports/README.md: -------------------------------------------------------------------------------- 1 | # Stata-MCP Research Reports Index 2 | 3 | ## 2025 4 | 5 | - 20250921: [Stata-MCP: A Research Report on AI-Assisted Empirical Research](2025/0921/stata_mcp_a_research_report_on_ai_assisted_empirical_research.md) 6 | 7 | ## Others 8 | 9 | - AI coding ability: [code](ai_coding_ability/main.ipynb) and [figure](ai_coding_ability/ability.png) 10 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/agent_as_tool/__init__.py: -------------------------------------------------------------------------------- 1 | from .adversarial_thinking_agent import AdversarialThinkingAgent 2 | from .any_as_tools import agent_list_to_tools, dict_to_agent_tools 3 | from .stata_agent import StataAgent 4 | 5 | __all__ = [ 6 | "AdversarialThinkingAgent", 7 | "StataAgent", 8 | "agent_list_to_tools", 9 | "dict_to_agent_tools" 10 | ] 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request_cn.md: -------------------------------------------------------------------------------- 1 | --- 2 | 
name: ✨ 功能请求 3 | about: 建议新功能或增强 4 | title: '[功能] ' 5 | labels: 'enhancement' 6 | assignees: '' 7 | --- 8 | 9 | ## 功能描述 10 | 11 | 12 | ## 解决的问题 13 | 14 | 15 | ## 建议的解决方案 16 | 17 | 18 | ## 已考虑的替代方案 19 | 20 | 21 | ## 补充信息 22 | -------------------------------------------------------------------------------- /src/stata_mcp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from datetime import datetime 3 | 4 | 5 | def set_config(key, value): 6 | with open(".env", "w+", encoding="utf-8") as f: 7 | f.write(f"{key}={value}") 8 | return {key: value} 9 | 10 | 11 | def get_nowtime(): 12 | return datetime.strftime(datetime.now(), "%Y%m%d%H%M%S") 13 | 14 | 15 | def get_os(): 16 | return platform.system() 17 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/jupyter_manager/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : __init__.py 9 | 10 | from .kernel_manager import KernelManager 11 | from .kernel_pool import KernelPool 12 | 13 | __all__ = [ 14 | "KernelPool", 15 | "KernelManager" 16 | ] 17 | -------------------------------------------------------------------------------- /source/docs/JupyterStata.md: -------------------------------------------------------------------------------- 1 | # Jupyter Stata 2 | 3 | If you want to use jupyter for coding Stata, you can use a extension named [`nbstata`](https://github.com/hugetim/nbstata). 4 | 5 | First, you need to install `nbstata` in VScode or whatever IDE you want to use. 6 | 7 | Then, config it and use it.
8 | 9 | More information you can find in the listed links: 10 | - [docs by the author](https://hugetim.github.io/nbstata) 11 | - [lianxh blog (连享会)](https://www.lianxh.cn/details/1309.html) 12 | -------------------------------------------------------------------------------- /src/stata_mcp/sandbox/core/sandbox_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : sandbox_manager.py 9 | 10 | from ..jupyter_manager import KernelManager, KernelPool 11 | 12 | 13 | class SandboxManager: 14 | def __init__(self): 15 | self.kernel_pool = KernelPool() 16 | self.kernel = KernelManager('stata') 17 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Tan" 5 | given-names: "Song" 6 | orcid: "https://orcid.org/0009-0006-7449-1094" 7 | email: "sepinetam@gmail.com" 8 | title: "Stata-MCP: Let LLM help you achieve your regression analysis with Stata" 9 | version: 1.13.11 10 | date-released: 2025-12-17 11 | url: "https://www.statamcp.com" 12 | repository-code: "https://github.com/sepinetam/stata-mcp" 13 | license: Apache-2.0 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_issue_cn.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 📝 一般问题 3 | about: 报告一般性问题或疑问 4 | title: '[问题] ' 5 | labels: 'question' 6 | assignees: '' 7 | --- 8 | 9 | ## AI 模型 10 | 11 | - [ ] ChatGPT 12 | - [ ] Claude 13 | - [ ] GPT-4 14 | - [ ] 其他模型 (请说明): 15 | 16 | ## 问题描述 17 | 18 | 19 | ## 背景信息 20 | 21 | 22 | ## 已尝试的解决方法 23 | 24 | 25 | ## 环境信息 26 | 27 | - 操作系统: [例如 macOS 14.3] 28 | - Stata 版本: [例如 Stata 17 SE] 29 | - 软件包版本: [例如 1.0.3] 30 | 31 | ## 补充信息 32 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | # 新功能需求标签 2 | - name: "enhancement" 3 | color: "84b6eb" # 浅蓝色 4 | description: "New feature request or enhancement suggestion" 5 | 6 | # Bug 标签 7 | - name: "bug" 8 | color: "d73a4a" # 红色 9 | description: "Something isn't working" 10 | 11 | # Bug 标签 12 | - name: "question" 13 | color: "d73a4a" # 红色 14 | description: "Ask a question" 15 | 16 | # 可选:优先级标签 17 | - name: "priority: high" 18 | color: "ff0000" 19 | description: "High priority issue that needs immediate attention" 20 | 21 | - name: "priority: low" 22 | color: "fbca04" # 黄色 23 | description: "Low priority issue that can be addressed later" -------------------------------------------------------------------------------- /.github/workflows/labels.yml: 
-------------------------------------------------------------------------------- 1 | name: Sync labels 2 | 3 | on: 4 | push: 5 | paths: 6 | - '.github/labels.yml' 7 | branches: 8 | - main # 或者 master,取决于你的默认分支 9 | workflow_dispatch: # 允许手动触发 10 | 11 | jobs: 12 | labels: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Sync Labels 18 | uses: EndBug/label-sync@v2 19 | with: 20 | # 配置文件路径 21 | config-file: .github/labels.yml 22 | # GitHub token 23 | token: ${{ secrets.GITHUB_TOKEN }} 24 | # 删除配置文件中未定义的标签 25 | delete-other-labels: false 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report_cn.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug 报告 3 | about: 报告问题或非预期行为 4 | title: '[Bug] ' 5 | labels: 'bug' 6 | assignees: '' 7 | --- 8 | 9 | ## AI 模型 10 | 11 | - [ ] ChatGPT 12 | - [ ] Claude 13 | - [ ] GPT-4 14 | - [ ] 其他模型 (请说明): 15 | 16 | ## Bug 描述 17 | 18 | 19 | ## 复现步骤 20 | 21 | 1. 22 | 2. 23 | 3. 
24 | 25 | ## 预期行为 26 | 27 | 28 | ## 实际行为 29 | 30 | 31 | ## 环境信息 32 | 33 | - 操作系统: [例如 macOS 14.3] 34 | - Stata 版本: [例如 Stata 17 SE] 35 | - 软件包版本: [例如 1.0.3] 36 | 37 | ## 补充信息 38 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.11' 19 | 20 | - name: Install uv 21 | uses: astral-sh/setup-uv@v3 22 | 23 | - name: Install dependencies 24 | run: uv sync 25 | 26 | - name: Build package 27 | run: uv build 28 | 29 | - name: Publish to PyPI 30 | run: uv publish --token ${{ secrets.PYPI_TOKEN }} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | dev/ 7 | wheels/ 8 | *.egg-info 9 | 10 | # Virtual environments 11 | .venv 12 | 13 | # Environment variables 14 | .env 15 | 16 | # IDEs 17 | .idea/ 18 | .vscode/ 19 | 20 | # OS X 21 | *.DS_Store 22 | 23 | # Config 24 | config.py 25 | 26 | # test 27 | test/ 28 | tests/ 29 | *.test.* 30 | 31 | # Logs 32 | logs/ 33 | log/ 34 | 35 | # private 36 | src_private/ 37 | private/ 38 | privates/ 39 | source/paper 40 | 41 | # local file 42 | *.local.* 43 | local/ 44 | 45 | # temp files 46 | tmp/ 47 | *.tmp 48 | *.tmp.* 49 | 50 | # releases 51 | releases/ 52 | 53 | # backup 54 | backup/ 55 | backup.* 56 | *.backup 57 | *.backup.* 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: ✨ Feature Request 3 | about: Suggest a new feature or enhancement 4 | title: '[FEATURE] ' 5 | labels: 'enhancement' 6 | assignees: '' 7 | --- 8 | 9 | ## Feature Description 10 | 11 | 12 | ## Problem It Solves 13 | 14 | 15 | ## Proposed Solution 16 | 17 | 18 | ## Alternatives Considered 19 | 20 | 21 | ## Additional Context 22 | -------------------------------------------------------------------------------- /src/stata_mcp/evaluate/_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : _model.py 9 | 10 | import os 11 | 12 | from openai import OpenAI 13 | 14 | DEFAULT_CLIENT = OpenAI( 15 | api_key=os.getenv("OPENAI_API_KEY"), 16 | base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), 17 | ) 18 | 19 | DEFAULT_MODEL = os.getenv("OPENAI_MODEL", "gpt-5-mini-2025-08-07") 20 | CHAT_MODEL = os.getenv("CHAT_MODEL", "gpt-5-mini-2025-08-07") 21 | THINKING_MODEL = os.getenv("THINKING_MODEL") 22 | 23 | print(DEFAULT_MODEL, CHAT_MODEL, THINKING_MODEL) 24 | -------------------------------------------------------------------------------- /source/docs/Rights/CLA.md: -------------------------------------------------------------------------------- 1 | # Contributor License Agreement (CLA) 2 | 3 | This CLA content is based on the [Google's CLA](https://cla.developers.google.com/about), and is modified to fit the project. If you are trying to join, that reveals that you have ever signed the CLA or agree with the CLA. 4 | 5 | # More CLA information 6 | By submitting a pull request to this project, you certify that: 7 | 8 | 1. The contribution is your original work, or you have the right to submit it. 9 | 2. 
You grant the project maintainers an irrevocable license to use, modify, and distribute your contribution. 10 | 3. You agree that your contribution is provided under the project's license. 11 | 12 | If you do not agree, do not submit contributions. 13 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | versioning-strategy: increase-if-necessary 13 | open-pull-requests-limit: 10 14 | commit-message: 15 | prefix: "chore(deps)" 16 | include: "scope" 17 | -------------------------------------------------------------------------------- /source/example/data/README.md: -------------------------------------------------------------------------------- 1 | If you want to find the example data, you can visit the follow URL to fetch data. 
2 | 3 | ## Example Data URL LIST 4 | 5 | | ID | File Name | URL | 6 | |----|--------------------------|---------------------------------------------------------------| 7 | | 01 | OLS.dta | https://example-data.statamcp.com/01_OLS.dta | 8 | | 02 | Tax_Reform_std_DID.dta | https://example-data.statamcp.com/02_Tax_Reform_std_DID.dta | 9 | | 03 | Tax_SDID.dta | https://example-data.statamcp.com/03_Tax_SDID.dta | 10 | | 04 | Education_Wage_RDD.dta | https://example-data.statamcp.com/04_Education_Wage_RDD.dta | 11 | | 05 | Education_Returns_IV.dta | https://example-data.statamcp.com/05_Education_Returns_IV.dta | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "stata-mcp" 3 | version = "1.13.11" 4 | description = "Let LLM help you achieve your regression analysis with Stata" 5 | readme = "README.md" 6 | authors = [ 7 | { name = "Song Tan", email = "sepine@statamcp.com" } 8 | ] 9 | requires-python = ">=3.11" 10 | license = {text = "Apache-2.0"} 11 | dependencies = [ 12 | "python-dotenv>=1.1.1", 13 | "mcp[cli]>=1.16.0", 14 | "pandas>=2.3.0", 15 | "pexpect>=4.9.0", 16 | "openpyxl>=3.1.5", 17 | "openai-agents>=0.3.2", 18 | "openai>=1.109.1", 19 | "pathvalidate>=3.3.1", 20 | "openai-api-polling>=0.1.1", 21 | ] 22 | 23 | [project.scripts] 24 | stata-mcp = "stata_mcp.cli:main" 25 | 26 | [build-system] 27 | requires = ["hatchling"] 28 | build-backend = "hatchling.build" 29 | 30 | [project.urls] 31 | Homepage = "https://www.statamcp.com" 32 | Source = "https://github.com/sepinetam/stata-mcp" 33 | -------------------------------------------------------------------------------- /source/docs/Usages/ClaudeCode/00_intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | > As the actual experience, we find that MCP Server works for Agent is better than a single chat model. 
We try to each of Agent we can find out even developed agents with different sdk, therefore we find out the best way to use Stata-MCP in an Agent at present. 4 | 5 | In this section, we will introduce the basic usage of Claude Code with Stata-MCP. All the example is using macOS with Apple Silicon, if you are a windows user, you can copy this page to ChatGPT and ask for help. 6 | 7 | ## Quickly Start 8 | ### Install Stata-MCP 9 | Install Stata-MCP in Claude Code is easy: 10 | ```bash 11 | claude mcp add stata-mcp uvx stata-mcp 12 | ``` 13 | 14 | ### Let Claude use Stata-MCP 15 | After boot Claude Code, you can talk to it like this: 16 | ```markdown 17 | > Load the default data of stata, and analyze the data. Export the summarization of the data into a '.docx' file. 18 | ``` 19 | 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 📝 General Issue 3 | about: Report a general issue or question 4 | title: '[ISSUE] ' 5 | labels: 'question' 6 | assignees: '' 7 | --- 8 | 9 | ## AI Model 10 | 11 | - [ ] ChatGPT 12 | - [ ] Claude 13 | - [ ] GPT-4 14 | - [ ] Other (please specify): 15 | 16 | ## Issue Description 17 | 18 | 19 | ## Context 20 | 21 | 22 | ## What I've Tried 23 | 24 | 25 | ## Environment 26 | 27 | - OS: [e.g. macOS 14.3] 28 | - Stata Version: [e.g. Stata 17 SE] 29 | - Package Version: [e.g. 
1.0.3] 30 | 31 | ## Additional Information 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug Report 3 | about: Report an issue or unexpected behavior 4 | title: '[BUG] ' 5 | labels: 'bug' 6 | assignees: '' 7 | --- 8 | 9 | ## AI Model 10 | 11 | - [ ] ChatGPT 12 | - [ ] Claude 13 | - [ ] GPT-4 14 | - [ ] Other (please specify): 15 | 16 | ## Bug Description 17 | 18 | 19 | ## Steps to Reproduce 20 | 21 | 1. 22 | 2. 23 | 3. 24 | 25 | ## Expected Behavior 26 | 27 | 28 | ## Actual Behavior 29 | 30 | 31 | ## Environment 32 | 33 | - OS: [e.g. macOS 14.3] 34 | - Stata Version: [e.g. Stata 17 SE] 35 | - Package Version: [e.g. 1.0.3] 36 | 37 | ## Additional Context 38 | -------------------------------------------------------------------------------- /src/stata_mcp/core/claude_proj/cwd_cfg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : cwd_cfg.py 9 | 10 | from pathlib import Path 11 | 12 | 13 | # TODO: Add the save guard for relative path like "../../" 14 | def get_exp_cwd(path: str | Path = ".") -> Path: 15 | """ 16 | Config the current working directory 17 | """ 18 | path_obj = Path(path).expanduser() # solve the problem like Path("~/Documents").is_absolute() 19 | if path_obj.is_absolute(): 20 | try: 21 | path_obj.mkdir(parents=True, exist_ok=True) 22 | return path_obj 23 | except OSError as e: 24 | raise e 25 | else: 26 | TERMINAL_CURRENT_PATH = Path.cwd() 27 | combined_path = (TERMINAL_CURRENT_PATH / path_obj).resolve() 28 | combined_path.mkdir(parents=True, exist_ok=True) 29 | return combined_path 30 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/lint.yml 2 | name: Code Lint & Format 3 | 4 | on: 5 | push: 6 | paths: 7 | - "src/**" 8 | branches: 9 | - "**" 10 | pull_request: 11 | branches: 12 | - master 13 | 14 | jobs: 15 | lint: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python: [3.11] 21 | 22 | steps: 23 | - name: Checkout code 24 | uses: actions/checkout@v2 25 | 26 | - name: Set up Python ${{ matrix.python }} 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: ${{ matrix.python }} 30 | 31 | - name: Install lint tools 32 | run: | 33 | python -m pip install --upgrade pip 34 | pip install black flake8 35 | 36 | - name: Auto-format with Black 37 | run: black src 38 | 39 | - name: Black format check 40 | run: black --check src 41 | 42 | - name: Flake8 static analysis 43 | run: flake8 src --extend-ignore=E501,W291 44 | -------------------------------------------------------------------------------- /source/docs/ChinaUsers/uv.md: 
-------------------------------------------------------------------------------- 1 | # uv 2 | uv is one of the most popular tools for managing your python projects, you can find the official documents on [GitHub](https://github.com/astral-sh/uv). 3 | 4 | Given that many users are in China, where reaching PyPI can be difficult (slow network access), here is a short introduction for changing the package index to a domestic mirror. 5 | 6 | The following is sourced from the [Tsinghua University Mirror](https://mirror.tuna.tsinghua.edu.cn/help/pypi/). 7 | 8 | Edit your `~/.config/uv/uv.toml` or `/etc/uv/uv.toml` file with the following content: 9 | ```toml 10 | [[index]] 11 | url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/" 12 | default = true 13 | ``` 14 | 15 | To edit the system-wide file you may need to use `sudo`; here is a simple way to edit it (on macOS and Linux with the `nano` command; Windows users should find their own way to edit the file): 16 | ```bash 17 | sudo nano /etc/uv/uv.toml 18 | # 19 | # AFTER edit, type control+x to exit 20 | cat /etc/uv/uv.toml # to find out whether it is saved. 21 | ``` 22 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | Thank you for your interest in contributing to this project! We welcome contributions from everyone. Please follow the guidelines below to ensure a smooth process. 3 | 4 | ## Git 5 | If you are not familiar with git, please read the [git_std_rule](source/docs/Rules/git_std_rule.md) file. 6 | Alternatively, you can ask Claude Code to help with your commit, just like: `help me commit my change to git`; 7 | although it is powerful, please pay close attention to its message and what it does, as it may make a mistake.
class NET_Install(AdoInstallBase):
    """Install Stata ado packages via Stata's `net install` command."""

    def install(self, package: str, directory_or_url: str = None) -> str:
        """Run `net install` for *package*, optionally from a directory/URL.

        Args:
            package: Name of the ado package to install.
            directory_or_url: Optional `from()` source (directory or URL).

        Returns:
            str: Installation-state header plus the raw Stata output.
        """
        options = self.REPLACE_MESSAGE  # ", replace" or ""
        if directory_or_url:
            # Open the option list with a comma when `replace` did not.
            prefix = ", " if not options else ""
            options += f"{prefix} from({directory_or_url})"

        install_command = f"net install {package}{options}"
        runner_result = self.controller.run(install_command)
        return self._install_msg_template(runner_result)

    @staticmethod
    def check_install(message: str) -> bool:
        """Return True when *message* contains no known failure signature.

        BUG FIX: the previous `any(sig not in message ...)` evaluated True
        whenever at least one error signature was absent — i.e. almost
        always — so failed installs were reported as successes. An install
        only counts as successful when *none* of the failure signatures
        appear in the output.
        """
        wrong_signature_messages = [
            # for sure error message
            "not found",
            "could not load"
        ]

        return not any(signature_msg in str(message) for signature_msg in wrong_signature_messages)
class SSC_Install(AdoInstallBase):
    """Install Stata ado packages from the SSC archive."""

    def install(self, package: str) -> str:
        """Run `ssc install` for *package* and return the annotated output."""
        command = f"ssc install {package}{self.REPLACE_MESSAGE}"
        output = self.controller.run(command)
        return self._install_msg_template(output)

    @staticmethod
    def check_install(message: str) -> bool:
        """Return True when the Stata output carries a success signature."""
        text = str(message)
        success_signatures = (
            # fresh install (package absent before, or not found locally)
            "installing into ",
            "installation complete.",
            # with the replace arg: package already present and current
            "all files already exist and are up to date.",
        )
        for signature in success_signatures:
            if signature in text:
                return True
        return False
23 | 24 | ## Laziness 25 | Without well-crafted prompts (and sometimes even with them), the model tends to act lazily when using Stata-MCP: 26 | - It skips reading log files during execution; 27 | - After log reading is added to the do-file, it may only write the do-file without running it; 28 | - Even when all steps are combined, the model might output the code as plain text instead of actually writing a do-file. 29 | 30 | ## Butterfly Effect 31 | When a minor error occurs while running a do-file, the model often fails to resolve it and keeps trying incorrect fixes, leading to an infinite loop. This behavior usually stems from limited familiarity with Stata syntax. 32 | -------------------------------------------------------------------------------- /src/stata_mcp/core/stata/builtin_tools/stata_help.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
class StataHelp:
    """Query Stata's built-in `help` system through a controller session."""

    def __init__(self, stata_cli: str):
        # Controller bound to the given Stata CLI binary.
        self.controller = StataController(stata_cli)

    @staticmethod
    def _not_found_msg(cmd: str) -> str:
        """The exact output Stata emits when no local help exists for *cmd*.

        Extracted into one place — the two public methods previously built
        this identical string independently.
        """
        return (
            f"help {cmd}\r\n"
            f"help for {cmd} not found\r\n"
            f"try help contents or search {cmd}"
        )

    def help(self, cmd: str) -> str:
        """Return the local help text for *cmd*.

        Raises:
            Exception: If no help entry exists for the command locally.
        """
        help_result = self.controller.run(f"help {cmd}")

        if help_result == self._not_found_msg(cmd):
            raise Exception("No help found for the command in Stata ado locally: " + cmd)
        return help_result

    def check_command_exist_with_help(self, cmd: str) -> bool:
        """Return True when Stata has a local help entry for *cmd*."""
        # Direct comparison instead of the former if/else returning True/False.
        return self.controller.run(f"help {cmd}") != self._not_found_msg(cmd)
class KernelManager:
    """Locate an installed Jupyter kernel spec by name (default: ``stata``)."""

    def __init__(self,
                 kernel_name: str = "stata"):
        self.kernel_name = kernel_name          # name looked up in the kernel registry
        self.kernel_path = self.find_kernel()   # resolved kernel-spec directory

    def find_kernel(self,
                    kernel_name: str | None = None) -> str:
        """Return the kernel-spec path for *kernel_name*.

        Falls back to ``self.kernel_name`` when no name is given. Exits the
        process with status 1 when the kernel is not installed.
        NOTE(review): for library use, raising an exception instead of
        ``sys.exit`` would be friendlier — confirm before changing callers.
        """
        if not kernel_name:
            kernel_name = self.kernel_name

        kernels_dict: Dict[str, str] = KernelSpecManager().find_kernel_specs()

        # Dict membership is already O(1); no need to build `set(keys())`.
        if kernel_name in kernels_dict:
            return kernels_dict[kernel_name]

        logging.warning(
            "Kernel not found, please install it.\n"
            "You can run `pip install stata_kernel` to deal it")
        sys.exit(1)
class FinderLinux(FinderBase):
    """Locate the Stata CLI on Linux via the standard binary directories."""

    def finder(self) -> str:
        """Return the path of the best Stata CLI found, or raise if absent."""
        candidates = self.find_from_bin()
        if not candidates:
            raise FileNotFoundError("Stata CLI not found")
        return max(candidates).stata_cli_path

    def find_path_base(self) -> Dict[str, List[str]]:
        """Return the directories to scan for Stata binaries.

        Always includes /usr/local/bin, plus any of its sub-directories
        whose name mentions "stata".
        """
        search_root = Path("/usr/local/bin")
        bin_dirs = ["/usr/local/bin"]

        if search_root.exists() and search_root.is_dir():
            bin_dirs.extend(
                str(entry)
                for entry in search_root.iterdir()
                if entry.is_dir() and "stata" in entry.name.lower()
            )

        return {
            "bin": bin_dirs,
        }
All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : agent_base.py 9 | 10 | import os 11 | from abc import ABC 12 | 13 | from agents import Agent, Model, set_tracing_disabled 14 | 15 | 16 | class AgentBase(ABC): 17 | OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", None) 18 | NAME: str 19 | agent_instructions: str 20 | 21 | def __init__(self, 22 | name: str = None, 23 | instructions: str = None, 24 | model: Model = None, 25 | mcp_servers: list = None, 26 | tools: list = None, 27 | max_turns: int = 30, # If the task is not easy, set larger number 28 | DISABLE_TRACING: bool = False, 29 | *args, 30 | **kwargs): 31 | # Disable tracing while not found openai_api_key and set tracing disable. 32 | set_tracing_disabled( 33 | (not kwargs.get("OPENAI_API_KEY", self.OPENAI_API_KEY)) or DISABLE_TRACING 34 | ) 35 | 36 | self.agent = Agent( 37 | name=name or self.NAME, 38 | instructions=instructions or self.agent_instructions, 39 | ) 40 | 41 | self.max_turns = max_turns 42 | 43 | if model: 44 | self.agent.model = model 45 | 46 | if mcp_servers: 47 | self.agent.mcp_servers = mcp_servers 48 | 49 | if tools: # if exist tools, register tools 50 | self.agent.tools = tools 51 | -------------------------------------------------------------------------------- /src/stata_mcp/core/claude_proj/proj.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : proj.py 9 | 10 | from pathlib import Path 11 | 12 | from .cwd_cfg import get_exp_cwd 13 | 14 | 15 | class ClaudeProject: 16 | def __init__(self, cwd: str = "."): 17 | self.cwd = Path(get_exp_cwd(cwd)) 18 | self.source_dir = self.cwd / "source" 19 | 20 | def init_project(self): ... 21 | 22 | def write_claude_md(self): ... 
23 | 24 | def mk_data_dir(self): 25 | """ 26 | 创建一系列目录 27 | 28 | """ 29 | data_dir = self.source_dir / "data" 30 | raw_data_dir = data_dir / "raw" 31 | middle_data_dir = data_dir / "middle" 32 | final_data_dir = data_dir / "final" 33 | 34 | self.mk_dir_exist(raw_data_dir) 35 | self.mk_dir_exist(middle_data_dir) 36 | self.mk_dir_exist(final_data_dir) 37 | 38 | @staticmethod 39 | def mk_dir_exist(path: str | Path) -> bool: 40 | """ 41 | Create directory if it doesn't exist and return whether it exists. 42 | 43 | Args: 44 | path: Directory path to ensure exists 45 | 46 | Returns: 47 | bool: True if directory exists (or was successfully created), False if failed 48 | """ 49 | path_obj = Path(path) 50 | if not path_obj.exists(): 51 | try: 52 | path_obj.mkdir(parents=True, exist_ok=True) 53 | except OSError: 54 | return False 55 | return path_obj.is_dir() 56 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/set_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
def set_model(model_name: str = None,
              api_key: str = None,
              base_url: str = "https://api.openai.com/v1",
              openai_client: AsyncOpenAI = None) -> Model:
    """Build an OpenAI-compatible chat model for the agents framework.

    Explicit arguments take precedence over environment variables, matching
    the OpenAI SDK convention; the environment is only a fallback.

    Args:
        model_name: Model identifier (falls back to $OPENAI_MODEL).
        api_key: API key (falls back to $OPENAI_API_KEY).
        base_url: API base URL; when left at the default, $OPENAI_BASE_URL
            is consulted before falling back to the official endpoint.
        openai_client: Pre-built async client; when given, api_key and
            base_url are ignored.

    Returns:
        Model: An OpenAIChatCompletionsModel bound to the client.
    """
    # BUG FIX: previously the environment variables overrode the explicit
    # arguments (os.getenv(VAR, arg)), so e.g. set_model(model_name=
    # "deepseek-chat") was silently ignored whenever $OPENAI_MODEL was set —
    # breaking the documented DeepSeek example. Arguments now win.
    OPENAI_MODEL = model_name or os.getenv("OPENAI_MODEL")
    OPENAI_API_KEY = api_key or os.getenv("OPENAI_API_KEY")
    if base_url != "https://api.openai.com/v1":
        # Caller supplied a non-default URL explicitly: honour it.
        OPENAI_BASE_URL = base_url
    else:
        OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", base_url)

    OPENAI_CLIENT = openai_client or AsyncOpenAI(
        api_key=OPENAI_API_KEY,
        base_url=OPENAI_BASE_URL
    )
    return OpenAIChatCompletionsModel(
        model=OPENAI_MODEL,
        openai_client=OPENAI_CLIENT
    )
class GITHUB_Install(AdoInstallBase):
    """Install Stata packages from GitHub via Haghish's `github` command."""

    def __init__(self, stata_cli, is_replace: bool = True):
        super().__init__(stata_cli, is_replace)
        # BUG FIX: the former `__post_initialization` hook was name-mangled
        # (to _GITHUB_Install__post_initialization) and therefore never
        # invoked by AdoInstallBase.__init__, so the `github` command was
        # never bootstrapped. Do the bootstrap explicitly after base init.
        if not self.IS_EXIST_GITHUB:
            self.__install_github()

    def install(self, package: str) -> str:
        """Run `github install` for *package* and return annotated output."""
        install_command = f"github install {package}{self.REPLACE_MESSAGE}"
        runner_result = self.controller.run(install_command)
        return self._install_msg_template(runner_result)

    @property
    def IS_EXIST_GITHUB(self) -> bool:
        """Whether the `github` ado command is available in this Stata."""
        return StataHelp(self.stata_cli).check_command_exist_with_help("github")

    def __install_github(self):
        """Install the `github` command itself from its official site."""
        install_command = 'net install github, from("https://haghish.github.io/github/")'
        runner_result = self.controller.run(install_command)
        return runner_result

    @staticmethod
    def check_install(message: str) -> bool:
        """Return True when the output carries a GitHub success signature."""
        # I am not sure whether this is robust, if not please email me.
        signature_messages = [
            # GitHub specific success messages
            "connected to github.com",
            "repository exists:",
            "installation complete",

            # for replace arg, the package is already exist and up to date
            "all files already exist and are up to date",
        ]

        return any(signature_msg in str(message).lower() for signature_msg in signature_messages)
10 | 11 | 12 | ## Citation 13 | ```bibtex 14 | @techreport{tan2025stataMCP, 15 | author = {Tan, Song and Feng, Muyao}, 16 | title = {Stata-MCP: A research report on AI-assisted empirical research}, 17 | year = {2025}, 18 | month = {September}, 19 | day = {21}, 20 | language = {English}, 21 | address = {Shanghai, China}, 22 | institution = {Shanghai Bayes Views Information Technology Co., Ltd.}, 23 | url = {https://www.statamcp.com/reports/2025/09/21/stata_mcp_a_research_report_on_ai_assisted_empirical_research} 24 | } 25 | ``` 26 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/agent_as_rag/_tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : _tools.py 9 | 10 | from pathlib import Path 11 | from typing import Dict, List 12 | 13 | 14 | class FetchFromDocs: 15 | """ 16 | When knowledge is clearly categorized and limited in quantity, 17 | documents can be used as a knowledge base instead of vectorized retrieval 18 | """ 19 | 20 | def __init__(self, 21 | documents_base_path: str | Path, 22 | allowed_extensions=None): 23 | self.documents_base_path = Path(documents_base_path) 24 | # Check whether the path exists 25 | if not self.documents_base_path.exists(): 26 | raise FileNotFoundError(f"{self.documents_base_path} does not exist") 27 | 28 | self.allowed_extensions = allowed_extensions or [".md", ".txt"] 29 | 30 | @property 31 | def FILES(self) -> Dict[str, Path]: 32 | docs_mapping = {} 33 | files = self.documents_base_path.iterdir() 34 | for file_path in files: 35 | if file_path.suffix in self.allowed_extensions: 36 | docs_mapping[file_path.name] = file_path 37 | return docs_mapping 38 | 39 | @property 40 | def KEYWORDS(self) -> List[str]: 41 | docs_mapping = 
self.FILES 42 | keywords_list = [] 43 | for key, value in docs_mapping.items(): 44 | keywords_list.append(key) 45 | return keywords_list 46 | 47 | def fetch_knowledge_from_docs(self, keyword: str, encoding: str = "utf-8") -> str: 48 | if keyword in self.KEYWORDS: 49 | with open(self.FILES[keyword], "r", encoding=encoding) as f: 50 | knowledge = f.read() 51 | return knowledge 52 | else: 53 | return f"{keyword} not found in documents, you can use keywords in {self.KEYWORDS}" 54 | -------------------------------------------------------------------------------- /source/docs/Usages/agent_as/agent_as_tool.md: -------------------------------------------------------------------------------- 1 | # Agent as Tool 2 | 3 | Often we want to use Agents to accomplish specific tasks, but sometimes configuring Agents, including writing prompts that require repeated debugging, can be challenging. This project provides a simpler solution to use Agents as tools, which can achieve excellent results with default prompts. Of course, if you want to customize your prompts and tool descriptions, the project also provides corresponding interfaces. 4 | 5 | ## Quickly Start 6 | If you have never install `stata-mcp`, `openai` and `openai-agents`, install them first, `pip` and `uv` are allowed. 7 | 8 | ```bash 9 | pip install stata-mcp 10 | ``` 11 | 12 | or 13 | 14 | ```bash 15 | uv add stata-mcp 16 | ``` 17 | 18 | Here is an example for use `stata-mcp` as a tool. 19 | 20 | ```python 21 | import asyncio 22 | 23 | from agents import Agent 24 | from stata_mcp.agent_as.agent_as_tool import StataAgent 25 | 26 | # init stata agent and set as tool 27 | stata_agent = StataAgent() 28 | sa_tool = stata_agent.as_tool() 29 | 30 | # Create main Agent 31 | agent = Agent( 32 | ..., 33 | tools=[sa_tool], 34 | ) 35 | 36 | 37 | # Then run the agent as usual. 38 | async def main(): 39 | ... 
40 | 41 | 42 | if __name__ == "__main__": 43 | asyncio.run(main()) 44 | 45 | ``` 46 | 47 | ## Other Model Provider 48 | You can use any other model provider but `openai`, use the function `set_model` for change it, also you can use the model in ANY OPENAI-AGENT registry. 49 | 50 | Here is an example for using `DeepSeek` as model provider 51 | 52 | ```python 53 | import os 54 | 55 | from agents import Agent 56 | from stata_mcp.agent_as.agent_as_tool import StataAgent, set_model 57 | 58 | deepseek_model = set_model( 59 | model_name="deepseek-chat", 60 | api_key=os.getenv("DEEPSEEK_API_KEY"), 61 | base_url=os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1") 62 | ) 63 | 64 | stata_agent = StataAgent(model=deepseek_model) 65 | 66 | agent = Agent( 67 | ..., 68 | tools=[stata_agent.as_tool, ...], 69 | ) 70 | 71 | ``` 72 | -------------------------------------------------------------------------------- /source/docs/Usages/Questions.md: -------------------------------------------------------------------------------- 1 | # Questions 2 | 3 | ## Cherry Studio 32000 Wrong 4 | > (2025-04-03 Solved) 5 | 6 | Add `USER=YOUR_COMPUTER_NAME` to config env 7 | while if you are windows, add `USERPROFILE` to config env 8 | 9 | If you don't know your computer name, you can run `whoami` in terminal. 10 | it looks like the follow: 11 | ```bash 12 | $ whoami # YOUR_COMPUTER_NAME 13 | ``` 14 | 15 | ## Cherry Studio 32000 Error 16 | > (2025-06-04 Solved) 17 | 18 | Cherry Studio doesn't support the `--directory` argument. Configure it with the 19 | full path to `stata_mcp.py` instead: 20 | 21 | ```json 22 | { 23 | "mcpServers": { 24 | "stata-mcp": { 25 | "command": "uv", 26 | "args": [ 27 | "run", 28 | "/Users/sepinetam/Documents/Github/MCP_Pro/stata-mcp/__init__.py" 29 | ] 30 | } 31 | } 32 | } 33 | ``` 34 | 35 | This means running `uv run /the/full/path/of/stata_mcp.py`. Add `True` if you 36 | need to specify the Stata CLI path. 
class HandoffAgent(AgentBase):
    """An agent that routes ("hands off") requests to registered knowledge agents."""

    NAME: str = "Knowledge Fetch Agent"
    agent_instructions: str = """
    You are a professional researcher for handoff.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.handoffs = []
        _handoffs = kwargs.get("handoffs")
        if _handoffs:
            self.register_agents(_handoffs, is_typing_warning=True)

    def register_agents(self,
                        agents: KnowledgeBase | Agent | Handoff | List[Agent | Handoff],
                        is_typing_warning: bool = False) -> List:
        """Register one or more handoff targets and return the updated list.

        KnowledgeBase instances are converted to their handoff agent; plain
        Agent/Handoff objects are registered as-is; anything else is skipped
        (optionally with a warning).
        """
        # isinstance against the builtin `list`, not typing.List.
        if not isinstance(agents, list):
            agents = [agents]
        for agent in agents:
            if isinstance(agent, KnowledgeBase):
                self.handoffs.append(agent.TO_HANDOFF_AGENT)
            elif isinstance(agent, (Agent, Handoff)):
                self.handoffs.append(agent)
            elif is_typing_warning:
                print(f"Warning: {agent} is not a valid agent for handoff")
        return self.handoffs

    @property
    def HANDOFF_AGENT(self):
        """The underlying Agent with the registered handoffs attached."""
        handoff_agent = self.agent
        handoff_agent.handoffs = self.handoffs
        return handoff_agent

    @property
    def as_tool(self):
        """Expose this agent as a callable tool.

        BUG FIX: tool names must match the OpenAI function-name pattern
        (letters, digits, underscores and dashes only); the previous name
        "Knowledge fetch" contained a space and is rejected by the API.
        """
        return self.HANDOFF_AGENT.as_tool(
            tool_name="knowledge_fetch",
            tool_description="Fetch knowledge from the previous setting.",
            max_turns=self.max_turns
        )
async def main(msg: Dict[str, str]):
    """Run the ReAct agent on *msg*, printing the result and wall-clock time."""
    mcp_tools = await client.get_tools()
    t0 = perf_counter()
    react_agent = create_react_agent(model, mcp_tools, prompt=prompt)
    outcome = await react_agent.ainvoke(msg)
    spent = perf_counter() - t0
    print(outcome)
    print(f"Total cost time: {spent:.2f} s")
    return outcome
58 | """ 59 | task_messages = {"messages": task} 60 | 61 | if __name__ == "__main__": 62 | asyncio.run(main(task_messages)) 63 | -------------------------------------------------------------------------------- /source/agent_examples/task_prompt/agent_langchain.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : agent_langchain.py 9 | 10 | import os 11 | import asyncio 12 | from time import perf_counter 13 | from typing import Dict 14 | 15 | # We suppose you have set the api_base and api_key. 16 | # Note: The environment name is "OPENAI_API_BASE" and "OPENAI_API_KEY" 17 | # os.environ["OPENAI_API_BASE"] = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1") 18 | # os.environ["OPENAI_API_KEY"] = os.getenv("DEEPSEEK_API_KEY") 19 | 20 | from langchain.chat_models import init_chat_model 21 | from langchain_mcp_adapters.client import MultiServerMCPClient 22 | from langgraph.prebuilt import create_react_agent # langgraph is more stable than langchain react agent 23 | 24 | from prompt_generator import PromptGenerator 25 | 26 | 27 | generator = PromptGenerator(template_name="ReAct", language="English", agent_provider="langchain") 28 | model_instructions = generator.instructions() 29 | model_tasks = generator.tasks( 30 | datas="The default data of Stata", 31 | aims="Get the data structure and know the relationship between mpg and price", 32 | ) 33 | 34 | # You should set the stata_cli here, not in the client args 35 | os.environ["stata_cli"] = "stata-mp" 36 | client = MultiServerMCPClient( 37 | { 38 | "stata-mcp": { 39 | "command": "uvx", 40 | "args": ['stata-mcp'], 41 | "transport": "stdio", 42 | } 43 | } 44 | ) 45 | 46 | # If you are setting other providers, you can set whatever model you want, visit official documents 
class AdoInstallBase(ABC):
    """Common workflow shared by the ado-package installers.

    Concrete subclasses (ssc/net/github installers) implement ``install``
    and ``check_install``; this base provides the Stata controller, the
    ``, replace`` suffix handling, and the result-message helpers.
    """

    def __init__(self,
                 stata_cli,
                 is_replace: bool = True):
        """
        Args:
            stata_cli: Path or command name of the Stata CLI executable.
            is_replace: When True, install commands append ", replace".
        """
        self.stata_cli = stata_cli
        self.is_replace = is_replace
        # NOTE(review): due to Python name mangling this always resolves to
        # _AdoInstallBase__post_initialization.  A subclass that defines its
        # own __post_initialization gets a *differently* mangled name and is
        # therefore NOT called here, contrary to the docstring below.
        # Consider renaming the hook to a single-underscore name — confirm
        # against the concrete installers first.
        self.__post_initialization()

    def __post_initialization(self):
        """
        Post-initialization hook for subclasses to override.

        This method is called automatically after __init__ completes.
        Subclasses can override this method to perform additional initialization
        without having to override the entire __init__ method.

        Examples:
            >>> class MyInstaller(AdoInstallBase):
            ...     def __post_initialization(self):
            ...         self.github_mirror = "https://github.com"  # Fake var
            ...
            ...     def install(self, package: str):
            ...         ...
        """
        pass

    @property
    def controller(self) -> "StataController":
        # String (forward-reference) annotation: avoids evaluating the name
        # at class-creation time.  A fresh controller is built per access.
        return StataController(self.stata_cli)

    @property
    def REPLACE_MESSAGE(self) -> str:
        """Suffix appended to install commands: ", replace" or ""."""
        if self.is_replace:
            return ", replace"
        else:
            return ""

    @abstractmethod
    def install(self, package: str) -> str: pass

    @staticmethod
    @abstractmethod
    def check_install(message: str) -> bool:
        ...

    @staticmethod
    def check_installed_from_msg(msg: str) -> bool:
        """Parse the leading "Installation State: <bool>" line produced by
        ``_install_msg_template`` and return the recorded state.

        The token is compared textually instead of being ``eval``-ed:
        ``eval`` on runner output was both unsafe (arbitrary code execution
        on untrusted text) and fragile (any non-Python token raised instead
        of returning False).
        """
        state_with_prompt = msg.split("\n")[0]
        state_str = state_with_prompt.split(":")[-1].strip()
        return state_str == "True"

    def _install_msg_template(self, runner_result: str) -> str:
        """Prefix the runner output with a machine-readable state line."""
        return f"Installation State: {self.check_install(runner_result)}\n" + runner_result
20 | # uv sync 21 | 22 | uv run agent_examples/openai/main.py # before that, you can change your task message, just provide the minimal task description, and don’t forget to include your data path and the output path. 23 | ``` 24 | 25 | If there is timeout error, do not worry, you can install it before running the agent, like this: 26 | ```bash 27 | git clone https://github.com/sepinetam/stata-mcp.git 28 | cd stata-mcp 29 | 30 | pip install -e . 31 | # You can find whether it is installed successfully by: 32 | stata-mcp --version 33 | ``` 34 | and, edit the agent file `agent_examples/openai/main.py`, from 35 | ```python 36 | mcp_server = MCPServerStdio( 37 | name="Stata-MCP", 38 | params={ 39 | "command": "uvx", 40 | "args": [ 41 | "stata-mcp" 42 | ], 43 | "env": { 44 | "stata_cli": "stata-mp" 45 | } 46 | } 47 | ) 48 | ``` 49 | to 50 | ```python 51 | mcp_server = MCPServerStdio( 52 | name="Stata-MCP", 53 | params={ 54 | "command": "stata-mcp", 55 | "args": [], 56 | "env": {"stata_cli": "stata-mp"} 57 | } 58 | ) 59 | ``` 60 | 61 | ## Langchain-ReAct-Agent 62 | Similar to the OpenAI-Agent, the Langchain-ReAct-Agent is also a simple agent. 63 | With ReAct framework, it could perform better than the common Agent. 64 | More information could read this paper: [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629). 65 | 66 | How to run? 67 | ```bash 68 | git clone https://github.com/sepinetam/stata-mcp.git 69 | cd stata-mcp 70 | uv run agent_examples/langchain/main.py 71 | ``` 72 | 73 | -------------------------------------------------------------------------------- /src/stata_mcp/core/data_info/dta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. 
class DtaDataInfo(DataInfoBase):
    """Data-info provider for Stata ``.dta`` files, local or remote (URL)."""

    def _read_data(self) -> pd.DataFrame:
        """
        Read Stata dta file into pandas DataFrame.

        Returns:
            pd.DataFrame: The data from the Stata file

        Raises:
            FileNotFoundError: If the file does not exist
            ValueError: If the file is not a valid Stata file
        """
        # Check if it's a URL first
        if self.is_url:
            # For URLs, validate the file extension from the URL string
            from urllib.parse import urlparse
            parsed_url = urlparse(str(self.data_path))
            url_path = parsed_url.path
            if not url_path.lower().endswith('.dta'):
                raise ValueError(f"URL must point to a .dta file, got: {url_path}")
            file_path = None  # Not used for URLs
        else:
            # For local files, convert to Path object and validate
            file_path = Path(self.data_path)

            # Check if file exists
            if not file_path.exists():
                raise FileNotFoundError(f"Stata file not found: {file_path}")

            # Check if it's a .dta file
            if file_path.suffix.lower() != '.dta':
                raise ValueError(f"File must have .dta extension, got: {file_path.suffix}")

        try:
            # Using convert_categoricals=False to avoid converting labels to
            # categories; preserves the original data structure.
            buffer = None
            if self.is_url:
                # A missing timeout would block forever on an unresponsive host.
                resp = requests.get(self.data_path, timeout=60)
                resp.raise_for_status()
                buffer = BytesIO(resp.content)

            df = pd.read_stata(
                buffer or file_path,
                convert_categoricals=False,  # disable change data to mapped str.
                convert_dates=True,
                convert_missing=False,
                preserve_dtypes=True
            )
            return df

        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise ValueError(f"Error reading Stata file {self.data_path}: {str(e)}") from e
async def main(msg: str):
    """Run one task message through the agent and print its final output.

    Connects the MCP server before timing starts.  The ``finally`` block
    guarantees that the elapsed time is reported and the MCP server is
    cleaned up even when the run raises.

    Args:
        msg: Natural-language task for the agent.

    Returns:
        The result object produced by ``Runner.run``.
    """
    await mcp_server.connect()
    started_at = perf_counter()
    try:
        run_result = await Runner.run(agent, msg, max_turns=30)
        print(run_result.final_output)
    finally:
        elapsed_s = perf_counter() - started_at
        print(f"Total cost time: {elapsed_s:.2f} s")
        await mcp_server.cleanup()
    return run_result
8 | 9 | **API Usage Disclaimer:** 10 | - All API usage is independent of this project and is the sole responsibility of the user 11 | - Users must comply with the privacy policies and terms of service of their chosen API providers 12 | - This project assumes no responsibility for how users interact with third-party APIs 13 | - Users are solely responsible for reviewing and understanding the privacy implications of their API usage 14 | 15 | **User Responsibility:** 16 | Users acknowledge that: 17 | - They must independently assess the security and privacy risks of any API usage 18 | - They bear full responsibility for their use of third-party services 19 | - This project provides tools for API interaction but does not endorse or guarantee any specific API provider 20 | - All risks associated with API usage remain with the user 21 | 22 | 23 | ## Reporting Security Vulnerabilities 24 | 25 | If you discover a security vulnerability in this project, please report it responsibly: 26 | 27 | 1. **Do not** open a public issue 28 | 2. Email security concerns to: sepinetam@gamil.com 29 | 3. Include a detailed description of the vulnerability 30 | 4. Allow reasonable time for response and resolution 31 | 32 | ## Security Best Practices 33 | 34 | - Keep your dependencies updated 35 | - Use secure network connections (HTTPS/TLS) 36 | - Validate all inputs and outputs 37 | - Follow principle of least privilege 38 | - Regular security audits of code and dependencies 39 | 40 | 41 | ## 隐私政策 42 | 43 | **隐私声明:** 44 | 本项目不收集、存储或传输任何个人信息或用户数据。所有数据处理均在用户本地机器上进行,不会向本项目运营的外部服务器发送任何信息。 45 | 46 | **API使用免责声明:** 47 | - 所有API使用均独立于本项目,由用户自行承担责任 48 | - 用户必须遵守所选API提供商的隐私政策和服务条款 49 | - 本项目对用户如何与第三方API交互不承担任何责任 50 | - 用户有责任审查并理解其API使用的隐私影响 51 | 52 | **用户责任:** 53 | 用户确认: 54 | - 必须独立评估任何API使用的安全和隐私风险 55 | - 对使用第三方服务承担全部责任 56 | - 本项目提供API交互工具,但不认可或保证任何特定API提供商 57 | - 与API使用相关的所有风险由用户承担 58 | 59 | ## 漏洞报告 60 | 61 | 如果您发现本项目的安全漏洞,请负责任地进行报告: 62 | 63 | 1. **不要**公开提交issue 64 | 2. 
def main() -> None:
    """Entry point for the command line interface.

    Dispatches, in priority order: ``--usable`` environment check,
    ``--install`` (Claude Desktop config), ``--agent`` REPL mode,
    ``--client`` mode, and finally the default MCP-server run honoring
    ``--transport``.
    """
    parser = argparse.ArgumentParser(
        prog="stata-mcp",
        description="Stata-MCP command line interface",
        add_help=True)
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"Stata-MCP version is {version('stata-mcp')}",
        help="show version information",
    )
    parser.add_argument(
        "-a", "--agent",
        nargs="?",
        const="./",
        help="run Stata-MCP as agent mode (default work dir: current working directory)",
    )
    parser.add_argument(
        "-c", "--client",
        nargs="?",
        const="cc",
        help="set the client mode (default for Claude Code)"
    )
    parser.add_argument(
        "--usable",
        action="store_true",
        help="check whether Stata-MCP could be used on this computer",
    )
    parser.add_argument(
        "--install",
        action="store_true",
        help="install Stata-MCP to Claude Desktop")

    # mcp.run
    parser.add_argument(
        "-t",
        "--transport",
        choices=["stdio", "sse", "http", "streamable-http"],
        default=None,
        help="mcp server transport method (default: stdio)",
    )
    args = parser.parse_args()

    if args.usable:
        from ..utils.usable import usable
        sys.exit(usable())

    elif args.install:
        # Installer compares against platform.system() names ("Darwin",
        # "Linux", "Windows"); sys.platform values ("darwin", "win32")
        # never matched, so --install previously failed on every OS.
        import platform

        from ..utils.Installer import Installer
        Installer(sys_os=platform.system()).install()

    elif args.agent:
        from ..agent_as import REPLAgent
        agent = REPLAgent(work_dir=args.agent)
        agent.run()

    elif args.client:
        # Record the requested client (previously hard-coded to "cc",
        # silently discarding any explicit value passed to -c/--client).
        os.environ["STATA-MCP-CLIENT"] = args.client

        from ..mcp_servers import stata_mcp as mcp

        # NOTE(review): client mode ignores --transport and uses the
        # server's default transport — confirm this is intentional.
        mcp.run()

    else:
        from ..mcp_servers import stata_mcp as mcp

        print("Starting Stata-MCP...")

        # Use stdio if there is no transport argument
        transport = args.transport or "stdio"
        if transport == "http":
            transport = (
                "streamable-http"  # Default to streamable-http for HTTP transport
            )
        mcp.run(transport=transport)
async def main(msg: str):
    """Run one task message through the agent and print its final output.

    Connects the MCP server first, then times the whole run.  The
    ``finally`` block guarantees the elapsed time is reported and the MCP
    server subprocess is cleaned up even if ``Runner.run`` raises.

    Args:
        msg: Natural-language task for the agent.

    Returns:
        The result object produced by ``Runner.run``.
    """
    await mcp_server.connect()
    start = perf_counter()
    try:
        result = await Runner.run(agent, msg, max_turns=30)
        print(result.final_output)
    finally:
        # Runs on success and failure alike: report timing, then shut the
        # MCP server down so the stdio subprocess does not linger.
        elapsed = perf_counter() - start
        print(f"Total cost time: {elapsed:.2f} s")
        await mcp_server.cleanup()
    return result
def _get_system_lang() -> str:
    """
    Get the preferred prompt language and return the mapped language code.

    The STATA_MCP_PROMPT_LANGUAGE environment variable takes precedence
    over the system locale.

    Returns:
        str: 'cn' for Chinese, 'en' for everything else (default)
    """
    try:
        system_language, _ = locale.getdefaultlocale()
    except ValueError:
        # Some platforms expose locale strings getdefaultlocale cannot parse.
        system_language = None
    _lang = os.getenv(
        "STATA_MCP_PROMPT_LANGUAGE",
        system_language
    ) or ""
    # The previous exact-match table keyed on "zh-CN" could never match the
    # system locale, which uses an underscore ("zh_CN").  Normalize
    # "zh-CN" / "zh_CN" / "zh_CN.UTF-8" alike by comparing only the primary
    # language subtag.
    primary = _lang.replace("-", "_").split("_")[0].lower()
    return "cn" if primary == "zh" else "en"
def filter_system_vars(dictionary):
    """Strip dunder names and known helper objects from a namespace dict.

    Removes module internals (``__name__``, ``__doc__``, ...) plus the
    ``inspect`` and ``frame`` helpers, leaving only the entries meant to
    be treated as prompt variables.
    """
    skipped_prefixes = ("__",)
    skipped_names = {"inspect", "frame"}
    return {
        name: value
        for name, value in dictionary.items()
        if not name.startswith(skipped_prefixes) and name not in skipped_names
    }
    def install(self):
        """Interactively write the stata-mcp server entry into Claude's config.

        Shows the user exactly what will be written and where, asks for
        confirmation, then merges the entry into the existing ``mcpServers``
        mapping (preserving any other configured servers) and saves the
        file.  Aborts without touching the file unless the user answers
        "y" (case-insensitive).
        """
        server_cfg = self.stata_mcp_config["stata-mcp"]
        stata_cli_path = server_cfg["env"]["STATA_CLI"]
        # Preview everything before prompting so the user consents to the
        # exact change.
        print("About to install the following MCP server into your Claude config:\n")
        print(" Server name: stata-mcp")
        print(f" Command: {server_cfg['command']}")
        print(f" Args: {server_cfg['args']}")
        print(f" STATA_CLI path: {stata_cli_path}\n")
        print(f"Configuration file to modify:\n {self.config_file_path}\n")

        # Ask the user for confirmation
        choice = input(
            "Do you want to proceed and add this configuration? [y/N]: ")
        if choice.strip().lower() != "y":
            print("Installation aborted.")
            return

        # Read the current config; a missing or corrupt file falls back to
        # an empty server map rather than failing the install.
        try:
            with open(self.config_file_path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            config = {"mcpServers": {}}

        # Merge (not overwrite) so other configured MCP servers survive.
        servers = config.setdefault("mcpServers", {})
        servers.update(self.stata_mcp_config)

        # Write it
        with open(self.config_file_path, "w", encoding="utf-8") as f:
            json.dump(config, f, ensure_ascii=False, indent=2)

        print(
            f"✅ Successfully wrote 'stata-mcp' configuration to: {self.config_file_path}"
        )
24 | """ 25 | 26 | def __init__(self, 27 | name: str = None, 28 | instructions: str = None, 29 | model: Model = None, 30 | mcp_servers: list = None, 31 | tools: list = None, 32 | max_turns: int = 30, # If the task is not easy, set larger number 33 | DISABLE_TRACING: bool = False, 34 | base_path: str = None, 35 | *args, 36 | **kwargs): 37 | # Initialize FetchFromDocs if base_path is provided 38 | self.doc_fetcher = None 39 | if base_path: 40 | self.doc_fetcher = FetchFromDocs(base_path) 41 | 42 | tools = tools or [] 43 | tools.extend(self._load_fetch_tools()) 44 | 45 | super().__init__( 46 | name=name or self.NAME, 47 | instructions=instructions, 48 | model=model, 49 | mcp_servers=mcp_servers, 50 | tools=tools, 51 | max_turns=max_turns, 52 | DISABLE_TRACING=DISABLE_TRACING, 53 | *args, 54 | **kwargs 55 | ) 56 | 57 | def _load_fetch_tools(self) -> list: 58 | """ 59 | Load document fetching tools for the agent. 60 | 61 | Returns: 62 | List of function tools for document knowledge retrieval 63 | """ 64 | @function_tool() 65 | def list_knowledge_keywords() -> List[str]: 66 | """ 67 | List all available knowledge keywords (document filenames). 68 | 69 | Returns: 70 | List of available document keywords that can be used to fetch knowledge 71 | """ 72 | if not self.doc_fetcher: 73 | return [] 74 | return self.doc_fetcher.KEYWORDS 75 | 76 | @function_tool() 77 | def fetch_knowledge(keyword: str) -> str: 78 | """ 79 | Fetch knowledge content from document by keyword. 80 | 81 | Args: 82 | keyword: Document filename to search for and retrieve content from 83 | 84 | Returns: 85 | Document content as string, or error message if document not found 86 | """ 87 | if not self.doc_fetcher: 88 | return "Document fetcher not initialized. Please provide base_path when creating the agent." 
89 | return self.doc_fetcher.fetch_knowledge_from_docs(keyword) 90 | 91 | return [list_knowledge_keywords, fetch_knowledge] 92 | 93 | @property 94 | def TO_HANDOFF_AGENT(self) -> Handoff: 95 | return handoff( 96 | agent=self.agent, 97 | ) 98 | -------------------------------------------------------------------------------- /source/docs/Usages/ClaudeCode/01_install.md: -------------------------------------------------------------------------------- 1 | # Install 2 | The first thing is to install Claude Code and Stata-MCP. 3 | Claude Code starts with `npm` command, and Stata-MCP is start with `uvx` or `python3.11+`, so we should install npm and uvx. 4 | 5 | ## Preparation 6 | ### brew 7 | I recommend to use `brew` to install them, run the following command in your terminal to install brew, if you want to know more information about brew, you can visit [Homebrew](https://brew.sh/). 8 | ```bash 9 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" 10 | ``` 11 | 12 | Then, use the following command to check whether install it successfully. 13 | ```bash 14 | brew --version 15 | ``` 16 | 17 | ### nvm, node and npm 18 | `nvm` is a Node Version Manager, it can help you to manage different Node versions. 19 | `node` is a JavaScript runtime environment. It can help you to manage different Node versions. 20 | `npm` is a package manager for Node. It can help you to manage different Node versions. 21 | 22 | You can install nvm directly with `brew install` 23 | ```bash 24 | brew install nvm 25 | ``` 26 | 27 | Then, you should add the following lines to your `~/.zshrc` file 28 | ```bash 29 | echo 'export NVM_DIR="$HOME/.nvm"' >> ~/.zshrc 30 | echo '[ -s "/opt/homebrew/opt/nvm/nvm.sh" ] && . 
"/opt/homebrew/opt/nvm/nvm.sh"' >> ~/.zshrc 31 | source ~/.zshrc 32 | ``` 33 | 34 | Check the status of nvm: 35 | ```bash 36 | nvm --version 37 | ``` 38 | 39 | Then, you can install node with nvm: 40 | ```bash 41 | nvm install 20.19.5 # At least you should install 18+ 42 | nvm alias default 20 43 | nvm use default 44 | node -v 45 | npm -v 46 | ``` 47 | 48 | ### uvx 49 | As macOS has a build-in python, but we always need a newer version or independent version of python, so we should install `uv`, as the official documents, they provide a command for install it, or you can install it with `brew`, more over you can install with `pip`. (b and c pasted from the official documents) 50 | 51 | a. Install uvx with `brew` 52 | ```bash 53 | brew install uvx 54 | uvx --version 55 | ``` 56 | 57 | b. Install uvx from astral.sh 58 | ```bash 59 | curl -LsSf https://astral.sh/uv/install.sh | sh 60 | ``` 61 | 62 | If your system doesn't have `curl`, you can use `wget`: 63 | ```bash 64 | wget -qO- https://astral.sh/uv/install.sh | sh 65 | ``` 66 | 67 | c. Install uvx from pypi 68 | If installing from PyPI, we recommend installing uv into an isolated environment, e.g., with pipx: 69 | ```bash 70 | pipx install uv 71 | ``` 72 | 73 | However, pip can also be used: 74 | ```bash 75 | pip install uv 76 | ``` 77 | > Note: uv ships with prebuilt distributions (wheels) for many platforms; if a wheel is not available for a given platform, uv will be built from source, which requires a Rust toolchain. See the contributing setup guide for details on building uv from source. 78 | 79 | ## Claude Code 80 | Now, we can install Claude Code. Run the following command in your terminal to install Claude Code. 81 | ```bash 82 | npm install -g @anthropic-ai/claude-code 83 | ``` 84 | 85 | > Install from brew is also available, but it is still in beta. 
86 | > ```bash 87 | > brew install --cask claude-code 88 | > ``` 89 | 90 | Then, you can use `claude` in any terminal to start Claude Code, you can enjoy it now. 91 | 92 | ## Stata-MCP 93 | Stata-MCP will be one of the tool in your Claude Code, so we should install it in Claude Code, it is more easy to install it. 94 | ```bash 95 | export STATA_MCP_CWD=$(pwd) 96 | claude mcp add stata-mcp uvx stata-mcp 97 | ``` 98 | -------------------------------------------------------------------------------- /source/reports/ai_coding_ability/main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "metadata": { 5 | "ExecuteTime": { 6 | "end_time": "2025-08-31T09:29:14.424386Z", 7 | "start_time": "2025-08-31T09:29:14.421907Z" 8 | } 9 | }, 10 | "cell_type": "code", 11 | "source": [ 12 | "# if you want to make a figure about it, you should install matplotlib.\n", 13 | "# !uv pip install matplotlib" 14 | ], 15 | "id": "a0713a5ab359ed99", 16 | "outputs": [], 17 | "execution_count": 1 18 | }, 19 | { 20 | "cell_type": "code", 21 | "id": "initial_id", 22 | "metadata": { 23 | "collapsed": true, 24 | "ExecuteTime": { 25 | "end_time": "2025-08-31T09:29:15.804880Z", 26 | "start_time": "2025-08-31T09:29:14.741696Z" 27 | } 28 | }, 29 | "source": [ 30 | "import pandas as pd\n", 31 | "from matplotlib import pyplot as plt\n" 32 | ], 33 | "outputs": [], 34 | "execution_count": 2 35 | }, 36 | { 37 | "metadata": { 38 | "ExecuteTime": { 39 | "end_time": "2025-08-31T09:29:25.542219Z", 40 | "start_time": "2025-08-31T09:29:15.813331Z" 41 | } 42 | }, 43 | "cell_type": "code", 44 | "source": [ 45 | "# Load datasets\n", 46 | "df = pd.read_parquet(\"hf://datasets/open-llm-leaderboard/contents@main/data/train-00000-of-00001.parquet\")\n" 47 | ], 48 | "id": "66afc8b4a318545a", 49 | "outputs": [ 50 | { 51 | "name": "stderr", 52 | "output_type": "stream", 53 | "text": [ 54 | 
"/Users/sepinetam/Documents/Github/stata-mcp/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 55 | " from .autonotebook import tqdm as notebook_tqdm\n" 56 | ] 57 | } 58 | ], 59 | "execution_count": 3 60 | }, 61 | { 62 | "metadata": { 63 | "ExecuteTime": { 64 | "end_time": "2025-08-31T09:29:28.907175Z", 65 | "start_time": "2025-08-31T09:29:28.900383Z" 66 | } 67 | }, 68 | "cell_type": "code", 69 | "source": [ 70 | "# Process date type in data\n", 71 | "df['Upload to Hub Date'] = pd.to_datetime(df['Submission Date'])\n" 72 | ], 73 | "id": "e5274e3736cbb2b5", 74 | "outputs": [], 75 | "execution_count": 4 76 | }, 77 | { 78 | "metadata": {}, 79 | "cell_type": "code", 80 | "source": [ 81 | "# Set plot style\n", 82 | "plt.style.use(\"ggplot\")\n", 83 | "plt.figure(figsize=(12, 6))\n", 84 | "\n", 85 | "# Plot\n", 86 | "plt.scatter(df[\"Upload to Hub Date\"], df[\"Average ⬆\\uFE0F\"])\n", 87 | "plt.title(\"The Average Ability of LLMs\")\n", 88 | "plt.xlabel(\"Upload Date\")\n", 89 | "plt.ylabel(\"LLMs' Average Score\")\n", 90 | "plt.gcf().autofmt_xdate()\n", 91 | "\n", 92 | "# Save figure\n", 93 | "plt.savefig(\"./ability.png\")\n" 94 | ], 95 | "id": "fc32b8df08eccdf6", 96 | "outputs": [], 97 | "execution_count": null 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 2 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython2", 116 | "version": "2.7.6" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 5 121 | } 122 | -------------------------------------------------------------------------------- 
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : macos.py

import re
from pathlib import Path
from typing import Dict, List

from .base import FinderBase, StataEditionConfig


class FinderMacOS(FinderBase):
    """Locate the Stata CLI executable on macOS.

    Search order: well-known ``bin`` directories first, then the
    ``/Applications`` folder. When several editions are found, the
    highest-ranked ``StataEditionConfig`` (per its ordering) wins.
    """

    def finder(self) -> str | None:
        """Return the path of the best Stata CLI found.

        Returns:
            Absolute path to the Stata CLI executable.

        Raises:
            FileNotFoundError: If no Stata CLI can be located.
        """
        bin_results = self.find_from_bin()
        if bin_results:
            return max(bin_results).stata_cli_path

        application_results = self.find_from_application()
        if application_results:
            return max(application_results).stata_cli_path
        # No Stata CLI was found in either location.
        raise FileNotFoundError("Stata CLI not found")

    def find_path_base(self) -> Dict[str, List[str]]:
        """Return the base directories searched on macOS."""
        return {
            "bin": ["/usr/local/bin"],
            "application": ["/Applications"],
        }

    def _application_find_base(
            self,
            dot_app: str | Path,
            version: int | float | None = None) -> StataEditionConfig | None:
        """Inspect one Stata application directory.

        Args:
            dot_app: Directory containing ``Stata*.app`` bundles
                (e.g. ``/Applications/Stata``).
            version: Known Stata major version; when ``None`` it is read
                from the ``isstata.*`` marker file inside *dot_app*.

        Returns:
            A ``StataEditionConfig`` when edition, version and an
            executable CLI were all identified, otherwise ``None``.
        """
        # Accept plain strings as well as Path objects; the original code
        # called .glob() on whatever was passed in and broke on str.
        dot_app = Path(dot_app)
        _version = version
        _edition = None
        stata_cli_path = None

        if not _version:
            for isstata_file in dot_app.glob("isstata.*"):
                if isstata_file.is_file():
                    # Marker files look like "isstata.180" -> version 18.0
                    match = re.search(r'isstata\.(\d+)', isstata_file.name.lower())
                    if match:
                        _version = float(match.group(1)) / 10
                        break

        for stata_app in dot_app.glob("Stata*.app"):
            if stata_app.is_dir():
                # "StataMP.app" -> edition "mp" (also se, be, ic):
                # strip the "Stata" prefix and ".app" suffix, lowercase.
                _edition = stata_app.name.replace("Stata", "").replace(".app", "").lower()
                __stata_cli_path = stata_app / "Contents" / "MacOS" / f"stata-{_edition}"
                if self._is_executable(__stata_cli_path):
                    stata_cli_path = str(__stata_cli_path)
                    break

        if _version and _edition and stata_cli_path:
            return StataEditionConfig(_edition, _version, stata_cli_path)
        return None

    def find_from_application(self) -> List[StataEditionConfig]:
        """Scan /Applications for installed Stata editions."""
        found_executables: List[StataEditionConfig] = []
        applications_dir = Path(self.find_path_base()["application"][0])

        # A bare /Applications/Stata directory marks the default install;
        # if it yields a config, return it directly.
        stata_dir = applications_dir / "Stata"
        if default_stata := self._application_find_base(stata_dir):
            return [default_stata]

        # Otherwise look for versioned folders such as "Stata 18".
        for stata_app in applications_dir.glob("Stata *"):
            if stata_app.is_dir():
                try:
                    # Parse the trailing version token safely; the previous
                    # eval() would execute arbitrary expressions taken from
                    # a directory name.
                    _version = float(stata_app.name.split()[-1])
                except ValueError:
                    _version = None
                if stata_app_config := self._application_find_base(stata_app, version=_version):
                    found_executables.append(stata_app_config)

        return found_executables


if __name__ == "__main__":
    finder = FinderMacOS()
    print(finder.finder())
16 | 17 | You should view the user as an economist with strong economic intuition but unfamiliar with Stata operations, making your collaboration the strongest economics research team. 18 | 19 | Your task is to generate Stata code based on the user's instructions, adding comments before each line of code, and then run this dofile. 20 | The user will provide you with a data path and their research story or regression model. 21 | What you need to do is understand the data structure based on the data path, and then write Stata regression code according to the user's model. 22 | 23 | Your output should tell the user how the results look, whether they meet the user's expectations, and inform them of the locations of the dofile and log file. 24 | """ 25 | 26 | stata_assistant_role_cn: str = """ 27 | 你将扮演一个经济学的研究助理,你有很强的编程能力,Stata在你这里是一个非常简单非常家常的工具。 28 | 29 | 你应该把用户视为一个经济直觉很强但是不熟悉Stata操作的经济学家,因此你们合作就是最强的经济学研究组合。 30 | 31 | 你的任务是根据用户的指令去生成Stata代码,并在每行代码前加上注释,然后运行这个dofile。 32 | 用户会给你一个数据的路径和他的研究故事或者回归模型, 33 | 而你需要做的是根据数据路径去了解数据结构,然后根据用户的模型去写Stata的回归代码。 34 | 35 | 你的输出应该是告诉用户这个结果如何,是否是符合用户预期的,并把dofile和log文件的位置都告诉用户。 36 | """ 37 | 38 | stata_analysis_strategy_en: str = """ 39 | When conducting data analysis using Stata, please follow these strategies: 40 | 41 | 1. Data preparation and exploration: 42 | - First use get_data_info() to understand the basic characteristics of the dataset, including variable types, missing values, and distributions 43 | - Ensure you understand the meaning of each variable and possible encoding methods 44 | - Assess whether data cleaning, variable transformation, or missing value handling is needed 45 | 46 | 2. 
Code generation and execution workflow: 47 | - Break down the analysis into multiple logical steps, each with a clear objective 48 | - Use write_dofile() to create the initial do file 49 | - For complex analyses, first run the basic steps, then use append_dofile() to add more analyses 50 | - Execute with stata_do() and check results after each modification 51 | 52 | 3. Results management: 53 | - Use results_doc_path() to get a unified storage path for results before generating tables or outputs 54 | - Save this path in the do file using the local output_path command 55 | - Use commands like outreg2 or esttab to output results to the specified path 56 | 57 | 4. Reporting results: 58 | - After executing the do file, use read_log() to view execution results and possible errors 59 | - Analyze and interpret important statistical results 60 | - Provide context and explanation of the meaning of results 61 | 62 | 5. Handling common issues: 63 | - If syntax errors occur, first check if variable names are correct 64 | - Check if the dataset has been properly loaded 65 | - For large datasets, consider using a subsample for preliminary analysis 66 | """ 67 | 68 | stata_analysis_strategy_cn: str = """ 69 | 使用Stata进行数据分析时,请遵循以下策略: 70 | 71 | 1. 数据准备和探索: 72 | - 首先使用get_data_info()了解数据集的基本情况,包括变量类型、缺失值和分布 73 | - 确保理解每个变量的意义和可能的编码方式 74 | - 评估是否需要数据清洗、变量转换或缺失值处理 75 | 76 | 2. 代码生成和执行流程: 77 | - 将分析分解为多个逻辑步骤,每个步骤都有明确的目标 78 | - 使用write_dofile()创建初始do文件 79 | - 对于复杂分析,先运行基础步骤,然后使用append_dofile()添加更多分析 80 | - 每次修改后使用stata_do()执行并检查结果 81 | 82 | 3. 结果管理: 83 | - 在生成表格或输出结果前使用results_doc_path()获取统一的结果存储路径 84 | - 在do文件中使用local output_path命令保存此路径 85 | - 使用outreg2或esttab等命令将结果输出到指定路径 86 | 87 | 4. 报告结果: 88 | - 执行do文件后使用read_log()查看执行结果和可能的错误 89 | - 分析并解释重要的统计结果 90 | - 提供结果的上下文和含义解释 91 | 92 | 5. 
常见问题处理: 93 | - 如果出现语法错误,先检查变量名称是否正确 94 | - 检查数据集是否已正确加载 95 | - 针对大型数据集,考虑使用子样本进行初步分析 96 | """ 97 | -------------------------------------------------------------------------------- /source/docs/Difference.md: -------------------------------------------------------------------------------- 1 | # Catalog 2 | - [🇬🇧 English](#difference-with-stata-mcphanlulong) 3 | - [🇨🇳 中文](#与stata-mcphanlulong的不同) 4 | 5 | --- 6 | 7 | # Difference with Stata-MCP@hanlulong 8 | - 🔗 [hanlulong/stata-mcp](https://github.com/hanlulong/stata-mcp) 9 | - [Report or Request](https://github.com/SepineTam/stata-mcp/issues) 10 | 11 | ## stata-mcp@hanlulong 12 | ### Main Features 13 | - IDE integration: Provides Stata integration for Visual Studio Code and Cursor IDE using the Model Context Protocol (MCP) 14 | - Command execution: Allows you to run Stata commands directly from VS Code or Cursor (If you want to use it with Jupyter Lab, refer to the [documentation](https://github.com/hanlulong/stata-mcp/blob/main/jupyter-stata.md) or check [Issue](https://github.com/hanlulong/stata-mcp/issues/5)) 15 | - Syntax highlighting: Full support for Stata .do, .ado, .mata, and .doh files 16 | - Cross-platform: Works on Windows, macOS, and Linux 17 | - AI assistant integration: Get contextual help and code suggestions via MCP 18 | 19 | ### Installation 20 | The Stata-MCP@hanlulong can be installed directly from the VS Code Marketplace. The first-time installation may take up to 2 minutes as dependencies are installed. 
In short, Stata-MCP@sepinetam provides interaction with large language models to help implement dofiles, while Stata-MCP@hanlulong offers a more convenient Stata usage solution compared to using Jupyter Lab and the Stata client (editing and running Stata commands in VS Code).
This project will gradually improve its documentation, and configuration videos will be added in the future. 49 | 2. Implementation focus: Although both use MCP, they are implemented in different ways. 50 | 51 | # 与Stata-MCP@hanlulong的不同 52 | - 🔗 [hanlulong/stata-mcp](https://github.com/hanlulong/stata-mcp) 53 | - [报告问题或者提出需求](https://github.com/SepineTam/stata-mcp/issues) 54 | 55 | ## stata-mcp@hanlulong 56 | ### 主要特征 57 | - IDE集成:使用模型上下文协议(MCP)为Visual Studio Code和Cursor IDE提供Stata集成 58 | - 命令执行:允许直接从VS Code或Cursor运行Stata命令 (如果你想通过Jupyter Lab使用,参考[文档](https://github.com/hanlulong/stata-mcp/blob/main/jupyter-stata.md)或查看[Issue](https://github.com/hanlulong/stata-mcp/issues/5)) 59 | - 语法高亮:完全支持Stata .do、.ado、.mata和.doh文件 60 | - 跨平台:适用于Windows、macOS和Linux 61 | - AI助手集成:通过MCP获取上下文相关帮助和代码建议 62 | 63 | ### 安装 64 | 该Stata-MCP@hanlulong可以直接从VS Code市场安装。首次安装可能需要长达2分钟的时间,因为需要安装依赖项。 65 | 66 | ### MCP集成 67 | 此实现利用模型上下文协议使AI助手能够与Stata交互,允许: 68 | - 直接从编辑器运行代码 69 | - 接收上下文相关帮助 70 | - 获取代码建议 71 | 72 | ## stata-mcp@sepinetam 73 | ### 主要特点 74 | - 数据集成:通过模型上下文协议(MCP)在Stata的统计功能和AI助手之间建立桥梁 75 | - 上下文分析:使AI系统能够理解Stata数据集、命令和统计输出 76 | - 模块化设计:支持针对不同用例和环境的可定制组件 77 | - 统计输出解析:将Stata输出转换为AI模型可以解释的结构化格式 78 | - 高级查询:实现与Stata的统计和数据操作功能的自然语言交互 79 | 80 | ### 安装 81 | 安装说明在仓库的[README](../../README.md)或[Usage](Usages/Usage.md)中提供。初始设置通常需要配置您的Stata路径和首选连接设置。 82 | 83 | ### MCP集成 84 | 此实现使用模型上下文协议在Stata和AI系统之间创建语义层: 85 | - 统计上下文感知,提供更相关的AI响应 86 | - 数据集结构理解,提供更好的数据分析建议 87 | - 命令历史感知,改进工作流程建议 88 | 89 | ## 区别 90 | 简短地说,Stata-MCP@sepinetam提供了与大语言模型交互,让其完成dofile的实现,而Stata-MCP@hanlulong提供了相比于使用Jupyter Lab和Stata客户端更方便的Stata使用方案(在VScode编辑并运行stata命令) 91 | 92 | 1. 文档和开发活动:目前hanlulong的仓库有更全面的文档,本项目将逐步完善文档,后续也会加入配置的视频。 93 | 2. 
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : repl_agents.py

import asyncio
import os
import textwrap
import uuid
from pathlib import Path

from agents import Agent, OpenAIChatCompletionsModel, Runner, set_tracing_disabled
from agents.mcp import MCPServerStdio
from agents.memory.sqlite_session import SQLiteSession
from openai_api_polling.polling import APIPolling, ClientPolling

set_tracing_disabled(disabled=True)


def get_model_instructions(work_dir: str) -> str:
    """Build the model instructions, embedding the current working directory."""
    return textwrap.dedent(f"""
    # 角色定义
    # [定义助手的核心身份和专业领域]

    # 核心能力
    # [列出主要技能和功能模块]

    # 工作原则
    # [指导性行为准则和优先级]
    # - 所有操作都在当前工作目录进行: {work_dir}
    # - 执行 Stata 命令前先确保切换到正确的目录: cd "{work_dir}"

    # 分析流程
    # [标准化的分析步骤和方法]

    # 输出标准
    # [结果呈现的格式和质量要求]

    # 代码规范
    # [生成代码的最佳实践]

    # 交互风格
    # [与用户沟通的方式和特点]
    """)


class REPLAgent:
    """Interactive REPL agent that drives Stata-MCP through an LLM."""

    # API-key / base-url rotation shared by every REPLAgent instance.
    # STATA_MCP_* variables take precedence over the OPENAI_* fallbacks.
    client_polling = ClientPolling(
        api_keys=[
            os.getenv("STATA_MCP_API_KEY", os.getenv("OPENAI_API_KEY", None))
        ],
        base_url=os.getenv("STATA_MCP_API_BASE_URL",
                           os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
    )

    # Model name resolution: STATA_MCP_MODEL > OPENAI_MODEL > default.
    llm = os.getenv(
        "STATA_MCP_MODEL", os.getenv(
            "OPENAI_MODEL", "gpt-3.5-turbo"
        )
    )

    def __init__(self, work_dir: str = "./", session_id: str = None):
        """Create a REPL agent.

        Args:
            work_dir: Working directory for Stata files and the session DB;
                created (including parents) if it does not exist.
            session_id: Optional stable session identifier; a random one is
                generated when omitted.
        """
        self.work_dir = Path(work_dir).expanduser().absolute()
        # parents=True: a nested work dir must not raise FileNotFoundError.
        self.work_dir.mkdir(parents=True, exist_ok=True)

        self.session_id = session_id or f"stata_session_{uuid.uuid4().hex[:8]}"

        # Conversation history is persisted next to the working files.
        session_db_path = self.work_dir / ".stata_sessions.db"

        self.session = SQLiteSession(
            session_id=self.session_id,
            db_path=session_db_path
        )

        self.stata_mcp_server = MCPServerStdio(
            name="Stata-MCP",
            params={
                "command": "uvx",
                "args": [
                    "stata-mcp"  # or you can use the local beta version with the git clone repo.
                ],
                "env": {
                    "STATA_MCP_CWD": self.work_dir.as_posix(),
                }
            },
            cache_tools_list=True,
            client_session_timeout_seconds=60,
            max_retry_attempts=3
        )

        async_client = self.client_polling.async_client
        self.agent = Agent(
            name="Stata Assistant",
            instructions=get_model_instructions(self.work_dir.as_posix()),
            mcp_servers=[self.stata_mcp_server],
            model=OpenAIChatCompletionsModel(
                model=self.llm,
                openai_client=async_client
            )
        )
        print("Current Agent Config: ")
        print(f">>> API KEY\t: {APIPolling.mask_api_key(async_client.api_key, mask_len=20)}")
        print(f">>> BASE URL\t: {async_client.base_url}")
        print(f">>> MODEL\t: {self.llm}")

    def __del__(self):
        # Best-effort cleanup only. asyncio.run() raises RuntimeError when a
        # loop is already running, and may fail during interpreter shutdown;
        # a finalizer must never propagate either.
        try:
            asyncio.run(self.stata_mcp_server.cleanup())
        except Exception:
            pass

    async def invoke(self, query: str):
        """Run one query through the agent and return its final output.

        On failure the exception text is returned instead of raising, so the
        REPL loop keeps running.
        """
        await self.stata_mcp_server.connect()
        try:
            result = await Runner.run(
                self.agent,
                query,
                session=self.session,
                max_turns=30
            )
        except Exception as e:
            print("Something went wrong")
            return str(e)
        finally:
            # Always release the MCP subprocess, success or failure.
            await self.stata_mcp_server.cleanup()
        return result.final_output

    def run(self):
        """Blocking REPL loop: read a query, run it, print the answer."""
        print("Welcome to Stata-MCP built-in Agent!")
        print("You can type what you want to do with Stata-MCP after sign `> `")
        print("Type `/exit` or `bye` to exit")
        while True:
            query = input("> ")
            if query.lower() in ("/exit", "bye"):
                break
            result = asyncio.run(self.invoke(query))
            print(result)
        exit(0)
stata-mcp --version 68 | ``` 69 | 70 | ### Development with uvx 71 | ```bash 72 | # Run without local installation 73 | uvx stata-mcp --version 74 | uvx stata-mcp --agent 75 | uvx stata-mcp --usable 76 | ``` 77 | 78 | ## Architecture Overview 79 | 80 | ### Core Components 81 | 82 | 1. **MCP Server (`src/stata_mcp/__init__.py`)** 83 | - FastMCP-based server providing Stata tools and prompts 84 | - Main entry point for LLM interactions 85 | - Handles cross-platform Stata execution 86 | 87 | 2. **Stata Integration (`src/stata_mcp/core/stata/`)** 88 | - `StataFinder`: Locates Stata executable on different platforms 89 | - `StataController`: Manages Stata command execution 90 | - `StataDo`: Handles do-file execution with logging 91 | 92 | 3. **Agent Mode (`src/stata_mcp/mode/`)** 93 | - `StataAgent`: LangChain-based agent for autonomous analysis 94 | - Interactive conversational interface 95 | - Supports custom work directories and models 96 | 97 | 4. **Data Processing (`src/stata_mcp/core/data_info/`)** 98 | - `CsvDataInfo`: CSV file analysis and statistics 99 | - `DtaDataInfo`: Stata .dta file analysis 100 | - Automatic data type detection and summary statistics 101 | 102 | 5. 
- Use conventional commit format: `<type>(<scope>): <subject>`
- Types: feat, fix, docs, style, refactor, test, chore, perf, ci, build, revert
- Subject under 50 characters, imperative mood, lowercase
- Reference issues with `Closes #<issue>` or `Fixes #<issue>`
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : any_as_tools.py

from typing import Any, Dict, List, Optional, Union

from agents import Agent, FunctionTool

from ..agent_base import AgentBase


def _convert_agent_to_tool(agent: Union[Agent, AgentBase],
                           label: str,
                           fallback_name: str,
                           description: Optional[str],
                           max_turns: int,
                           raise_on_error: bool,
                           tool_name: Optional[str] = None) -> Optional[FunctionTool]:
    """Convert one agent (AgentBase wrapper or raw SDK Agent) to a FunctionTool.

    Shared by agent_list_to_tools() and dict_to_agent_tools(); the two public
    functions previously duplicated this branch logic verbatim.

    Args:
        agent: The agent to convert.
        label: Position label used in error messages, e.g. "at index 3"
            or "for 'my_tool'".
        fallback_name: Tool name used when the agent exposes no name attribute.
        description: Optional tool description; a default is derived when
            empty or None.
        max_turns: Maximum conversation turns for the resulting tool.
        raise_on_error: If True, raise ValueError on conversion errors;
            otherwise print a warning and return None.
        tool_name: Explicit tool name; when None the name is read from the
            agent ('NAME' for AgentBase, 'name' for Agent).

    Returns:
        The converted FunctionTool, or None when the agent was skipped.
    """
    if isinstance(agent, AgentBase):
        # Custom AgentBase wrapper - the underlying SDK agent has as_tool.
        target, kind, name_attr = agent.agent, "AgentBase", "NAME"
    else:
        # Direct OpenAI Agents SDK Agent - use as_tool directly.
        target, kind, name_attr = agent, "Agent", "name"

    if not hasattr(target, "as_tool"):
        error_msg = f"{kind} {label} does not have as_tool property"
        if raise_on_error:
            raise ValueError(error_msg)
        print(f"Warning: {error_msg}. Skipping...")
        return None

    if tool_name is None:
        tool_name = getattr(agent, name_attr, fallback_name)

    return target.as_tool(
        tool_name=tool_name,
        tool_description=description or f"Tool for {tool_name}",
        max_turns=max_turns,
    )


def agent_list_to_tools(agents: List[Union[Agent, AgentBase]], descriptions: Optional[List[str]] = None, raise_on_error: bool = False) -> List[FunctionTool]:
    """
    Convert a list of Agent objects to their tool representations.

    Args:
        agents: List of Agent objects to convert
        descriptions: Optional list of descriptions for each agent tool
        raise_on_error: If True, raise exceptions on errors. If False, print error and skip.

    Returns:
        List[FunctionTool]: List of agent tools
    """
    tools: List[FunctionTool] = []

    for i, agent in enumerate(agents):
        # Pair each agent with its description when one was supplied.
        description = descriptions[i] if descriptions and i < len(descriptions) else None
        tool = _convert_agent_to_tool(
            agent,
            label=f"at index {i}",
            fallback_name=f"agent_{i}",
            description=description,
            max_turns=getattr(agent, "max_turns", 30),
            raise_on_error=raise_on_error,
        )
        if tool is not None:
            tools.append(tool)

    return tools


def dict_to_agent_tools(agents_dict: Dict[str, Dict[str, Any]], raise_on_error: bool = False) -> List[FunctionTool]:
    """
    Convert a dictionary of agents to their tool representations.

    Args:
        agents_dict: Dictionary where keys are tool names and values contain agents and metadata
        raise_on_error: If True, raise exceptions on errors. If False, print error and skip.

    Returns:
        List[FunctionTool]: List of agent tools
    """
    tools: List[FunctionTool] = []

    for tool_name, agent_data in agents_dict.items():
        # A value may be a bare agent or a {agent, description, max_turns} dict.
        if isinstance(agent_data, dict):
            agent = agent_data.get('agent')
            description = agent_data.get('description')
            max_turns = agent_data.get('max_turns', 30)
        else:
            agent = agent_data
            description = None
            max_turns = 30

        if agent is None:
            error_msg = f"Agent for '{tool_name}' is None"
            if raise_on_error:
                raise ValueError(error_msg)
            print(f"Warning: {error_msg}. Skipping...")
            continue

        tool = _convert_agent_to_tool(
            agent,
            label=f"for '{tool_name}'",
            fallback_name=tool_name,
            description=description,
            max_turns=max_turns,
            raise_on_error=raise_on_error,
            tool_name=tool_name,  # dict keys always name the tool
        )
        if tool is not None:
            tools.append(tool)

    return tools
Skipping...") 129 | continue 130 | 131 | tool = agent.as_tool( 132 | tool_name=tool_name, 133 | tool_description=description or f"Tool for {tool_name}", 134 | max_turns=max_turns 135 | ) 136 | 137 | tools.append(tool) 138 | 139 | return tools 140 | -------------------------------------------------------------------------------- /src/stata_mcp/agent_as/agent_as_tool/stata_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved 5 | # 6 | # @Author : Sepine Tam (谭淞) 7 | # @Email : sepinetam@gmail.com 8 | # @File : stata_agent.py 9 | 10 | import os 11 | 12 | from agents import Model 13 | from agents.mcp import MCPServerStdio 14 | 15 | from ..agent_base import AgentBase 16 | 17 | 18 | class StataAgent(AgentBase): 19 | NAME: str = "Stata Agent" 20 | 21 | agent_instructions: str = """ 22 | # Role 23 | You are a **Stata Data Analysis Expert** and **Economics Research Assistant** with strong programming abilities. Stata is a very familiar and powerful tool for you. 24 | 25 | # Core Identity 26 | You should view the user as an economist with strong economic intuition but unfamiliar with Stata operations, making your collaboration the strongest economics research team. 27 | 28 | # Primary Responsibilities 29 | 1. **Data Understanding**: Analyze data structure and characteristics before any analysis 30 | 2. **Code Generation**: Generate well-commented Stata code based on user's research objectives 31 | 3. **Execution & Validation**: Run do-files and verify results meet expectations 32 | 4. **Results Interpretation**: Explain statistical outputs in economic context 33 | 34 | # Working Principles (ReAct Framework) 35 | 36 | ## 1. 
THINK - Before taking any action 37 | - Understand the research question and data structure 38 | - Plan the analysis approach step by step 39 | - Identify potential issues and constraints 40 | 41 | ## 2. ACT - Execute the plan 42 | - Use get_data_info() to explore dataset first 43 | - Write clean, well-commented Stata code 44 | - Execute do-files systematically 45 | 46 | ## 3. OBSERVE - Review and adjust 47 | - Check execution results and error messages 48 | - Validate statistical outputs 49 | - Adjust approach if results don't meet expectations 50 | 51 | # Analysis Strategy 52 | 1. **Data Preparation & Exploration**: 53 | - Always start with get_data_info() to understand variables, types, missing values 54 | - Assess data quality and cleaning requirements 55 | - Plan variable transformations if needed 56 | 57 | 2. **Code Generation Workflow**: 58 | - Break complex analysis into logical steps 59 | - Use write_dofile() for initial code creation 60 | - Use append_dofile() for incremental additions 61 | - Execute with stata_do() after each modification 62 | 63 | 3. **Results Management**: 64 | - Use results_doc_path() for organized output storage 65 | - Generate professional tables with outreg2/esttab 66 | - Save visualizations in appropriate formats 67 | 68 | 4. 
**Communication Standards**: 69 | - Report execution status and file locations 70 | - Explain statistical results in economic terms 71 | - Highlight potential limitations or concerns 72 | 73 | # Constraints & Guidelines 74 | - Never modify original data files 75 | - Always provide detailed code comments 76 | - Save all results to specified directories 77 | - Report errors clearly with troubleshooting suggestions 78 | - Use appropriate statistical methods for the research question 79 | 80 | # Output Requirements 81 | - All Stata code must be properly commented 82 | - Report dofile and log file locations 83 | - Provide economic interpretation of statistical results 84 | - Generate professional-looking output tables and graphs 85 | 86 | """ 87 | _default_tool_description: str = """ 88 | A Stata Data Analysis Agent that performs statistical analysis and generates professional results. 89 | 90 | **Capabilities**: Data analysis, regression analysis, visualization, code generation 91 | **Input**: Data path + research objectives 92 | **Output**: Commented Stata code + statistical results + economic interpretation 93 | """ 94 | 95 | stata_cli = os.getenv("stata_cli", None) 96 | _mcp_env: dict = None 97 | if stata_cli: 98 | _mcp_env["stata_cli"] = stata_cli 99 | stata_mcp = MCPServerStdio( 100 | name="Stata-MCP", 101 | params={ 102 | "command": "uvx", 103 | "args": ["stata-mcp"], 104 | "env": _mcp_env 105 | }, 106 | ) 107 | 108 | def __init__(self, 109 | name: str = None, 110 | instructions: str = None, 111 | model: Model = None, 112 | mcp_servers: list = None, 113 | tools: list = None, 114 | tool_description: str = None, 115 | max_turns: int = 30, # If the task is not easy, set larger number 116 | DISABLE_TRACING: bool = False, 117 | *args, 118 | **kwargs): 119 | if not mcp_servers: 120 | mcp_servers = [] 121 | mcp_servers.append(self.stata_mcp) 122 | 123 | super().__init__( 124 | name=name or self.NAME, 125 | instructions=instructions or self.agent_instructions, 126 | 
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : claude_cfg.py

"""Helpers for detecting, installing, and inspecting Claude Code MCP config."""

import platform
import shutil
import subprocess
import webbrowser
from typing import Dict


def check_claude_code(claude_cmd: str = "claude") -> bool:
    """Return True if the Claude Code CLI is available on the user's PATH.

    Args:
        claude_cmd: Executable name to look for (default: "claude").
    """
    return shutil.which(claude_cmd) is not None


def guide_for_install_claude_code(base_doc_url: str = None):
    """Open the OS-specific Claude Code installation guide in a web browser.

    Args:
        base_doc_url: Documentation page URL; defaults to the hosted guide.

    Returns:
        None. (Side effect: opens the default browser at an OS anchor.)
    """
    if base_doc_url is None:
        base_doc_url = "https://docs.statamcp.com/guide_claude_code.html"

    # Map platform.system() values to the doc page's anchor names;
    # unknown platforms fall back to the page top (empty anchor).
    os_mapping = {
        "Darwin": "macos",
        "Windows": "windows",
        "Linux": "linux"
    }
    anchor = os_mapping.get(platform.system(), "")

    webbrowser.open(f"{base_doc_url}#{anchor}")
    return None


def install_stata_mcp_for_claude_code(cwd: str) -> bool:
    """Register the stata-mcp server with Claude Code for a project directory.

    Runs `claude mcp add stata-mcp --env STATA_MCP_CWD=<cwd> -- uvx stata-mcp`
    inside *cwd*.

    Args:
        cwd: Working directory the MCP server should be bound to.

    Returns:
        True when the CLI exits with status 0; False on non-zero exit,
        timeout, or a missing `claude` executable.
    """
    command = [
        "claude", "mcp", "add", "stata-mcp",
        "--env", f"STATA_MCP_CWD={cwd}",
        "--", "uvx", "stata-mcp"
    ]
    try:
        result = subprocess.run(
            command,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=60  # prevent hanging on an unresponsive CLI
        )
        return result.returncode == 0
    # NOTE: the original also caught CalledProcessError, which subprocess.run
    # never raises without check=True; SubprocessError covers timeouts too.
    except (subprocess.SubprocessError, FileNotFoundError):
        return False


def parse_mcp_list_output(output: str) -> Dict[str, dict]:
    """Parse the stdout of `claude mcp list` into a structured dictionary.

    Expected input lines look like:
        github: https://api.githubcopilot.com/mcp (HTTP) - ✓ Connected
        broken: /usr/local/bin/foo - ✗ Failed to connect

    Args:
        output: Raw stdout text from the CLI.

    Returns:
        Mapping of server name to {"cfg": <config string>, "state": <bool>};
        {} when no servers are configured or the output is empty.
    """
    output = output.strip()
    if not output or "No MCP servers configured" in output:
        return {}

    mcp_servers: Dict[str, dict] = {}
    for line in output.split('\n'):
        line = line.strip()

        # Skip blanks and the leading health-check banner.
        if not line or line.startswith("Checking MCP server health"):
            continue

        # Configuration lines are "name: config - status"; ignore anything else.
        if ':' not in line:
            continue

        server_name, _, config_part = line.partition(':')
        server_name = server_name.strip()
        config_part = config_part.strip()

        is_connected = "✓ Connected" in config_part

        # Strip the trailing connection-status marker to recover the config.
        if " - ✓ Connected" in config_part:
            config = config_part.replace(" - ✓ Connected", "").strip()
        elif " - ✗" in config_part:
            config = config_part.split(" - ✗")[0].strip()
        elif "Failed" in config_part:
            # e.g. "... - Failed to connect" style error messages
            config = config_part.split(" -")[0].strip()
        else:
            config = config_part

        mcp_servers[server_name] = {
            "cfg": config,
            "state": is_connected
        }

    return mcp_servers


def claude_mcp_list(cwd: str = None) -> Dict[str, dict]:
    """Run `claude mcp list` and return the parsed server health information.

    Args:
        cwd: Directory to run the CLI in (affects project-scoped config);
            None uses the current process working directory.

    Returns:
        See parse_mcp_list_output(). {} on any failure (non-zero exit,
        timeout, missing CLI) — this is a deliberate best-effort contract.
    """
    try:
        result = subprocess.run(
            ["claude", "mcp", "list"],
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=60
        )
        if result.returncode != 0:
            return {}
        return parse_mcp_list_output(result.stdout)
    except Exception:
        # Best-effort: any failure (including FileNotFoundError when the
        # `claude` CLI is absent) is reported as "no servers".
        return {}
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : advice.py

"""One-sided (positive / negative) LLM feedback generators used for evaluation."""

from abc import ABC
from typing import Dict, List

from openai import OpenAI

from ._model import DEFAULT_CLIENT, THINKING_MODEL


class AdviceModelBase(ABC):
    """Base class that assembles a staged chat transcript and asks an LLM
    for one-sided (purely positive or purely negative) feedback.

    Subclasses provide INSTRUCTION (the system prompt); RULE is a fallback
    reference text used when no reference answer has been supplied.
    """

    CLIENT: OpenAI = DEFAULT_CLIENT
    MODEL: str = THINKING_MODEL

    INSTRUCTION: str
    # Fallback reference text. FIX: previously only annotated and never
    # assigned, so advice() without a reference answer raised AttributeError.
    RULE: str = ""

    def __init__(self, client: OpenAI = None, model: str = None):
        """
        Args:
            client: OpenAI-compatible client; defaults to the class CLIENT.
            model: Model name overriding the class MODEL when given.
        """
        self.client = client or self.CLIENT
        self._model = model
        # FIX: initialize here; previously advice() read an attribute that
        # only existed after _set_reference_answer() had been called.
        self.reference_answer = None

    @property
    def model(self) -> str:
        """Effective model name: the instance override, else the class default."""
        return self._model or self.MODEL

    @property
    def instruction(self) -> str:
        """System prompt defined by the concrete subclass."""
        return self.INSTRUCTION

    def _set_reference_answer(self, reference_answer: str):
        """Store the reference answer used as the evaluation baseline."""
        self.reference_answer = reference_answer

    def advice(self, task: str, processer: str | List[str], reference_answer: str = None) -> str:
        """Generate one-sided feedback for a task execution trace.

        Args:
            task: Description of the evaluated task.
            processer: The execution/process log, either a single string or a
                list of strings (joined with newlines).
            reference_answer: Optional baseline answer; stored for reuse.

        Returns:
            The model's feedback text.
        """
        if isinstance(processer, list):
            processer = "\n".join(processer)

        # Store the reference answer if one was provided on this call.
        if reference_answer is not None:
            self._set_reference_answer(reference_answer)

        msg: List = [
            {
                "role": "system",
                "content": self.instruction
            },
            {
                "role": "user",
                "content": "Here is the task information: " + task
            },
            {
                # FIX: this acknowledgement previously claimed knowledge of the
                # reference answer before it had been supplied in the transcript.
                "role": "assistant",
                "content": "I have known this task, now please give me the reference answer."
            },
            {
                "role": "user",
                "content": self.reference_answer or self.RULE
            },
            {
                "role": "assistant",
                "content": "I have known this reference_answer, now please give me the process information"
            },
            {
                "role": "user",
                "content": processer
            }
        ]
        resp = self.client.chat.completions.create(
            # FIX: was self.MODEL, which silently ignored the per-instance
            # model override accepted by __init__.
            model=self.model,
            messages=msg,
        )
        return resp.choices[0].message.content


class AdvicePositiveCN(AdviceModelBase):
    # Chinese system prompt: positive-only, no scores or rankings.
    INSTRUCTION = """
## 任务说明:
根据提供的规则和信息,给出积极的评价。请确保你的评价是客观的,并且避免打分或做任何主观评判。
评价应该集中在对信息或行为中的正面方面进行描述,突出任务在完成过程中表现好的地方。

## 具体要求:
1. 在评价时,聚焦于信息或行为的积极方面。比如,表扬其创新性、有效性、合作性等。
2. 避免任何负面或中立的描述。若无法找到完全正面的内容,可以尽可能强调其潜力或已有的成就。
3. 使用清晰且专业的语言,确保评价简洁且不带任何偏见。
4. 不需要给出分数、等级或排名。
5. 确保评价中的措辞不会引起误解或误导,保持客观和公正。
"""


class AdviceNegativeCN(AdviceModelBase):
    # Chinese system prompt: negative-only, no scores or rankings.
    INSTRUCTION = """
## 任务说明:
根据提供的规则和信息,给出消极的评价。请确保你的评价是客观的,并且避免打分或做任何主观评判。
评价应该集中在对信息或行为中的不足之处进行描述,突出任务在完成过程中存在的问题或需改进的地方。

## 具体要求:
1. 在评价时,聚焦于信息或行为的不足方面。比如,指出其目标偏差、有效性不足、协作不充分等。
2. 避免任何正面或中立的描述。若无法确定具体问题,可以尽可能指出潜在风险或尚未覆盖的环节。
3. 使用清晰且专业的语言,确保评价简洁且不带任何偏见。
4. 不需要给出分数、等级或排名。
5. 确保评价中的措辞不会引起误解或误导,保持客观和公正。
"""


class AdvicePositive(AdviceModelBase):
    # English system prompt: positive-only, no scores or rankings.
    INSTRUCTION = """
## Task Description:
Provide positive feedback based on the given reference_answers and information. Ensure the evaluation is objective and avoid assigning scores or making any subjective judgments.
The feedback should focus on describing the positive aspects of the information or behavior, highlighting what was done well during the task.

## Specific Requirements:
1. Focus on the positive aspects of the information or behavior, such as praising its innovativeness, effectiveness, or collaboration.
2. Avoid any negative or neutral descriptions. If purely positive aspects cannot be found, emphasize its potential or existing achievements as much as possible.
3. Use clear and professional language to ensure the feedback is concise and unbiased.
4. No scores, grades, or rankings are needed.
5. Ensure that the wording will not cause misunderstanding or mislead, maintaining objectivity and fairness.
"""


class AdviceNegative(AdviceModelBase):
    # English system prompt: negative-only, no scores or rankings.
    INSTRUCTION = """
## Task Description:
Provide negative feedback based on the given reference_answers and information. Ensure the evaluation is objective and avoid assigning scores or making any subjective judgments.
The feedback should focus on describing the shortcomings of the information or behavior, highlighting the problems or areas needing improvement during the task.

## Specific Requirements:
1. Focus on the shortcomings of the information or behavior, such as pointing out goal deviations, lack of effectiveness, or insufficient collaboration.
2. Avoid any positive or neutral descriptions. If specific problems cannot be identified, highlight potential risks or uncovered aspects as much as possible.
3. Use clear and professional language to ensure the feedback is concise and unbiased.
4. No scores, grades, or rankings are needed.
5. Ensure that the wording will not cause misunderstanding or mislead, maintaining objectivity and fairness.
"""


# Lookup table: language code -> {"positive"/"negative": advice class}.
LANG_MAPPING: Dict[str, dict] = {
    "cn": {
        "positive": AdvicePositiveCN,
        "negative": AdviceNegativeCN
    },
    "en": {
        "positive": AdvicePositive,
        "negative": AdviceNegative
    },
}
Follow the steps below to evaluate your large language model. 3 | 4 | > Reference: Tan, S., & Feng, M. (2025). How to use StataMCP improve your social science research? Shanghai Bayes Views Information Technology Co., Ltd. 5 | 6 | Bibtex follows: 7 | ```bibtex 8 | @techreport{tan2025stataMCP, 9 | author = {Tan, Song and Feng, Muyao}, 10 | title = {Stata-MCP: A research report on AI-assisted empirical research}, 11 | year = {2025}, 12 | month = {September}, 13 | day = {21}, 14 | language = {English}, 15 | address = {Shanghai, China}, 16 | institution = {Shanghai Bayes Views Information Technology Co., Ltd.}, 17 | url = {https://www.statamcp.com/reports/2025/09/21/stata_mcp_a_research_report_on_ai_assisted_empirical_research} 18 | } 19 | ``` 20 | 21 | ## Step 1: Set your environment 22 | Set api-key, base-url, and model-name 23 | ```bash 24 | export OPENAI_API_KEY= 25 | export OPENAI_BASE_URL=https://api.openai.com/v1 26 | export OPENAI_MODEL=gpt-3.5-turbo 27 | export CHAT_MODEL=gpt-3.5-turbo 28 | export THINKING_MODEL=gpt-5 29 | 30 | # For DeepSeek models (alternative) 31 | export DEEPSEEK_API_KEY= 32 | export DEEPSEEK_BASE_URL= 33 | ``` 34 | 35 | ## Step 2: Run your evaluation task with AgentRunner 36 | 37 | We provide a convenient `AgentRunner` class to help you execute tasks and extract results. The AgentRunner supports OpenAI-compatible APIs and can process Stata-related tasks automatically. 38 | 39 | ### Option A: Using AgentRunner (Recommended) 40 | 41 | ```python 42 | from stata_mcp.evaluate import AgentRunner, ScoreModel 43 | 44 | # Define your evaluation task 45 | YOUR_TASK: str = ... 46 | 47 | GIVEN_ANSWER: str = ... 
48 | 49 | # Initialize and run AgentRunner 50 | runner = AgentRunner( 51 | model="gpt-3.5-turbo", # or "deepseek-chat" for DeepSeek models 52 | api_key="your-api-key", 53 | base_url="https://api.openai.com/v1" # or your DeepSeek base URL 54 | ) 55 | 56 | # Execute the task 57 | result = runner.run(YOUR_TASK) 58 | 59 | # Extract conversation history and final answer 60 | HIST_MSG = AgentRunner.get_processer(result) 61 | FINAL_ANSWER = AgentRunner.get_final_result(result) 62 | 63 | print(f"Conversation has {len(HIST_MSG)} items") 64 | print(f"Final answer: {FINAL_ANSWER}") 65 | ``` 66 | 67 | ### Option B: Manual Agent Setup 68 | 69 | ```python 70 | # If you prefer to set up the agent manually 71 | from openai import OpenAI 72 | from agents import Agent, Runner 73 | 74 | client = OpenAI(api_key="your-api-key") 75 | agent = Agent( 76 | instructions="You are a helpful assistant specialized in Stata analysis.", 77 | model="gpt-3.5-turbo" 78 | ) 79 | 80 | result = Runner.run_sync(agent, input=YOUR_TASK) 81 | # Then extract data manually as needed 82 | ``` 83 | 84 | ## Step 3: Evaluate with ScoreModel 85 | 86 | Once you have the task results, use `ScoreModel` to evaluate the performance: 87 | 88 | ```python 89 | from stata_mcp.evaluate import ScoreModel 90 | 91 | # Convert conversation history to string format (required by ScoreModel) 92 | hist_msg_str = "\n".join([ 93 | f"{item['role']}: {item['content']}" 94 | for item in HIST_MSG 95 | ]) 96 | 97 | sm = ScoreModel( 98 | task=YOUR_TASK, 99 | reference_answer=GIVEN_ANSWER, 100 | processer=hist_msg_str, # Now supports string format from conversation history 101 | results=FINAL_ANSWER, 102 | task_id="eval_001" # Optional: set a unique ID for tracking 103 | ) 104 | 105 | # Get the evaluation score 106 | score = sm.score_it() 107 | print(f"Evaluation Score: {score}") 108 | 109 | # The ScoreModel evaluates: 110 | # - Task completion accuracy 111 | # - Quality of analysis 112 | # - Statistical correctness 113 | # - Clarity of
explanation 114 | ``` 115 | 116 | ## Advanced Usage 117 | 118 | ### Batch Evaluation 119 | 120 | For evaluating multiple tasks: 121 | 122 | ```python 123 | tasks = [ 124 | { 125 | "task": "Analyze the relationship between education and income using census data", 126 | "reference": "Expected analysis includes correlation, regression, and policy implications" 127 | }, 128 | { 129 | "task": "Conduct a difference-in-differences analysis of a policy intervention", 130 | "reference": "Should include pre/post comparison, control group, and statistical significance" 131 | } 132 | ] 133 | 134 | runner = AgentRunner(model="gpt-3.5-turbo", api_key="your-api-key") 135 | results = [] 136 | 137 | for i, task_data in enumerate(tasks): 138 | result = runner.run(task_data["task"]) 139 | hist_msg = AgentRunner.get_processer(result) 140 | final_answer = AgentRunner.get_final_result(result) 141 | 142 | sm = ScoreModel( 143 | task=task_data["task"], 144 | reference_answer=task_data["reference"], 145 | processer="\n".join([f"{item['role']}: {item['content']}" for item in hist_msg]), 146 | results=final_answer, 147 | task_id=f"batch_eval_{i+1}" 148 | ) 149 | 150 | score = sm.score_it() 151 | results.append({"task_id": f"batch_eval_{i+1}", "score": score}) 152 | 153 | print("Batch Evaluation Results:") 154 | for result in results: 155 | print(f"Task {result['task_id']}: Score = {result['score']}") 156 | ``` 157 | 158 | ### Custom Evaluation Criteria 159 | 160 | You can extend the evaluation framework with custom metrics: 161 | 162 | ```python 163 | # The AgentRunner provides structured data that can be used for custom evaluation 164 | conversation_analysis = { 165 | "total_turns": len(HIST_MSG), 166 | "tool_usage_count": len([item for item in HIST_MSG if item["role"] == "tool"]), 167 | "has_stata_commands": any("stata" in item["content"].lower() for item in HIST_MSG), 168 | "final_answer_length": len(FINAL_ANSWER) 169 | } 170 | 171 | # Use these metrics alongside the ScoreModel score 172 
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2025 - Present Sepine Tam, Inc. All Rights Reserved
#
# @Author : Sepine Tam (谭淞)
# @Email  : sepinetam@gmail.com
# @File   : csv.py

"""CSV/TSV/TXT data-info handler built on the shared DataInfoBase."""

from pathlib import Path
from typing import List

import pandas as pd

from ._base import DataInfoBase


class CsvDataInfo(DataInfoBase):
    """Data-info handler for delimited text files (.csv, .txt, .tsv)."""

    def __init__(self,
                 data_path: str | Path,
                 vars_list: List[str] | str = None,
                 *,
                 encoding: str = "utf-8",
                 cache_info: bool = True,
                 cache_dir: str | Path = None,
                 **kwargs):
        """
        Initialize CSV data info handler.

        Args:
            data_path: Path to the CSV file
            vars_list: List of variables to analyze, or single variable name
            encoding: File encoding (default: utf-8)
            cache_info: Whether to cache data information (default: True)
            cache_dir: Directory for caching (default: None)
            **kwargs: Additional pandas.read_csv() arguments (sep, header, etc.)
        """
        super().__init__(
            data_path=data_path,
            vars_list=vars_list,
            encoding=encoding,
            cache_info=cache_info,
            cache_dir=cache_dir,
            **kwargs
        )

    @staticmethod
    def _looks_numeric(label) -> bool:
        """True if a column label parses as a float.

        A numeric label suggests the first row is data rather than a header.
        """
        try:
            float(str(label))
            return True
        except (ValueError, TypeError):
            return False

    def _resolve_header(self, file_path: Path) -> None:
        """Auto-detect header row and default column names.

        Mutates self.kwargs in place (sets 'header' and possibly 'names'),
        matching the original caching behavior across reads.
        """
        if 'header' not in self.kwargs:
            sample_kwargs = {k: v for k, v in self.kwargs.items()
                             if k not in ('header', 'names')}
            try:
                preview = pd.read_csv(file_path, nrows=10, header=0,
                                      **sample_kwargs)
                # Heuristic: if any column label looks like a number, the
                # first row is data, i.e. the file has no header row.
                if any(self._looks_numeric(c) for c in preview.columns):
                    self.kwargs['header'] = None
                else:
                    self.kwargs['header'] = 0
            except Exception:
                # Detection failure: fall back to "first row is the header".
                self.kwargs['header'] = 0

        if self.kwargs.get('header') is None:
            # Headerless file: read one row to count columns, then synthesize
            # V1..Vn column names (Stata-like convention).
            sample_kwargs = {k: v for k, v in self.kwargs.items()
                             if k not in ('header', 'names')}
            sample_df = pd.read_csv(file_path, nrows=1, header=None,
                                    **sample_kwargs)
            self.kwargs['names'] = [f'V{i + 1}'
                                    for i in range(len(sample_df.columns))]

    def _read_with_fallback(self, file_path: Path) -> pd.DataFrame:
        """Read with self.kwargs; on unknown kwargs retry with a safe subset."""
        try:
            return pd.read_csv(file_path, **self.kwargs)
        except TypeError as e:
            if "unexpected keyword argument" not in str(e):
                raise
            # Filter out parameters read_csv does not accept and retry.
            basic_kwargs = {k: v for k, v in self.kwargs.items()
                            if k in {'sep', 'header', 'encoding', 'names'}}
            print(f"Warning: Retrying CSV read with filtered parameters due to: {e}")
            return pd.read_csv(file_path, **basic_kwargs)

    def _read_data(self) -> pd.DataFrame:
        """
        Read CSV file into pandas DataFrame.

        Automatically detects header and handles various CSV formats.

        Returns:
            pd.DataFrame: The data from the CSV file

        Raises:
            FileNotFoundError: If the file does not exist
            ValueError: If the file is not a valid CSV file or cannot be read
        """
        file_path = Path(self.data_path)

        if not file_path.exists():
            raise FileNotFoundError(f"CSV file not found: {file_path}")

        valid_extensions = {'.csv', '.txt', '.tsv'}
        if file_path.suffix.lower() not in valid_extensions:
            raise ValueError(
                f"File must have extension in {valid_extensions}, "
                f"got: {file_path.suffix}")

        try:
            self._resolve_header(file_path)
            return self._read_with_fallback(file_path)
        except Exception as e:
            # FIX: the original wrapped read failures twice, producing
            # "Error reading CSV file ...: Error reading CSV file ...: msg".
            # Wrap exactly once here and chain the original cause.
            raise ValueError(
                f"Error reading CSV file {file_path}: {e}") from e