├── .gitignore ├── COMMERCIAL_LICENSE.md ├── LICENSE ├── README.md ├── assets ├── 1pga_gmx_copilot_animation.gif ├── prot_lig.gif ├── prot_lig_rmsf.pdf └── report.pdf ├── gromacs_copilot ├── __init__.py ├── __main__.py ├── cli.py ├── config.py ├── core │ ├── __init__.py │ ├── enums.py │ └── md_agent.py ├── mcp_server.py ├── protocols │ ├── __init__.py │ ├── analysis.py │ ├── base.py │ ├── mmpbsa.py │ ├── protein.py │ └── protein_ligand.py └── utils │ ├── __init__.py │ ├── logging_utils.py │ ├── shell.py │ └── terminal.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | 173 | experimental/ 174 | examples/ -------------------------------------------------------------------------------- /COMMERCIAL_LICENSE.md: -------------------------------------------------------------------------------- 1 | # Commercial License for gromacs_copilot 2 | 3 | This software is dual-licensed under: 4 | 5 | 1. **GNU General Public License v3.0 (GPLv3)** 6 | - Free to use, modify, and distribute under **GPL terms**. 7 | - Any derivative work **must also be open-sourced** under the same GPL license. 8 | 9 | 2. **Commercial License** 10 | - If you wish to use this software **without GPL restrictions** (e.g., for proprietary software, SaaS products, or internal business applications), a commercial license is available. 11 | - Contact us at jinyuansun_at_chatmol.org to discuss licensing options. 12 | 13 | ## Benefits of the Commercial License: 14 | ✅ Use in closed-source or proprietary projects. 15 | ✅ No obligation to disclose your modifications. 16 | ✅ Official support and priority updates. 17 | 18 | For inquiries, please email **jinyuansun_at_chatmol.org** or visit **[our website](https://chatmol.org/)**. 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project is licensed under the terms of the GNU General Public License v3.0. 2 | 3 | You can redistribute and modify this project under the terms of the GNU General Public License as published by the Free Software Foundation. 4 | 5 | However, if you wish to use this software without the restrictions of the GPL (e.g., for proprietary or commercial use), please contact us for a commercial license. 
6 | 7 | See the full GPL-3.0 license at: https://www.gnu.org/licenses/gpl-3.0.html 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GROMACS Copilot 2 | **Let LLM run your MDs.** 3 | 4 | The good news: 🎉 You now have more time to hang out with your cat! 🐱💖 5 | The bad news: 😢 You'll miss out on GROMACS' legendary wisdom... 🧙‍♂️💬 6 | 7 | ## Introduction 8 | This agent automates **MD simulations** for proteins in water using **GROMACS**. It sets up the system, runs simulations, and analyzes **RMSD, RMSF, Rg, H-bonds**, etc. 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 22 | 23 | 24 | 27 | 30 | 31 |
ProteinProtein-Ligand Complex
17 | 1pga 18 | 20 | 3wzm 21 |
25 | A demo of output report 26 | 28 | A demo of output report 29 |
32 | 33 | ## How to Run 34 | 35 | ### Before using an LLM 36 | 1. Install the package 37 | ```bash 38 | pip install git+https://github.com/ChatMol/gromacs_copilot.git 39 | conda install -c conda-forge acpype # for protein-ligand complex 40 | conda install -c conda-forge gmx_mmpbsa # for MM-PBSA/GBSA analysis 41 | ``` 42 | 2. Prepare a working dir and an input PDB 43 | ```bash 44 | mkdir md_workspace && cd md_workspace 45 | wget https://files.rcsb.org/download/1PGA.pdb 46 | grep -v HOH 1PGA.pdb > 1pga_protein.pdb 47 | cd .. 48 | ``` 49 | 50 | ### Using DeepSeek 51 | ```bash 52 | gmx_copilot --workspace md_workspace/ \ 53 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \ 54 | --api-key $DEEPSEEK_API_KEY \ 55 | --model deepseek-chat \ 56 | --url https://api.deepseek.com/chat/completions 57 | ``` 58 | 59 | ### Using OpenAI 60 | ```bash 61 | gmx_copilot --workspace md_workspace/ \ 62 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \ 63 | --api-key $OPENAI_API_KEY \ 64 | --model gpt-4o \ 65 | --url https://api.openai.com/v1/chat/completions 66 | ``` 67 | 68 | ### Using Gemini 69 | ```bash 70 | gmx_copilot --workspace md_workspace/ \ 71 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \ 72 | --api-key $GEMINI_API_KEY \ 73 | --model gemini-2.0-flash \ 74 | --url https://generativelanguage.googleapis.com/v1beta/chat/completions 75 | ``` 76 | 77 | 3. Agent mode 78 | Agent mode is well suited for automating long, multi-step workflows that involve many tool calls. 79 | ```bash 80 | gmx_copilot --workspace md_workspace/ \ 81 | --prompt "run 1 ns production md for 1pga_protein.pdb in the workspace, and analyze rmsd" \ 82 | --mode agent 83 | ``` 84 | 85 | The agent handles **system setup, simulation execution, and result analysis** automatically. 
🚀 86 | 87 | 88 | ## License 89 | This project is dual-licensed under: 90 | - **GPLv3** (Open Source License) 91 | - **Commercial License** (For proprietary use) 92 | 93 | For commercial licensing, [read this](COMMERCIAL_LICENSE.md). 94 | 95 | ## Known issues 96 | 1. 🤖 LLM sometimes struggles with selecting the correct group index. Double-checking the selection is recommended. 97 | 2. ⚡ The interaction between LLM and `gmx` prompt input isn't always seamless. Running commands based on suggestions can help you get the correct results more easily. 98 | 99 | ## Disclaimer 100 | 101 | GROMACS Copilot is provided "as is" without warranty of any kind, express or implied. The authors and contributors disclaim all warranties including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. Users employ this software at their own risk. 102 | 103 | The authors bear no responsibility for any consequences arising from the use, misuse, or misinterpretation of this software or its outputs. Results obtained through GROMACS Copilot should be independently validated prior to use in research, publications, or decision-making processes. 104 | 105 | This software is intended for research and educational purposes only. Users are solely responsible for ensuring compliance with applicable laws, regulations, and ethical standards in their jurisdiction. 
-------------------------------------------------------------------------------- /assets/1pga_gmx_copilot_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/1pga_gmx_copilot_animation.gif -------------------------------------------------------------------------------- /assets/prot_lig.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/prot_lig.gif -------------------------------------------------------------------------------- /assets/prot_lig_rmsf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/prot_lig_rmsf.pdf -------------------------------------------------------------------------------- /assets/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/report.pdf -------------------------------------------------------------------------------- /gromacs_copilot/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | GROMACS Copilot - AI-assisted molecular dynamics simulations 3 | ============================================================ 4 | 5 | A tool for automating GROMACS molecular dynamics simulations 6 | with AI assistance to guide setup, execution, and analysis. 
7 | 8 | Created by the ChatMol Team 9 | """ 10 | 11 | __version__ = "0.2.0" 12 | __author__ = "ChatMol Team" 13 | __email__ = "jinyuansun@chatmol.org" -------------------------------------------------------------------------------- /gromacs_copilot/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Entry point for running GROMACS Copilot as a module 3 | """ 4 | 5 | from gromacs_copilot.cli import main 6 | 7 | if __name__ == "__main__": 8 | main() -------------------------------------------------------------------------------- /gromacs_copilot/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command-line interface for GROMACS Copilot 3 | """ 4 | 5 | import os 6 | import sys 7 | import argparse 8 | import logging 9 | 10 | from gromacs_copilot.core.md_agent import MDLLMAgent 11 | from gromacs_copilot.utils.terminal import Colors, print_message 12 | from gromacs_copilot.utils.logging_utils import setup_logging 13 | from gromacs_copilot.core.enums import MessageType 14 | from gromacs_copilot.config import DEFAULT_WORKSPACE, DEFAULT_MODEL, DEFAULT_OPENAI_URL 15 | 16 | 17 | def parse_arguments(): 18 | """ 19 | Parse command-line arguments 20 | 21 | Returns: 22 | argparse.Namespace: Parsed arguments 23 | """ 24 | parser = argparse.ArgumentParser(description="GROMACS Copilot") 25 | parser.add_argument("--api-key", help="API key for LLM service") 26 | parser.add_argument("--url", 27 | help=( 28 | "The url of the LLM service, " 29 | "\ndeepseek: https://api.deepseek.com/chat/completions" 30 | "\nopenai: https://api.openai.com/v1/chat/completions" 31 | ), 32 | default=DEFAULT_OPENAI_URL) 33 | parser.add_argument("--model", default=DEFAULT_MODEL, help="Model to use for LLM") 34 | parser.add_argument("--workspace", default=DEFAULT_WORKSPACE, help="Workspace directory") 35 | parser.add_argument("--prompt", help="Starting prompt for the LLM") 36 | 
parser.add_argument("--no-color", action="store_true", help="Disable colored output") 37 | parser.add_argument("--log-file", default="md_agent.log", help="Log file path") 38 | parser.add_argument("--log-level", default="INFO", 39 | choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 40 | help="Logging level") 41 | parser.add_argument("--mode", default="copilot", choices=['copilot', 'agent'], 42 | help="The copilot mode or agent mode, copilot will be more like a advisor." 43 | ) 44 | 45 | return parser.parse_args() 46 | 47 | 48 | def main(): 49 | """ 50 | Main entry point for the CLI 51 | """ 52 | # Parse command line arguments 53 | args = parse_arguments() 54 | 55 | # Setup logging 56 | log_level = getattr(logging, args.log_level) 57 | setup_logging(args.log_file, level=log_level) 58 | 59 | # Disable colors if requested or if not in a terminal 60 | if args.no_color or not sys.stdout.isatty(): 61 | Colors.disable_colors() 62 | 63 | # Display splash screen 64 | print_message("", style="divider") 65 | print_message("GROMACS Copilot", MessageType.TITLE, style="box") 66 | print_message("A molecular dynamics simulation assistant powered by AI, created by the ChatMol Team.", MessageType.INFO) 67 | print_message("", style="divider") 68 | 69 | try: 70 | # Check for API key 71 | if args.url == "https://api.openai.com/v1/chat/completions": 72 | api_key = args.api_key or os.environ.get("OPENAI_API_KEY") 73 | elif args.url == "https://api.deepseek.com/chat/completions": 74 | api_key = args.api_key or os.environ.get("DEEPSEEK_API_KEY") 75 | else: 76 | api_key = args.api_key 77 | 78 | if not api_key: 79 | print_message( 80 | "API key not found. 
Please provide an API key using --api-key or set the " 81 | "OPENAI_API_KEY or DEEPSEEK_API_KEY environment variable.", 82 | MessageType.ERROR 83 | ) 84 | sys.exit(1) 85 | 86 | # Create and run MD LLM agent 87 | print_message(f"Initializing with model: {args.model}", MessageType.INFO) 88 | print_message(f"Using workspace: {args.workspace}", MessageType.INFO) 89 | 90 | agent = MDLLMAgent( 91 | api_key=api_key, 92 | model=args.model, 93 | workspace=args.workspace, 94 | url=args.url, 95 | mode=args.mode 96 | ) 97 | agent.run(starting_prompt=args.prompt) 98 | 99 | except KeyboardInterrupt: 100 | print_message("\nExiting the MD agent. Thank you for using GROMACS Copilot!", 101 | MessageType.SUCCESS, style="box") 102 | except Exception as e: 103 | error_msg = str(e) 104 | logging.error(f"Error running MD LLM agent: {error_msg}") 105 | print_message(f"Error running MD LLM agent: {error_msg}", 106 | MessageType.ERROR, style="box") 107 | 108 | 109 | if __name__ == "__main__": 110 | main() -------------------------------------------------------------------------------- /gromacs_copilot/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration constants and settings for GROMACS Copilot 3 | """ 4 | 5 | # Default settings 6 | DEFAULT_WORKSPACE = "./md_workspace" 7 | DEFAULT_MODEL = "gpt-4o" 8 | DEFAULT_OPENAI_URL = "https://api.openai.com/v1/chat/completions" 9 | DEFAULT_DEEPSEEK_URL = "https://api.deepseek.com/chat/completions" 10 | 11 | # Force fields 12 | FORCE_FIELDS = { 13 | "AMBER99SB-ILDN": "amber99sb-ildn", 14 | "CHARMM36": "charmm36-feb2021", 15 | "GROMOS96 53a6": "gromos53a6", 16 | "OPLS-AA/L": "oplsaa" 17 | } 18 | 19 | # Water models 20 | WATER_MODELS = ["spc", "tip3p", "tip4p"] 21 | 22 | # Box types 23 | BOX_TYPES = ["cubic", "dodecahedron", "octahedron"] 24 | 25 | # MDP file types 26 | MDP_TYPES = ["ions", "em", "nvt", "npt", "md"] 27 | 28 | # Default MDP parameters 29 | DEFAULT_MDP_PARAMS = { 30 | "ions": 
{ 31 | "integrator": "steep", 32 | "emtol": 1000.0, 33 | "emstep": 0.01, 34 | "nsteps": 50000, 35 | "nstlist": 1, 36 | "cutoff-scheme": "Verlet", 37 | "ns_type": "grid", 38 | "coulombtype": "cutoff", 39 | "rcoulomb": 1.0, 40 | "rvdw": 1.0, 41 | "pbc": "xyz" 42 | }, 43 | "em": { 44 | "integrator": "steep", 45 | "emtol": 1000.0, 46 | "emstep": 0.01, 47 | "nsteps": 50000, 48 | "nstlist": 1, 49 | "cutoff-scheme": "Verlet", 50 | "ns_type": "grid", 51 | "coulombtype": "PME", 52 | "rcoulomb": 1.0, 53 | "rvdw": 1.0, 54 | "pbc": "xyz" 55 | }, 56 | "nvt": { 57 | "title": "Protein-ligand complex NVT equilibration", 58 | "define": "-DPOSRES", 59 | "integrator": "md", 60 | "nsteps": 50000, 61 | "dt": 0.002, 62 | "nstxout": 500, 63 | "nstvout": 500, 64 | "nstenergy": 500, 65 | "nstlog": 500, 66 | "continuation": "no", 67 | "constraint_algorithm": "lincs", 68 | "constraints": "h-bonds", 69 | "lincs_iter": 1, 70 | "lincs_order": 4, 71 | "cutoff-scheme": "Verlet", 72 | "ns_type": "grid", 73 | "nstlist": 10, 74 | "rcoulomb": 1.0, 75 | "rvdw": 1.0, 76 | "DispCorr": "EnerPres", 77 | "coulombtype": "PME", 78 | "pme_order": 4, 79 | "fourierspacing": 0.16, 80 | "tcoupl": "V-rescale", 81 | "tc-grps": "Protein Non-Protein", 82 | "tau_t": "0.1 0.1", 83 | "ref_t": "300 300", 84 | "pcoupl": "no", 85 | "pbc": "xyz", 86 | "gen_vel": "yes", 87 | "gen_temp": 300, 88 | "gen_seed": -1 89 | }, 90 | "npt": { 91 | "title": "Protein-ligand complex NPT equilibration", 92 | "define": "-DPOSRES", 93 | "integrator": "md", 94 | "nsteps": 50000, 95 | "dt": 0.002, 96 | "nstxout": 500, 97 | "nstvout": 500, 98 | "nstenergy": 500, 99 | "nstlog": 500, 100 | "continuation": "yes", 101 | "constraint_algorithm": "lincs", 102 | "constraints": "h-bonds", 103 | "lincs_iter": 1, 104 | "lincs_order": 4, 105 | "cutoff-scheme": "Verlet", 106 | "ns_type": "grid", 107 | "nstlist": 10, 108 | "rcoulomb": 1.0, 109 | "rvdw": 1.0, 110 | "DispCorr": "EnerPres", 111 | "coulombtype": "PME", 112 | "pme_order": 4, 113 | 
"fourierspacing": 0.16, 114 | "tcoupl": "V-rescale", 115 | "tc-grps": "Protein Non-Protein", 116 | "tau_t": "0.1 0.1", 117 | "ref_t": "300 300", 118 | "pcoupl": "Parrinello-Rahman", 119 | "pcoupltype": "isotropic", 120 | "tau_p": 2.0, 121 | "ref_p": 1.0, 122 | "compressibility": 4.5e-5, 123 | "refcoord_scaling": "com", 124 | "pbc": "xyz", 125 | "gen_vel": "no" 126 | }, 127 | "md": { 128 | "title": "Protein-ligand complex MD simulation", 129 | "integrator": "md", 130 | "nsteps": 5000000, # Default 10 ns 131 | "dt": 0.002, 132 | "nstxout": 5000, 133 | "nstvout": 5000, 134 | "nstenergy": 5000, 135 | "nstlog": 5000, 136 | "nstxout-compressed": 5000, 137 | "compressed-x-grps": "System", 138 | "continuation": "yes", 139 | "constraint_algorithm": "lincs", 140 | "constraints": "h-bonds", 141 | "lincs_iter": 1, 142 | "lincs_order": 4, 143 | "cutoff-scheme": "Verlet", 144 | "ns_type": "grid", 145 | "nstlist": 10, 146 | "rcoulomb": 1.0, 147 | "rvdw": 1.0, 148 | "DispCorr": "EnerPres", 149 | "coulombtype": "PME", 150 | "pme_order": 4, 151 | "fourierspacing": 0.16, 152 | "tcoupl": "V-rescale", 153 | "tc-grps": "Protein Non-Protein", 154 | "tau_t": "0.1 0.1", 155 | "ref_t": "300 300", 156 | "pcoupl": "Parrinello-Rahman", 157 | "pcoupltype": "isotropic", 158 | "tau_p": 2.0, 159 | "ref_p": 1.0, 160 | "compressibility": 4.5e-5, 161 | "pbc": "xyz", 162 | "gen_vel": "no" 163 | } 164 | } 165 | 166 | # Standard residues list 167 | STANDARD_RESIDUES = [ 168 | "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE", 169 | "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL", 170 | "HOH", "WAT", "TIP", "SOL", "NA", "CL", "K", "CA", "MG", "ZN" 171 | ] 172 | 173 | # System message for LLM 174 | SYSTEM_MESSAGE_ADVISOR = """You are an expert molecular dynamics (MD) assistant that helps run GROMACS simulations. 175 | 176 | Your primary goal is to guide the user through setting up and running MD simulations for protein systems. 
177 | You have access to various functions to interact with GROMACS and manage simulations. 178 | 179 | 1. First, you should check if GROMACS is installed using check_gromacs_installation() 180 | 2. Guide the user through the entire MD workflow in these stages: 181 | - Setup: Get protein file and prepare workspace 182 | - Prepare Protein: Generate topology with appropriate force field 183 | - Solvation: Add water and ions to the system 184 | - Energy Minimization: Remove bad contacts 185 | - Equilibration: Equilibrate the system (NVT and NPT) 186 | - Production: Run the actual MD simulation 187 | - Analysis: Analyze results (RMSD, RMSF, etc.) 188 | 3. The default protocol is protein only, for other functions, switch to corresponding protocol first. 189 | - MM/GBSA: switch_to_mmpbsa_protocol 190 | - Protein-Ligand complex: set_ligand 191 | 192 | 193 | IMPORTANT: When running GROMACS commands that require interactive group selection, ALWAYS use echo commands to pipe the selection to the GROMACS command. For example: 194 | - Instead of: gmx rms -s md.tpr -f md.xtc -o rmsd.xvg 195 | - Use: echo "Protein Protein" | gmx rms -s md.tpr -f md.xtc -o rmsd.xvg 196 | 197 | 198 | For each step: 199 | 1. Explain what you're doing and why 200 | 2. Execute the necessary functions to perform the actions 201 | 3. Check the results and handle any errors 202 | 4. Ask the user for input when needed 203 | 204 | 205 | When you reach a point where you're waiting for the user's response or you've completed 206 | the current stage of the workflow, end your response with: "This is the final answer at this stage." 207 | 208 | Always provide clear explanations for technical concepts, and guide the user through the 209 | entire process from start to finish. 210 | """ 211 | 212 | SYSTEM_MESSAGE_AGENT = """You are an autonomous MD agent that runs GROMACS simulations for the user. 
213 | 214 | Your primary goal is to execute molecular dynamics simulations of proteins and protein-ligand systems as requested by the user. Take direct action, making reasonable default choices when parameters aren't specified. 215 | 216 | 1. First, check if GROMACS is installed using check_gromacs_installation() 217 | 2. Execute the MD workflow efficiently 218 | 3. The default protocol is protein only, for other functions, switch to corresponding protocol first. 219 | - MM/GBSA: switch_to_mmpbsa_protocol 220 | - Protein-Ligand complex: set_ligand 221 | 222 | IMPORTANT: When running GROMACS commands that require interactive group selection, use echo commands: 223 | - Use: echo "Protein Protein" | gmx rms -s md.tpr -f md.xtc -o rmsd.xvg 224 | 225 | For each action: 226 | 1. Execute the necessary functions without asking for confirmation 227 | 2. Check results and solve problems autonomously 228 | 3. Explain what you're doing briefly but focus on execution 229 | 4. Only ask for input when absolutely necessary 230 | 231 | Keep in mind: 232 | - Select reasonable default parameters when not specified 233 | - Handle protein-ligand systems automatically when detected 234 | 235 | When you complete a stage or need user input, end with: "This is the final answer at this stage." 236 | 237 | Focus on efficiently completing the requested simulation with minimal user intervention. 
238 | """ -------------------------------------------------------------------------------- /gromacs_copilot/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Core module for GROMACS Copilot 3 | """ 4 | 5 | from gromacs_copilot.core.enums import SimulationStage, MessageType 6 | from gromacs_copilot.core.md_agent import MDLLMAgent 7 | 8 | __all__ = [ 9 | 'SimulationStage', 10 | 'MessageType', 11 | 'MDLLMAgent' 12 | ] -------------------------------------------------------------------------------- /gromacs_copilot/core/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | Enumerations for GROMACS Copilot 3 | """ 4 | 5 | from enum import Enum, auto 6 | 7 | class SimulationStage(Enum): 8 | """Stages of the MD simulation workflow""" 9 | SETUP = auto() 10 | PREPARE_PROTEIN = auto() 11 | PREPARE_LIGAND = auto() # For protein-ligand simulations 12 | PREPARE_COMPLEX = auto() # For protein-ligand simulations 13 | SOLVATION = auto() 14 | ENERGY_MINIMIZATION = auto() 15 | EQUILIBRATION = auto() 16 | PRODUCTION = auto() 17 | ANALYSIS = auto() 18 | COMPLETED = auto() 19 | 20 | class MessageType(Enum): 21 | """Types of messages for terminal output""" 22 | INFO = auto() 23 | SUCCESS = auto() 24 | WARNING = auto() 25 | ERROR = auto() 26 | TITLE = auto() 27 | SYSTEM = auto() 28 | USER = auto() 29 | COMMAND = auto() 30 | TOOL = auto() 31 | FINAL = auto() -------------------------------------------------------------------------------- /gromacs_copilot/core/md_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main MD Agent class for GROMACS Copilot 3 | """ 4 | 5 | import os 6 | import json 7 | import logging 8 | import requests 9 | from typing import List, Dict, Any, Optional, Union 10 | 11 | from gromacs_copilot.protocols.protein import ProteinProtocol 12 | from gromacs_copilot.protocols.protein_ligand import 
ProteinLigandProtocol 13 | from gromacs_copilot.protocols.mmpbsa import MMPBSAProtocol 14 | from gromacs_copilot.protocols.analysis import AnalysisProtocol 15 | 16 | from gromacs_copilot.utils.terminal import print_message, prompt_user 17 | from gromacs_copilot.core.enums import MessageType, SimulationStage 18 | from gromacs_copilot.config import SYSTEM_MESSAGE_ADVISOR, SYSTEM_MESSAGE_AGENT 19 | 20 | 21 | class MDLLMAgent: 22 | """LLM-based agent for running molecular dynamics simulations with GROMACS""" 23 | 24 | def __init__(self, api_key: str = None, model: str = "gpt-4o", 25 | workspace: str = "./md_workspace", 26 | url: str = "https://api.openai.com/v1/chat/completions", mode: str = "copilot", gmx_bin: str = "gmx"): 27 | """ 28 | Initialize the MD LLM agent 29 | 30 | Args: 31 | api_key: API key for LLM service 32 | model: Model to use for LLM 33 | workspace: Directory to use as the working directory 34 | url: URL of the LLM API endpoint 35 | """ 36 | self.api_key = api_key or os.environ.get("OPENAI_API_KEY") 37 | self.url = url 38 | if not self.api_key: 39 | raise ValueError("API key is required. 
Provide as parameter or set OPENAI_API_KEY environment variable") 40 | 41 | self.model = model 42 | self.conversation_history = [] 43 | self.workspace = workspace 44 | self.gmx_bin = gmx_bin 45 | 46 | # Initialize protocol (will be set to protein or protein-ligand as needed) 47 | self.protocol = ProteinProtocol(workspace, self.gmx_bin) 48 | self.mode = mode 49 | 50 | logging.info(f"MD LLM Agent initialized with model: {model}") 51 | 52 | def switch_to_mmpbsa_protocol(self) -> Dict[str, Any]: 53 | """ 54 | Switch to MM-PBSA protocol 55 | 56 | Returns: 57 | Dictionary with result information 58 | """ 59 | try: 60 | # Create new MM-PBSA protocol 61 | old_protocol = self.protocol 62 | self.protocol = MMPBSAProtocol(self.workspace) 63 | 64 | # Copy relevant state from the old protocol if possible 65 | if hasattr(old_protocol, 'topology_file'): 66 | self.protocol.topology_file = old_protocol.topology_file 67 | 68 | if hasattr(old_protocol, 'trajectory_file'): 69 | self.protocol.trajectory_file = old_protocol.trajectory_file 70 | 71 | logging.info("Switched to MM-PBSA protocol") 72 | 73 | return { 74 | "success": True, 75 | "message": "Switched to MM-PBSA protocol successfully", 76 | "previous_protocol": old_protocol.__class__.__name__, 77 | "current_protocol": "MMPBSAProtocol" 78 | } 79 | except Exception as e: 80 | return { 81 | "success": False, 82 | "error": f"Failed to switch to MM-PBSA protocol: {str(e)}" 83 | } 84 | 85 | def switch_to_protein_ligand_protocol(self) -> Dict[str, Any]: 86 | """ 87 | Switch to Protein-Ligand protocol 88 | 89 | Returns: 90 | Dictionary with result information 91 | """ 92 | try: 93 | # Create new Protein-Ligand protocol 94 | old_protocol = self.protocol 95 | self.protocol = ProteinLigandProtocol(self.workspace) 96 | 97 | # Copy relevant state from the old protocol if possible 98 | if hasattr(old_protocol, 'topology_file'): 99 | self.protocol.topology_file = old_protocol.topology_file 100 | 101 | if hasattr(old_protocol, 
'trajectory_file'): 102 | self.protocol.trajectory_file = old_protocol.trajectory_file 103 | 104 | logging.info("Switched to Protein-Ligand protocol") 105 | 106 | return { 107 | "success": True, 108 | "message": "Switched to Protein-Ligand protocol successfully", 109 | "previous_protocol": old_protocol.__class__.__name__, 110 | "current_protocol": "ProteinLigandProtocol" 111 | } 112 | except Exception as e: 113 | return { 114 | "success": False, 115 | "error": f"Failed to switch to Protein-Ligand protocol: {str(e)}" 116 | } 117 | 118 | def switch_to_analysis_protocol(self) -> Dict[str, Any]: 119 | """ 120 | Switch to Analysis protocol 121 | 122 | Returns: 123 | Dictionary with result information 124 | """ 125 | try: 126 | # Create new Analysis protocol 127 | old_protocol = self.protocol 128 | self.protocol = AnalysisProtocol(self.workspace) 129 | 130 | # Copy relevant state from the old protocol if possible 131 | if hasattr(old_protocol, 'topology_file'): 132 | self.protocol.topology_file = old_protocol.topology_file 133 | 134 | if hasattr(old_protocol, 'trajectory_file'): 135 | self.protocol.trajectory_file = old_protocol.trajectory_file 136 | 137 | logging.info("Switched to Analysis protocol") 138 | 139 | return { 140 | "success": True, 141 | "message": "Switched to Analysis protocol successfully", 142 | "previous_protocol": old_protocol.__class__.__name__, 143 | "current_protocol": "AnalysisProtocol" 144 | } 145 | except Exception as e: 146 | return { 147 | "success": False, 148 | "error": f"Failed to switch to Analysis protocol: {str(e)}" 149 | } 150 | 151 | def get_tool_schema(self) -> List[Dict[str, Any]]: 152 | """ 153 | Get the schema for the tools available to the LLM 154 | 155 | Returns: 156 | List of tool schema dictionaries 157 | """ 158 | tools = [ 159 | { 160 | "type": "function", 161 | "function": { 162 | "name": "run_shell_command", 163 | "description": "Run a shell command", 164 | "parameters": { 165 | "type": "object", 166 | "properties": { 167 | 
"command": { 168 | "type": "string", 169 | "description": "Shell command to run" 170 | }, 171 | "capture_output": { 172 | "type": "boolean", 173 | "description": "Whether to capture stdout/stderr" 174 | } 175 | }, 176 | "required": ["command"] 177 | } 178 | } 179 | }, 180 | { 181 | "type": "function", 182 | "function": { 183 | "name": "get_workspace_info", 184 | "description": "Get information about the current workspace", 185 | "parameters": { 186 | "type": "object", 187 | "properties": {}, 188 | "required": [] 189 | } 190 | } 191 | }, 192 | { 193 | "type": "function", 194 | "function": { 195 | "name": "check_gromacs_installation", 196 | "description": "Check if GROMACS is installed and available", 197 | "parameters": { 198 | "type": "object", 199 | "properties": {}, 200 | "required": [] 201 | } 202 | } 203 | }, 204 | { 205 | "type": "function", 206 | "function": { 207 | "name": "set_protein_file", 208 | "description": "Set and prepare the protein file for simulation", 209 | "parameters": { 210 | "type": "object", 211 | "properties": { 212 | "file_path": { 213 | "type": "string", 214 | "description": "Path to the protein structure file (PDB or GRO)" 215 | } 216 | }, 217 | "required": ["file_path"] 218 | } 219 | } 220 | }, 221 | { 222 | "type": "function", 223 | "function": { 224 | "name": "check_for_ligands", 225 | "description": "Check for potential ligands in the PDB file", 226 | "parameters": { 227 | "type": "object", 228 | "properties": { 229 | "pdb_file": { 230 | "type": "string", 231 | "description": "Path to the PDB file" 232 | } 233 | }, 234 | "required": ["pdb_file"] 235 | } 236 | } 237 | }, 238 | { 239 | "type": "function", 240 | "function": { 241 | "name": "set_ligand", 242 | "description": "Set the ligand for simulation", 243 | "parameters": { 244 | "type": "object", 245 | "properties": { 246 | "ligand_name": { 247 | "type": "string", 248 | "description": "Residue name of the ligand in the PDB file" 249 | } 250 | }, 251 | "required": ["ligand_name"] 
252 | } 253 | } 254 | }, 255 | { 256 | "type": "function", 257 | "function": { 258 | "name": "generate_topology", 259 | "description": "Generate topology for the protein", 260 | "parameters": { 261 | "type": "object", 262 | "properties": { 263 | "force_field": { 264 | "type": "string", 265 | "description": "Name of the force field to use", 266 | "enum": ["AMBER99SB-ILDN", "CHARMM36", "GROMOS96 53a6", "OPLS-AA/L"] 267 | }, 268 | "water_model": { 269 | "type": "string", 270 | "description": "Water model to use", 271 | "enum": ["spc", "tip3p", "tip4p"] 272 | } 273 | }, 274 | "required": ["force_field"] 275 | } 276 | } 277 | }, 278 | { 279 | "type": "function", 280 | "function": { 281 | "name": "define_simulation_box", 282 | "description": "Define the simulation box", 283 | "parameters": { 284 | "type": "object", 285 | "properties": { 286 | "distance": { 287 | "type": "number", 288 | "description": "Minimum distance between protein and box edge (nm)" 289 | }, 290 | "box_type": { 291 | "type": "string", 292 | "description": "Type of box", 293 | "enum": ["cubic", "dodecahedron", "octahedron"] 294 | } 295 | }, 296 | "required": [] 297 | } 298 | } 299 | }, 300 | { 301 | "type": "function", 302 | "function": { 303 | "name": "solvate_system", 304 | "description": "Solvate the protein in water", 305 | "parameters": { 306 | "type": "object", 307 | "properties": {}, 308 | "required": [] 309 | } 310 | } 311 | }, 312 | { 313 | "type": "function", 314 | "function": { 315 | "name": "create_mdp_file", 316 | "description": "Create an MDP parameter file for GROMACS", 317 | "parameters": { 318 | "type": "object", 319 | "properties": { 320 | "mdp_type": { 321 | "type": "string", 322 | "description": "Type of MDP file", 323 | "enum": ["ions", "em", "nvt", "npt", "md"] 324 | }, 325 | "params": { 326 | "type": "object", 327 | "description": "Optional override parameters", 328 | "properties": { 329 | "nsteps": { 330 | "type": "integer", 331 | "description": "Number of steps" 332 | }, 333 | 
"dt": { 334 | "type": "number", 335 | "description": "Time step (fs)" 336 | } 337 | } 338 | } 339 | }, 340 | "required": ["mdp_type"] 341 | } 342 | } 343 | }, 344 | { 345 | "type": "function", 346 | "function": { 347 | "name": "add_ions", 348 | "description": "Add ions to the solvated system", 349 | "parameters": { 350 | "type": "object", 351 | "properties": { 352 | "concentration": { 353 | "type": "number", 354 | "description": "Salt concentration in M, default is 0.15" 355 | }, 356 | "neutral": { 357 | "type": "boolean", 358 | "description": "Whether to neutralize the system" 359 | } 360 | }, 361 | "required": [] 362 | } 363 | } 364 | }, 365 | { 366 | "type": "function", 367 | "function": { 368 | "name": "run_energy_minimization", 369 | "description": "Run energy minimization", 370 | "parameters": { 371 | "type": "object", 372 | "properties": {}, 373 | "required": [] 374 | } 375 | } 376 | }, 377 | { 378 | "type": "function", 379 | "function": { 380 | "name": "run_nvt_equilibration", 381 | "description": "Run NVT equilibration", 382 | "parameters": { 383 | "type": "object", 384 | "properties": {}, 385 | "required": [] 386 | } 387 | } 388 | }, 389 | { 390 | "type": "function", 391 | "function": { 392 | "name": "run_npt_equilibration", 393 | "description": "Run NPT equilibration", 394 | "parameters": { 395 | "type": "object", 396 | "properties": {}, 397 | "required": [] 398 | } 399 | } 400 | }, 401 | { 402 | "type": "function", 403 | "function": { 404 | "name": "run_production_md", 405 | "description": "Run production MD", 406 | "parameters": { 407 | "type": "object", 408 | "properties": { 409 | "length_ns": { 410 | "type": "number", 411 | "description": "Length of the simulation in nanoseconds" 412 | } 413 | }, 414 | "required": [] 415 | } 416 | } 417 | }, 418 | { 419 | "type": "function", 420 | "function": { 421 | "name": "analyze_rmsd", 422 | "description": "Perform RMSD analysis", 423 | "parameters": { 424 | "type": "object", 425 | "properties": {}, 426 | 
"required": [] 427 | } 428 | } 429 | }, 430 | { 431 | "type": "function", 432 | "function": { 433 | "name": "analyze_rmsf", 434 | "description": "Perform RMSF analysis", 435 | "parameters": { 436 | "type": "object", 437 | "properties": {}, 438 | "required": [] 439 | } 440 | } 441 | }, 442 | { 443 | "type": "function", 444 | "function": { 445 | "name": "analyze_gyration", 446 | "description": "Perform radius of gyration analysis", 447 | "parameters": { 448 | "type": "object", 449 | "properties": {}, 450 | "required": [] 451 | } 452 | } 453 | }, 454 | { 455 | "type": "function", 456 | "function": { 457 | "name": "analyze_ligand_rmsd", 458 | "description": "Perform RMSD analysis focused on the ligand", 459 | "parameters": { 460 | "type": "object", 461 | "properties": {}, 462 | "required": [] 463 | } 464 | } 465 | }, 466 | { 467 | "type": "function", 468 | "function": { 469 | "name": "analyze_protein_ligand_contacts", 470 | "description": "Analyze contacts between protein and ligand", 471 | "parameters": { 472 | "type": "object", 473 | "properties": {}, 474 | "required": [] 475 | } 476 | } 477 | }, 478 | { 479 | "type": "function", 480 | "function": { 481 | "name": "set_simulation_stage", 482 | "description": "Set the current simulation stage", 483 | "parameters": { 484 | "type": "object", 485 | "properties": { 486 | "stage": { 487 | "type": "string", 488 | "description": "Name of the stage to set", 489 | "enum": [s.name for s in SimulationStage] 490 | } 491 | }, 492 | "required": ["stage"] 493 | } 494 | } 495 | }, 496 | { 497 | "type": "function", 498 | "function": { 499 | "name": "create_mmpbsa_index_file", 500 | "description": "Create index file for MM-PBSA analysis", 501 | "parameters": { 502 | "type": "object", 503 | "properties": { 504 | "protein_selection": { 505 | "type": "string", 506 | "description": "Selection for protein group" 507 | }, 508 | "ligand_selection": { 509 | "type": "string", 510 | "description": "Selection for ligand group" 511 | } 512 | }, 513 
| "required": [] 514 | } 515 | } 516 | }, 517 | { 518 | "type": "function", 519 | "function": { 520 | "name": "create_mmpbsa_input", 521 | "description": "Create input file for MM-PBSA/GBSA calculation", 522 | "parameters": { 523 | "type": "object", 524 | "properties": { 525 | "method": { 526 | "type": "string", 527 | "description": "Method to use (pb or gb)", 528 | "enum": ["pb", "gb"] 529 | }, 530 | "startframe": { 531 | "type": "integer", 532 | "description": "First frame to analyze" 533 | }, 534 | "endframe": { 535 | "type": "integer", 536 | "description": "Last frame to analyze" 537 | }, 538 | "interval": { 539 | "type": "integer", 540 | "description": "Interval between frames" 541 | }, 542 | "ionic_strength": { 543 | "type": "number", 544 | "description": "Ionic strength for calculation" 545 | }, 546 | "with_entropy": { 547 | "type": "boolean", 548 | "description": "Whether to include entropy calculation" 549 | } 550 | }, 551 | "required": [] 552 | } 553 | } 554 | }, 555 | { 556 | "type": "function", 557 | "function": { 558 | "name": "run_mmpbsa_calculation", 559 | "description": "Run MM-PBSA/GBSA calculation for protein-ligand binding free energy", 560 | "parameters": { 561 | "type": "object", 562 | "properties": { 563 | "ligand_mol_file": { 564 | "type": "string", 565 | "description": "The Antechamber output mol2 file of ligand parametrization" 566 | }, 567 | "index_file": { 568 | "type": "string", 569 | "description": "GROMACS index file containing protein and ligand groups" 570 | }, 571 | "topology_file": { 572 | "type": "string", 573 | "description": "GROMACS topology file (tpr) for the system" 574 | }, 575 | "protein_group": { 576 | "type": "string", 577 | "description": "Name or index of the protein group in the index file" 578 | }, 579 | "ligand_group": { 580 | "type": "string", 581 | "description": "Name or index of the ligand group in the index file" 582 | }, 583 | "trajectory_file": { 584 | "type": "string", 585 | "description": "GROMACS trajectory 
file (xtc) for analysis" 586 | }, 587 | "overwrite": { 588 | "type": "boolean", 589 | "description": "Whether to overwrite existing output files", 590 | }, 591 | "verbose": { 592 | "type": "boolean", 593 | "description": "Whether to print verbose output", 594 | } 595 | }, 596 | "required": ["ligand_mol_file", "index_file", "topology_file", "protein_group", "ligand_group", "trajectory_file"] 597 | } 598 | } 599 | }, 600 | { 601 | "type": "function", 602 | "function": { 603 | "name": "parse_mmpbsa_results", 604 | "description": "Parse MM-PBSA/GBSA results", 605 | "parameters": { 606 | "type": "object", 607 | "properties": {}, 608 | "required": [] 609 | } 610 | } 611 | }, 612 | { 613 | "type": "function", 614 | "function": { 615 | "name": "switch_to_mmpbsa_protocol", 616 | "description": "Switch to MM-PBSA protocol for binding free energy calculations", 617 | "parameters": { 618 | "type": "object", 619 | "properties": {}, 620 | "required": [] 621 | } 622 | } 623 | } 624 | ] 625 | 626 | return tools 627 | 628 | def call_llm(self, messages: List[Dict[str, str]], tools: List[Dict[str, Any]] = None) -> Dict[str, Any]: 629 | """ 630 | Call the LLM with messages and tools 631 | 632 | Args: 633 | messages: List of message dictionaries 634 | tools: List of tool schema dictionaries 635 | 636 | Returns: 637 | LLM response 638 | """ 639 | tools = tools or self.get_tool_schema() 640 | 641 | headers = { 642 | "Authorization": f"Bearer {self.api_key}", 643 | "Content-Type": "application/json" 644 | } 645 | 646 | data = { 647 | "model": self.model, 648 | "messages": messages, 649 | "tools": tools 650 | } 651 | 652 | response = requests.post( 653 | self.url, 654 | headers=headers, 655 | json=data 656 | ) 657 | 658 | if response.status_code != 200: 659 | logging.error(f"LLM API error: {response.status_code} - {response.text}") 660 | raise Exception(f"LLM API error: {response.status_code} - {response.text}") 661 | 662 | return response.json() 663 | 664 | def execute_tool_call(self, 
tool_call: Dict[str, Any]) -> Dict[str, Any]: 665 | """ 666 | Execute a tool call 667 | 668 | Args: 669 | tool_call: Tool call dictionary 670 | 671 | Returns: 672 | Result of the tool call 673 | """ 674 | function_name = tool_call["function"]["name"] 675 | arguments = json.loads(tool_call["function"]["arguments"]) 676 | 677 | if function_name == "set_ligand" and not isinstance(self.protocol, ProteinLigandProtocol): 678 | # Switch to protein-ligand protocol 679 | old_protocol = self.protocol 680 | self.protocol = ProteinLigandProtocol(self.workspace) 681 | 682 | # Copy relevant state from the old protocol 683 | self.protocol.protein_file = old_protocol.protein_file 684 | self.protocol.stage = old_protocol.stage 685 | 686 | logging.info("Switched to protein-ligand protocol") 687 | elif function_name == "switch_to_mmpbsa_protocol": 688 | return self.switch_to_mmpbsa_protocol() 689 | 690 | # Get the method from the protocol class 691 | if hasattr(self.protocol, function_name): 692 | method = getattr(self.protocol, function_name) 693 | result = method(**arguments) 694 | return result 695 | else: 696 | return { 697 | "success": False, 698 | "error": f"Unknown function: {function_name}" 699 | } 700 | 701 | def run(self, starting_prompt: str = None) -> None: 702 | """ 703 | Run the MD LLM agent 704 | 705 | Args: 706 | starting_prompt: Optional starting prompt for the LLM 707 | """ 708 | # Initialize conversation with system message 709 | if self.mode == "copilot": 710 | system_message = { 711 | "role": "system", 712 | "content": SYSTEM_MESSAGE_ADVISOR 713 | } 714 | else: 715 | system_message = { 716 | "role": "system", 717 | "content": SYSTEM_MESSAGE_AGENT 718 | } 719 | 720 | self.conversation_history = [system_message] 721 | 722 | # Add starting prompt if provided 723 | if starting_prompt: 724 | self.conversation_history.append({ 725 | "role": "user", 726 | "content": starting_prompt 727 | }) 728 | 729 | # Get initial response from LLM 730 | response = 
self.call_llm(self.conversation_history) 731 | 732 | # Main conversation loop 733 | while True: 734 | assistant_message = response["choices"][0]["message"] 735 | self.conversation_history.append(assistant_message) 736 | 737 | # Process tool calls if any 738 | if "tool_calls" in assistant_message: 739 | for tool_call in assistant_message["tool_calls"]: 740 | # Execute the tool call 741 | print_message(f"Executing: {tool_call['function']['name']}", MessageType.TOOL) 742 | result = self.execute_tool_call(tool_call) 743 | 744 | # Add the tool call result to the conversation 745 | self.conversation_history.append({ 746 | "role": "tool", 747 | "tool_call_id": tool_call["id"], 748 | "name": tool_call["function"]["name"], 749 | "content": json.dumps(result) 750 | }) 751 | 752 | # Get next response from LLM 753 | response = self.call_llm(self.conversation_history) 754 | continue 755 | 756 | # Display the assistant's message 757 | content = assistant_message["content"] 758 | 759 | # Check if it's a final answer 760 | if "This is the final answer at this stage." in content: 761 | # Split at the final answer marker 762 | parts = content.split("This is the final answer at this stage.") 763 | 764 | # Print the main content normally 765 | print_message(parts[0].strip(), MessageType.INFO) 766 | 767 | # Print the final answer part with special formatting 768 | final_part = "This is the final answer at this stage." + parts[1] 769 | print_message(final_part.strip(), MessageType.FINAL, style="box") 770 | else: 771 | # Regular message 772 | print_message(content, MessageType.INFO) 773 | 774 | # Check if we've reached a stopping point 775 | if "This is the final answer at this stage." in content: 776 | # Ask if the user wants to continue 777 | user_input = prompt_user("Do you want to continue with the next stage?", default="yes") 778 | if user_input.lower() not in ["yes", "y", "continue", ""]: 779 | print_message("Exiting the MD agent. 
Thank you for using GROMACS Copilot!", MessageType.SUCCESS, style="box") 780 | break 781 | 782 | # Ask for the next user prompt 783 | user_input = prompt_user("What would you like to do next?") 784 | else: 785 | # Normal user input 786 | user_input = prompt_user("Your response") 787 | 788 | # Check for exit command 789 | if user_input.lower() in ["exit", "quit", "bye"]: 790 | print_message("Exiting the MD agent. Thank you for using GROMACS Copilot!", MessageType.SUCCESS, style="box") 791 | break 792 | 793 | # Add user input to conversation 794 | self.conversation_history.append({ 795 | "role": "user", 796 | "content": user_input 797 | }) 798 | 799 | # Get next response from LLM 800 | response = self.call_llm(self.conversation_history) -------------------------------------------------------------------------------- /gromacs_copilot/mcp_server.py: -------------------------------------------------------------------------------- 1 | from mcp.server.fastmcp import FastMCP 2 | import os 3 | import logging 4 | from typing import Dict, Any, Optional, Union 5 | from gromacs_copilot.core.md_agent import MDLLMAgent 6 | 7 | 8 | # Initialize FastMCP server 9 | mcp = FastMCP("gromacs-copilot") 10 | 11 | # Reference to the agent instance (will be set later) 12 | global agent 13 | 14 | @mcp.tool() 15 | async def init_gromacs_copilot(workspace: str, gmx_bin: str) -> Dict[str, Any]: 16 | """ 17 | Initialize the GROMACS Copilot server with a specific workspace and GROMACS binary 18 | 19 | Args: 20 | workspace: Path to the workspace directory 21 | gmx_bin: Path to the GROMACS binary 22 | **kwargs: Additional arguments for agent initialization 23 | """ 24 | global agent 25 | agent = MDLLMAgent(workspace=workspace, api_key="dummy", gmx_bin=gmx_bin) 26 | 27 | return {"success": True, "message": f"Initialized GROMACS Copilot with workspace: {workspace}"} 28 | 29 | 30 | @mcp.tool() 31 | async def check_gromacs_installation() -> Dict[str, Any]: 32 | """ 33 | Check if GROMACS is installed 
# --- Thin MCP tool wrappers -------------------------------------------------
# Each tool below delegates to the matching method on the active protocol.
# The docstrings double as the MCP tool descriptions, so they are left
# untouched. All tools assume init_gromacs_copilot() has already bound the
# module-level `agent`; otherwise they return an "agent not initialized" error.

@mcp.tool()
async def check_gromacs_installation() -> Dict[str, Any]:
    """
    Check if GROMACS is installed and available

    Returns:
        Dictionary with GROMACS installation information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.check_gromacs_installation()

@mcp.tool()
async def set_protein_file(file_path: str) -> Dict[str, Any]:
    """
    Set and prepare the protein file for simulation, only use for protein-ligand complex

    Args:
        file_path: Path to the protein structure file (PDB or GRO)

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    # Not every protocol exposes set_protein_file (e.g. analysis-only runs).
    elif hasattr(agent.protocol, "set_protein_file"):
        return agent.protocol.set_protein_file(file_path)
    else:
        return {"success": False, "error": "set_protein_file method not available in agent, is not needed for protein only simulation."}

@mcp.tool()
async def check_for_ligands(pdb_file: str) -> Dict[str, Any]:
    """
    Check for potential ligands in the PDB file, only use for protein-ligand complex

    Args:
        pdb_file: Path to the PDB file

    Returns:
        Dictionary with ligand information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.check_for_ligands(pdb_file)

@mcp.tool()
async def set_ligand(ligand_name: str) -> Dict[str, Any]:
    """
    Set the ligand for simulation, only use for protein-ligand complex

    Args:
        ligand_name: Residue name of the ligand in the PDB file

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.set_ligand(ligand_name)

@mcp.tool()
async def generate_topology(force_field: str, water_model: str = "spc") -> Dict[str, Any]:
    """
    Generate topology for the protein

    Args:
        force_field: Name of the force field to use
        water_model: Water model to use

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.generate_topology(force_field, water_model)

@mcp.tool()
async def define_simulation_box(distance: float = 1.0, box_type: str = "cubic") -> Dict[str, Any]:
    """
    Define the simulation box

    Args:
        distance: Minimum distance between protein and box edge (nm)
        box_type: Type of box (cubic, dodecahedron, octahedron)

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.define_simulation_box(distance, box_type)

@mcp.tool()
async def solvate_system() -> Dict[str, Any]:
    """
    Solvate the protein in water

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.solvate_system()

@mcp.tool()
async def create_mdp_file(mdp_type: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Create an MDP parameter file for GROMACS

    Args:
        mdp_type: Type of MDP file
        params: Optional override parameters

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.create_mdp_file(mdp_type, params)

@mcp.tool()
async def add_ions(concentration: float = 0.15, neutral: bool = True) -> Dict[str, Any]:
    """
    Add ions to the solvated system

    Args:
        concentration: Salt concentration in M
        neutral: Whether to neutralize the system

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}

    return agent.protocol.add_ions(concentration, neutral)

@mcp.tool()
async def run_energy_minimization() -> Dict[str, Any]:
    """
    Run energy minimization

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.run_energy_minimization()

@mcp.tool()
async def run_nvt_equilibration() -> Dict[str, Any]:
    """
    Run NVT equilibration

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.run_nvt_equilibration()

@mcp.tool()
async def run_npt_equilibration() -> Dict[str, Any]:
    """
    Run NPT equilibration

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.run_npt_equilibration()

@mcp.tool()
async def run_production_md(length_ns: float = 10.0) -> Dict[str, Any]:
    """
    Run production MD

    Args:
        length_ns: Length of the simulation in nanoseconds

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.run_production_md(length_ns)

@mcp.tool()
async def analyze_rmsd() -> Dict[str, Any]:
    """
    Perform RMSD analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.analyze_rmsd()

@mcp.tool()
async def analyze_rmsf() -> Dict[str, Any]:
    """
    Perform RMSF analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.analyze_rmsf()

@mcp.tool()
async def analyze_gyration() -> Dict[str, Any]:
    """
    Perform radius of gyration analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.analyze_gyration()

@mcp.tool()
async def analyze_ligand_rmsd() -> Dict[str, Any]:
    """
    Perform RMSD analysis focused on the ligand

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.analyze_ligand_rmsd()

@mcp.tool()
async def analyze_protein_ligand_contacts() -> Dict[str, Any]:
    """
    Analyze contacts between protein and ligand

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.analyze_protein_ligand_contacts()
@mcp.tool()
async def set_simulation_stage(stage: str) -> Dict[str, Any]:
    """
    Set the current simulation stage

    Args:
        stage: Name of the stage to set

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.set_simulation_stage(stage)

@mcp.tool()
async def run_shell_command(command: str, capture_output: bool = True) -> Dict[str, Any]:
    """
    Run a shell command

    Args:
        command: Shell command to run
        capture_output: Whether to capture stdout/stderr

    Returns:
        Dictionary with command result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.run_shell_command(command, capture_output)

@mcp.tool()
async def get_workspace_info() -> Dict[str, Any]:
    """
    Get information about the current workspace

    Returns:
        Dictionary with workspace information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.get_state()

# Add additional tools for MM-PBSA functionality
@mcp.tool()
async def switch_agent_protocol(protocol: str) -> Dict[str, Any]:
    """
    Switch to another protocol
    Args:
        protocol: Name of the protocol to switch to, [ligand, mmpbsa, analysis]

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    # Fix: the original membership list was ["ligand", "mmpbsa, analysis"] —
    # a missing quote merged two entries into one string, so "mmpbsa" and
    # "analysis" were always rejected as unsupported.
    if protocol not in ["ligand", "mmpbsa", "analysis"]:
        return {"success": False, "error": "protocol not supported"}
    if protocol == "mmpbsa":
        agent.switch_to_mmpbsa_protocol()
        return {"success": True, "message": "switched to mmpbsa protocol"}
    if protocol == "ligand":
        agent.switch_to_protein_ligand_protocol()
        return {"success": True, "message": "switched to ligand protocol"}
    # Only "analysis" remains at this point.
    agent.switch_to_analysis_protocol()
    return {"success": True, "message": "switched to analysis protocol"}


@mcp.tool()
async def create_mmpbsa_index_file(protein_selection: str = "Protein",
                                   ligand_selection: str = "LIG") -> Dict[str, Any]:
    """
    Create index file for MM-PBSA analysis

    Args:
        protein_selection: Selection for protein group
        ligand_selection: Selection for ligand group

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    return agent.protocol.create_mmpbsa_index_file(protein_selection, ligand_selection)


# ---------------------------------------------------------------------------
# gromacs_copilot/protocols/__init__.py
# ---------------------------------------------------------------------------
"""
Protocol modules for GROMACS Copilot
"""

from gromacs_copilot.protocols.base import BaseProtocol
from gromacs_copilot.protocols.protein import ProteinProtocol
from gromacs_copilot.protocols.protein_ligand import ProteinLigandProtocol
from gromacs_copilot.protocols.analysis import AnalysisProtocol
# Fix: MMPBSAProtocol was listed in __all__ but never imported, so
# `from gromacs_copilot.protocols import *` raised AttributeError and
# `protocols.MMPBSAProtocol` was unavailable.
from gromacs_copilot.protocols.mmpbsa import MMPBSAProtocol

__all__ = [
    'BaseProtocol',
    'ProteinProtocol',
    'ProteinLigandProtocol',
    'AnalysisProtocol',
    'MMPBSAProtocol'
]
""" 4 | 5 | import os 6 | import logging 7 | from typing import Dict, Any, List, Optional 8 | 9 | from gromacs_copilot.protocols.base import BaseProtocol 10 | from gromacs_copilot.utils.shell import check_command_exists 11 | 12 | 13 | class AnalysisProtocol(BaseProtocol): 14 | """Protocol for analysis of MD simulation results""" 15 | 16 | def __init__(self, workspace: str = "./md_workspace", has_ligand: bool = False, gmx_bin: str = "gmx"): 17 | """ 18 | Initialize the analysis protocol 19 | 20 | Args: 21 | workspace: Directory to use as the working directory 22 | has_ligand: Whether the system includes a ligand 23 | """ 24 | super().__init__(workspace) 25 | self.has_ligand = has_ligand 26 | self.production_file = None 27 | self.trajectory_file = None 28 | self.topology_file = None 29 | self.energy_file = None 30 | self.analysis_dir = os.path.join(workspace, "analysis") 31 | self.gmx_bin = gmx_bin 32 | 33 | # Create analysis directory if it doesn't exist 34 | if not os.path.exists(self.analysis_dir): 35 | os.makedirs(self.analysis_dir) 36 | 37 | logging.info(f"Analysis protocol initialized with workspace: {self.workspace}") 38 | 39 | def get_state(self) -> Dict[str, Any]: 40 | """ 41 | Get the current state of the protocol 42 | 43 | Returns: 44 | Dictionary with protocol state information 45 | """ 46 | try: 47 | analysis_files = [] 48 | if os.path.exists(self.analysis_dir): 49 | analysis_files = os.listdir(self.analysis_dir) 50 | 51 | return { 52 | "success": True, 53 | "workspace_path": self.workspace, 54 | "analysis_directory": self.analysis_dir, 55 | "has_ligand": self.has_ligand, 56 | "production_file": self.production_file, 57 | "trajectory_file": self.trajectory_file, 58 | "topology_file": self.topology_file, 59 | "energy_file": self.energy_file, 60 | "analysis_files": analysis_files 61 | } 62 | except Exception as e: 63 | logging.error(f"Error getting analysis state: {str(e)}") 64 | return { 65 | "success": False, 66 | "error": str(e), 67 | "workspace_path": 
self.workspace 68 | } 69 | 70 | def check_prerequisites(self) -> Dict[str, Any]: 71 | """ 72 | Check if prerequisites for analysis are met 73 | 74 | Returns: 75 | Dictionary with prerequisite check information 76 | """ 77 | # Check GROMACS installation 78 | gromacs_result = self.run_shell_command(f"{self.gmx_bin} --version", capture_output=True) 79 | gromacs_installed = gromacs_result["success"] 80 | 81 | # Check DSSP installation (optional) 82 | dssp_installed = check_command_exists("dssp") or check_command_exists("mkdssp") 83 | 84 | # Check for required files 85 | required_files = ["md.xtc", "md.tpr", "md.edr"] 86 | missing_files = [file for file in required_files if not os.path.exists(os.path.join(self.workspace, file))] 87 | 88 | if missing_files: 89 | return { 90 | "success": False, 91 | "installed": { 92 | "gromacs": gromacs_installed, 93 | "dssp": dssp_installed 94 | }, 95 | "missing_files": missing_files, 96 | "error": f"Missing required files: {', '.join(missing_files)}" 97 | } 98 | 99 | # Set file paths if all required files exist 100 | self.production_file = "md.gro" 101 | self.trajectory_file = "md.xtc" 102 | self.topology_file = "topol.top" 103 | self.energy_file = "md.edr" 104 | 105 | return { 106 | "success": True, 107 | "installed": { 108 | "gromacs": gromacs_installed, 109 | "dssp": dssp_installed 110 | } 111 | } 112 | 113 | def clean_trajectory(self) -> Dict[str, Any]: 114 | """ 115 | Clean the trajectory file by removing PBC effects and centering 116 | 117 | Returns: 118 | Dictionary with result information 119 | """ 120 | # Create clean trajectory 121 | cmd = f"echo 'Protein System' | {self.gmx_bin} trjconv -s md.tpr -f md.xtc -o analysis/clean_full.xtc -pbc nojump -ur compact -center" 122 | result = self.run_shell_command(cmd) 123 | 124 | if not result["success"]: 125 | return { 126 | "success": False, 127 | "error": f"Failed to clean trajectory: {result['stderr']}" 128 | } 129 | 130 | # Create no-water trajectory 131 | cmd = f"echo 'Protein 
non-Water' |{self.gmx_bin} trjconv -s md.tpr -f analysis/clean_full.xtc -o analysis/clean_nowat.xtc -fit rot+trans" 132 | result = self.run_shell_command(cmd) 133 | 134 | if not result["success"]: 135 | return { 136 | "success": False, 137 | "error": f"Failed to create no-water trajectory: {result['stderr']}" 138 | } 139 | 140 | # Extract last frame as PDB 141 | cmd = f"echo 'Protein Protein' |{self.gmx_bin} trjconv -s md.tpr -f analysis/clean_nowat.xtc -o analysis/protein_lastframe.pdb -pbc nojump -ur compact -center -dump 9999999999999999" 142 | result = self.run_shell_command(cmd) 143 | 144 | if not result["success"]: 145 | return { 146 | "success": False, 147 | "error": f"Failed to extract last frame: {result['stderr']}" 148 | } 149 | 150 | return { 151 | "success": True, 152 | "clean_trajectory": "analysis/clean_full.xtc", 153 | "nowat_trajectory": "analysis/clean_nowat.xtc", 154 | "last_frame": "analysis/protein_lastframe.pdb" 155 | } 156 | 157 | def analyze_rmsd(self, selection: str = "Backbone", reference: str = "Backbone") -> Dict[str, Any]: 158 | """ 159 | Perform RMSD analysis 160 | 161 | Args: 162 | selection: Selection to analyze 163 | reference: Reference selection for fitting 164 | 165 | Returns: 166 | Dictionary with result information 167 | """ 168 | output_file = f"analysis/rmsd_{selection.lower()}.xvg" 169 | 170 | cmd = f"echo '{reference} {selection}' |{self.gmx_bin} rms -s md.tpr -f analysis/clean_nowat.xtc -o {output_file} -tu ns" 171 | result = self.run_shell_command(cmd) 172 | 173 | if not result["success"]: 174 | return { 175 | "success": False, 176 | "error": f"RMSD analysis failed: {result['stderr']}" 177 | } 178 | 179 | return { 180 | "success": True, 181 | "output_file": output_file, 182 | "analysis_type": "RMSD", 183 | "selection": selection, 184 | "reference": reference 185 | } 186 | 187 | def analyze_rmsf(self, selection: str = "Backbone") -> Dict[str, Any]: 188 | """ 189 | Perform RMSF analysis 190 | 191 | Args: 192 | selection: 
Selection to analyze 193 | 194 | Returns: 195 | Dictionary with result information 196 | """ 197 | output_file = f"analysis/rmsf_{selection.lower()}.xvg" 198 | 199 | cmd = f"echo '{selection}' |{self.gmx_bin} rmsf -s md.tpr -f analysis/clean_nowat.xtc -o {output_file} -res" 200 | result = self.run_shell_command(cmd) 201 | 202 | if not result["success"]: 203 | return { 204 | "success": False, 205 | "error": f"RMSF analysis failed: {result['stderr']}" 206 | } 207 | 208 | return { 209 | "success": True, 210 | "output_file": output_file, 211 | "analysis_type": "RMSF", 212 | "selection": selection 213 | } 214 | 215 | def analyze_gyration(self, selection: str = "Protein") -> Dict[str, Any]: 216 | """ 217 | Perform radius of gyration analysis 218 | 219 | Args: 220 | selection: Selection to analyze 221 | 222 | Returns: 223 | Dictionary with result information 224 | """ 225 | output_file = f"analysis/gyrate_{selection.lower()}.xvg" 226 | 227 | cmd = f"echo '{selection}' |{self.gmx_bin} gyrate -s md.tpr -f analysis/clean_nowat.xtc -o {output_file}" 228 | result = self.run_shell_command(cmd) 229 | 230 | if not result["success"]: 231 | return { 232 | "success": False, 233 | "error": f"Radius of gyration analysis failed: {result['stderr']}" 234 | } 235 | 236 | return { 237 | "success": True, 238 | "output_file": output_file, 239 | "analysis_type": "Radius of Gyration", 240 | "selection": selection 241 | } 242 | 243 | def analyze_hydrogen_bonds(self, selection1: str = "Protein", selection2: str = "Protein") -> Dict[str, Any]: 244 | """ 245 | Perform hydrogen bond analysis 246 | 247 | Args: 248 | selection1: First selection 249 | selection2: Second selection 250 | 251 | Returns: 252 | Dictionary with result information 253 | """ 254 | output_file = f"analysis/hbnum_{selection1.lower()}_{selection2.lower()}.xvg" 255 | 256 | cmd = f"echo -e '{selection1}\\n{selection2}' |{self.gmx_bin} hbond -s md.tpr -f analysis/clean_nowat.xtc -num {output_file}" 257 | result = 
self.run_shell_command(cmd) 258 | 259 | if not result["success"]: 260 | return { 261 | "success": False, 262 | "error": f"Hydrogen bond analysis failed: {result['stderr']}" 263 | } 264 | 265 | return { 266 | "success": True, 267 | "output_file": output_file, 268 | "analysis_type": "Hydrogen Bonds", 269 | "selection1": selection1, 270 | "selection2": selection2 271 | } 272 | 273 | def analyze_secondary_structure(self) -> Dict[str, Any]: 274 | """ 275 | Perform secondary structure analysis using DSSP 276 | 277 | Returns: 278 | Dictionary with result information 279 | """ 280 | # Check if DSSP is installed 281 | dssp_executable = None 282 | if check_command_exists("dssp"): 283 | dssp_executable = "dssp" 284 | elif check_command_exists("mkdssp"): 285 | dssp_executable = "mkdssp" 286 | 287 | if not dssp_executable: 288 | return { 289 | "success": False, 290 | "error": "DSSP is not installed. Please install DSSP or mkdssp." 291 | } 292 | 293 | # Set environment variable for GROMACS to find DSSP 294 | os.environ["DSSP"] = dssp_executable 295 | 296 | cmd = f"echo 'Protein' |{self.gmx_bin} do_dssp -s md.tpr -f analysis/clean_nowat.xtc -o analysis/ss.xpm -ver 3 -tu ns -dt 0.05" 297 | result = self.run_shell_command(cmd) 298 | 299 | if not result["success"]: 300 | return { 301 | "success": False, 302 | "error": f"Secondary structure analysis failed: {result['stderr']}" 303 | } 304 | 305 | # Convert XPM to PS for better visualization 306 | cmd = f"{self.gmx_bin} xpm2ps -f analysis/ss.xpm -o analysis/ss.ps -by 10 -bx 3" 307 | ps_result = self.run_shell_command(cmd) 308 | 309 | return { 310 | "success": True, 311 | "output_file": "analysis/ss.xpm", 312 | "ps_file": "analysis/ss.ps" if ps_result["success"] else None, 313 | "analysis_type": "Secondary Structure" 314 | } 315 | 316 | def analyze_energy(self, terms: List[str] = ["Potential", "Temperature", "Pressure"]) -> Dict[str, Any]: 317 | """ 318 | Perform energy analysis 319 | 320 | Args: 321 | terms: Energy terms to analyze 
322 | 323 | Returns: 324 | Dictionary with result information 325 | """ 326 | results = {} 327 | 328 | for term in terms: 329 | # Map energy term to its typical number in GROMACS 330 | term_map = { 331 | "Potential": "10", 332 | "Kinetic": "11", 333 | "Total": "12", 334 | "Temperature": "16", 335 | "Pressure": "17", 336 | "Volume": "22" 337 | } 338 | 339 | if term not in term_map: 340 | results[term] = { 341 | "success": False, 342 | "error": f"Unknown energy term: {term}" 343 | } 344 | continue 345 | 346 | output_file = f"analysis/energy_{term.lower()}.xvg" 347 | 348 | cmd = f"echo '{term_map[term]} 0' |{self.gmx_bin} energy -f md.edr -o {output_file}" 349 | result = self.run_shell_command(cmd) 350 | 351 | if not result["success"]: 352 | results[term] = { 353 | "success": False, 354 | "error": f"Energy analysis for {term} failed: {result['stderr']}" 355 | } 356 | else: 357 | results[term] = { 358 | "success": True, 359 | "output_file": output_file, 360 | "analysis_type": "Energy", 361 | "term": term 362 | } 363 | 364 | return { 365 | "success": all(results[term]["success"] for term in terms), 366 | "results": results 367 | } 368 | 369 | def analyze_ligand_rmsd(self) -> Dict[str, Any]: 370 | """ 371 | Perform RMSD analysis focused on the ligand 372 | 373 | Returns: 374 | Dictionary with result information 375 | """ 376 | if not self.has_ligand: 377 | return { 378 | "success": False, 379 | "error": "No ligand in the system" 380 | } 381 | 382 | output_file = "analysis/ligand_rmsd.xvg" 383 | 384 | cmd = f"echo 'LIG LIG' |{self.gmx_bin} rms -s md.tpr -f analysis/clean_nowat.xtc -o analysis/ligand_rmsd.xvg -tu ns" 385 | result = self.run_shell_command(cmd) 386 | 387 | if not result["success"]: 388 | return { 389 | "success": False, 390 | "error": f"Ligand RMSD analysis failed: {result['stderr']}" 391 | } 392 | 393 | return { 394 | "success": True, 395 | "output_file": output_file, 396 | "analysis_type": "Ligand RMSD" 397 | } 398 | 399 | def 
analyze_protein_ligand_contacts(self) -> Dict[str, Any]: 400 | """ 401 | Analyze contacts between protein and ligand 402 | 403 | Returns: 404 | Dictionary with result information 405 | """ 406 | if not self.has_ligand: 407 | return { 408 | "success": False, 409 | "error": "No ligand in the system" 410 | } 411 | 412 | output_file = "analysis/protein_ligand_mindist.xvg" 413 | 414 | cmd = f"echo -e 'Protein\\nLIG' |{self.gmx_bin} mindist -s md.tpr -f analysis/clean_nowat.xtc -od analysis/protein_ligand_mindist.xvg -tu ns" 415 | result = self.run_shell_command(cmd) 416 | 417 | if not result["success"]: 418 | return { 419 | "success": False, 420 | "error": f"Protein-ligand contacts analysis failed: {result['stderr']}" 421 | } 422 | 423 | return { 424 | "success": True, 425 | "output_file": output_file, 426 | "analysis_type": "Protein-Ligand Minimum Distance" 427 | } 428 | 429 | def generate_analysis_report(self) -> Dict[str, Any]: 430 | """ 431 | Generate a comprehensive analysis report 432 | 433 | Returns: 434 | Dictionary with result information 435 | """ 436 | # Create analysis directory if it doesn't exist 437 | if not os.path.exists(self.analysis_dir): 438 | os.makedirs(self.analysis_dir) 439 | 440 | # Clean trajectories 441 | clean_result = self.clean_trajectory() 442 | if not clean_result["success"]: 443 | return clean_result 444 | 445 | # Perform various analyses 446 | analyses = [ 447 | self.analyze_rmsd(selection="Backbone", reference="Backbone"), 448 | self.analyze_rmsd(selection="Protein", reference="Backbone"), 449 | self.analyze_rmsf(selection="C-alpha"), 450 | self.analyze_gyration(selection="Protein"), 451 | self.analyze_energy(terms=["Potential", "Temperature", "Pressure"]), 452 | self.analyze_hydrogen_bonds(selection1="Protein", selection2="Protein") 453 | ] 454 | 455 | # Add ligand-specific analyses if applicable 456 | if self.has_ligand: 457 | analyses.extend([ 458 | self.analyze_ligand_rmsd(), 459 | self.analyze_protein_ligand_contacts() 460 | ]) 
461 | 462 | # Try to do secondary structure analysis if DSSP is available 463 | if check_command_exists("dssp") or check_command_exists("mkdssp"): 464 | analyses.append(self.analyze_secondary_structure()) 465 | 466 | # Count successful analyses 467 | successful_analyses = sum(1 for analysis in analyses if analysis["success"]) 468 | 469 | return { 470 | "success": successful_analyses > 0, 471 | "total_analyses": len(analyses), 472 | "successful_analyses": successful_analyses, 473 | "analyses": analyses, 474 | "report_directory": self.analysis_dir 475 | } -------------------------------------------------------------------------------- /gromacs_copilot/protocols/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base protocol class for GROMACS Copilot 3 | """ 4 | 5 | import os 6 | import logging 7 | from abc import ABC, abstractmethod 8 | from typing import Dict, Any, Optional 9 | 10 | from gromacs_copilot.utils.shell import run_shell_command 11 | from gromacs_copilot.core.enums import SimulationStage 12 | 13 | 14 | class BaseProtocol(ABC): 15 | """Base class for simulation protocols""" 16 | 17 | def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"): 18 | """ 19 | Initialize the base protocol 20 | 21 | Args: 22 | workspace: Directory to use as the working directory 23 | """ 24 | self.workspace = os.path.abspath(workspace) 25 | self.stage = SimulationStage.SETUP 26 | 27 | # Create workspace if it doesn't exist 28 | if not os.path.exists(self.workspace): 29 | os.makedirs(self.workspace) 30 | 31 | # Change to workspace directory 32 | os.chdir(self.workspace) 33 | self.gmx_bin = gmx_bin 34 | 35 | logging.info(f"Protocol initialized with workspace: {self.workspace}") 36 | 37 | 38 | def check_gromacs_installation(self) -> Dict[str, Any]: 39 | """ 40 | Check if GROMACS is installed and available 41 | 42 | Returns: 43 | Dictionary with GROMACS installation information 44 | """ 45 | result = 
class BaseProtocol(ABC):
    """Base class for simulation protocols.

    Handles workspace creation, GROMACS availability checks, MDP file
    generation, and simulation-stage bookkeeping.  NOTE: ``__init__``
    chdirs into the workspace, so subclasses may use workspace-relative
    paths in shell commands.
    """

    def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
        """
        Initialize the base protocol.

        Args:
            workspace: Directory to use as the working directory
            gmx_bin: Name or path of the GROMACS executable
        """
        self.workspace = os.path.abspath(workspace)
        self.stage = SimulationStage.SETUP

        # Create workspace if it doesn't exist
        if not os.path.exists(self.workspace):
            os.makedirs(self.workspace)

        # Change to workspace directory so later commands can use
        # workspace-relative paths (process-wide side effect).
        os.chdir(self.workspace)
        self.gmx_bin = gmx_bin

        logging.info(f"Protocol initialized with workspace: {self.workspace}")

    def check_gromacs_installation(self) -> Dict[str, Any]:
        """
        Check if GROMACS is installed and available.

        Returns:
            Dictionary with GROMACS installation information
        """
        result = self.run_shell_command(f"{self.gmx_bin} --version", capture_output=True)

        if result["success"]:
            version_info = result["stdout"].strip()
            return {
                "success": True,
                "installed": True,
                "version": version_info
            }
        else:
            return {
                "success": False,
                "installed": False,
                "error": "GROMACS is not installed or not in PATH"
            }

    def run_shell_command(self, command: str, capture_output: bool = True,
                          suppress_output: bool = False) -> Dict[str, Any]:
        """
        Run a shell command.

        Args:
            command: Shell command to run
            capture_output: Whether to capture stdout/stderr
            suppress_output: Whether to suppress terminal output

        Returns:
            Dictionary with command result information
        """
        return run_shell_command(command, capture_output, suppress_output)

    @abstractmethod
    def get_state(self) -> Dict[str, Any]:
        """
        Get the current state of the protocol.

        Returns:
            Dictionary with protocol state information
        """
        pass

    @abstractmethod
    def check_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for the protocol are met.

        Returns:
            Dictionary with prerequisite check information
        """
        pass

    def create_mdp_file(self, mdp_type: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Create an MDP parameter file for GROMACS.

        Args:
            mdp_type: Type of MDP file (ions, em, nvt, npt, md)
            params: Optional override parameters

        Returns:
            Dictionary with result information
        """
        # Imported lazily to avoid a circular import at module load time.
        from gromacs_copilot.config import DEFAULT_MDP_PARAMS, MDP_TYPES

        if mdp_type not in MDP_TYPES:
            return {
                "success": False,
                "error": f"Unknown MDP type: {mdp_type}. Available types: {MDP_TYPES}"
            }

        # Start with default parameters for the specified type
        mdp_params = DEFAULT_MDP_PARAMS[mdp_type].copy()

        # Override with user-provided parameters if any
        if params:
            mdp_params.update(params)

        # Create MDP file content
        mdp_content = f"; {mdp_type}.mdp - Generated by GROMACS Copilot\n"
        for key, value in mdp_params.items():
            mdp_content += f"{key:<20} = {value}\n"

        # Write MDP file (relative to the workspace, the current directory)
        file_path = f"{mdp_type}.mdp"
        try:
            with open(file_path, "w") as f:
                f.write(mdp_content)
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to write MDP file: {str(e)}"
            }

        return {
            "success": True,
            "file_path": file_path,
            "mdp_type": mdp_type,
            "params": mdp_params
        }

    def set_simulation_stage(self, stage: str) -> Dict[str, Any]:
        """
        Set the current simulation stage.

        Args:
            stage: Name of the stage to set (must be a SimulationStage member name)

        Returns:
            Dictionary with result information; on success it reports both the
            new stage and the stage that was active before the change.
        """
        try:
            new_stage = SimulationStage[stage]
        except KeyError:
            return {
                "success": False,
                "error": f"Unknown stage: {stage}. Available stages: {[s.name for s in SimulationStage]}"
            }

        # BUGFIX: capture the old stage *before* overwriting it.  The original
        # code read self.stage after assignment, so "previous_stage" always
        # duplicated the new stage.
        previous_stage = self.stage
        self.stage = new_stage
        return {
            "success": True,
            "stage": self.stage.name,
            "previous_stage": previous_stage.name
        }
class MMPBSAProtocol(BaseProtocol):
    """Protocol for MM-PBSA/GBSA binding free energy calculations.

    Drives ``gmx_MMPBSA``: builds an index file, writes the ``mmpbsa.in``
    input, runs the calculation, and parses the resulting energy table.
    All outputs live under a ``mmpbsa/`` subdirectory of the workspace.
    """

    def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
        """
        Initialize the MM-PBSA protocol.

        Args:
            workspace: Directory to use as the working directory
            gmx_bin: Name or path of the GROMACS executable
        """
        super().__init__(workspace, gmx_bin=gmx_bin)

        # Filled in by check_mmpbsa_prerequisites() / index creation.
        self.trajectory_file: Optional[str] = None
        self.topology_file: Optional[str] = None
        self.index_file: Optional[str] = None
        self.protein_group = None
        self.ligand_group = None
        self.complex_group = None
        self.mmpbsa_dir = os.path.join(workspace, "mmpbsa")

        # Create MM-PBSA directory if it doesn't exist
        if not os.path.exists(self.mmpbsa_dir):
            os.makedirs(self.mmpbsa_dir)

        logging.info(f"MM-PBSA protocol initialized with workspace: {self.workspace}")

    def get_state(self) -> Dict[str, Any]:
        """
        Get the current state of the protocol.

        Returns:
            Dictionary with protocol state information, including the files
            already present in the mmpbsa directory.
        """
        try:
            mmpbsa_files: List[str] = []
            if os.path.exists(self.mmpbsa_dir):
                mmpbsa_files = os.listdir(self.mmpbsa_dir)

            return {
                "success": True,
                "workspace_path": self.workspace,
                "mmpbsa_directory": self.mmpbsa_dir,
                "trajectory_file": self.trajectory_file,
                "topology_file": self.topology_file,
                "index_file": self.index_file,
                "protein_group": self.protein_group,
                "ligand_group": self.ligand_group,
                "complex_group": self.complex_group,
                "mmpbsa_files": mmpbsa_files
            }
        except Exception as e:
            logging.error(f"Error getting MM-PBSA state: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "workspace_path": self.workspace
            }

    def check_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for the protocol are met.

        BUGFIX: the original stub returned None, which would break any
        caller that inspects ``result["success"]``; delegate to the real
        MM-PBSA prerequisite check instead.
        """
        return self.check_mmpbsa_prerequisites()

    def check_mmpbsa_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for MM-PBSA analysis are met.

        Verifies the GROMACS and gmx_MMPBSA installations and the presence
        of the production run outputs (md.tpr, md.xtc) in the workspace.

        Returns:
            Dictionary with prerequisite check information
        """
        # Check GROMACS installation
        gromacs_result = run_shell_command(f"{self.gmx_bin} --version", capture_output=True)
        gromacs_installed = gromacs_result["success"]

        # Check gmx_MMPBSA installation
        gmx_mmpbsa_installed = check_command_exists("gmx_MMPBSA")

        # Check for required files
        required_files = ["md.tpr", "md.xtc"]
        missing_files = [file for file in required_files
                         if not os.path.exists(os.path.join(self.workspace, file))]

        if missing_files:
            return {
                "success": False,
                "installed": {
                    "gromacs": gromacs_installed,
                    "gmx_mmpbsa": gmx_mmpbsa_installed
                },
                "missing_files": missing_files,
                "error": f"Missing required files: {', '.join(missing_files)}"
            }

        # Set file paths if all required files exist
        self.trajectory_file = "md.xtc"
        self.topology_file = "md.tpr"

        return {
            "success": True,
            "installed": {
                "gromacs": gromacs_installed,
                "gmx_mmpbsa": gmx_mmpbsa_installed
            }
        }

    def create_mmpbsa_index_file(self, protein_selection: str = "Protein",
                                 ligand_selection: str = "LIG") -> Dict[str, Any]:
        """
        Create index file for MM-PBSA analysis.

        Args:
            protein_selection: Selection for protein group
            ligand_selection: Selection for ligand group

        Returns:
            On success, a dictionary mapping bracketed group names (as they
            appear in the .ndx file) to 0-based group numbers, plus
            ``"success": True``; on failure, an error dictionary.
        """
        if not os.path.exists(os.path.join(self.workspace, "md.tpr")):
            return {
                "success": False,
                "error": "Topology file not found"
            }

        # Create index file with protein and ligand groups.
        # BUGFIX: use the configured GROMACS binary instead of hard-coded "gmx".
        cmd = (f"""echo -e "name {protein_selection}\\nname {ligand_selection}\\n\\nq" """
               f"""| {self.gmx_bin} make_ndx -f md.tpr -o mmpbsa/mmpbsa.ndx""")
        result = self.run_shell_command(cmd)

        if not result["success"]:
            return {
                "success": False,
                "error": f"Failed to create index file: {result['stderr']}"
            }

        # Number the '[ group ]' header lines of the index file; grep -n on the
        # already-filtered stream yields 1-based positions among group headers.
        groups_cmd = "grep '\\[' mmpbsa/mmpbsa.ndx | grep -n '\\[' | awk '{print $1, $2, $3}'"
        groups_result = self.run_shell_command(groups_cmd)

        if not groups_result["success"]:
            return {
                "success": False,
                "error": f"Failed to extract group numbers: {groups_result['stderr']}"
            }

        # Parse "N:[ Name ]" lines into {group_name: 0-based index}.
        try:
            group_dict: Dict[str, Any] = {}
            for line in groups_result["stdout"].strip().split('\n'):
                if ':' not in line:
                    continue
                parts = line.split(':')
                if len(parts) >= 2:
                    group_num = int(parts[0]) - 1  # Adjust for 0-based indexing
                    group_name = parts[1].strip()
                    group_dict[group_name] = group_num

            self.index_file = "mmpbsa/mmpbsa.ndx"
            group_dict["success"] = True
            return group_dict

        except Exception as e:
            return {
                "success": False,
                "error": f"Error parsing group numbers: {str(e)}"
            }

    def create_mmpbsa_input(self, method: str = "pb",
                            startframe: int = 1,
                            endframe: int = 1000,
                            interval: int = 10,
                            ionic_strength: float = 0.15,
                            with_entropy: bool = False) -> Dict[str, Any]:
        """
        Create input file for MM-PBSA/GBSA calculation.

        Args:
            method: Method to use ("pb" or "gb"; any other value writes only
                the &general section)
            startframe: First frame to analyze
            endframe: Last frame to analyze
            interval: Interval between frames
            ionic_strength: Ionic strength for the PB/GB calculation
            with_entropy: Whether to include interaction-entropy calculation

        Returns:
            Dictionary with result information
        """
        try:
            lines = [
                "&general",
                "  sys_name = Protein_Ligand",
                f"  startframe = {startframe}",
                f"  endframe = {endframe}",
                f"  interval = {interval}",
            ]

            if with_entropy:
                lines.append("  entropy = 1")
                # Number of frames per segment for the entropy calculation
                lines.append("  entropy_seg = 25")

            lines.append("/")
            lines.append("")

            if method.lower() == "pb":
                lines += [
                    "&pb",
                    f"  istrng = {ionic_strength}",
                    "  fillratio = 4.0",
                    "  inp = 2",
                    "  radiopt = 0",
                    "/",
                ]
            elif method.lower() == "gb":
                lines += [
                    "&gb",
                    f"  saltcon = {ionic_strength}",
                    "  igb = 5",  # GB model (5 = OBC2)
                    "/",
                ]

            input_file_path = os.path.join(self.mmpbsa_dir, "mmpbsa.in")
            with open(input_file_path, "w") as f:
                f.write("\n".join(lines) + "\n")

            return {
                "success": True,
                "input_file": input_file_path,
                "method": method,
                "startframe": startframe,
                "endframe": endframe,
                "interval": interval,
                "with_entropy": with_entropy
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error creating MM-PBSA input file: {str(e)}"
            }

    def run_mmpbsa_calculation(self,
                               ligand_mol_file: str,
                               index_file: str,
                               topology_file: str,
                               protein_group: str,
                               ligand_group: str,
                               trajectory_file: str,
                               overwrite: bool = True,
                               verbose: bool = True) -> Dict[str, Any]:
        """
        Run MM-PBSA/GBSA calculation via gmx_MMPBSA.

        Args:
            ligand_mol_file: Ligand structure/parameter file (-lm)
            index_file: Index file with protein and ligand groups (-ci)
            topology_file: Complex structure/topology file (-cs)
            protein_group: Index-group number of the protein (-cg, first)
            ligand_group: Index-group number of the ligand (-cg, second)
            trajectory_file: Trajectory to analyze (-ct)
            overwrite: Whether to overwrite existing output files
            verbose: Currently unused; kept for interface compatibility

        Returns:
            Dictionary with result information
        """
        if not index_file or not os.path.exists(os.path.join(self.workspace, index_file)):
            return {
                "success": False,
                "error": "Index file not found"
            }

        input_file = os.path.join(self.mmpbsa_dir, "mmpbsa.in")
        if not os.path.exists(input_file):
            return {
                "success": False,
                "error": "MM-PBSA input file not found. Run create_mmpbsa_input() first."
            }

        # Run gmx_MMPBSA
        overwrite_flag = "-O" if overwrite else ""

        cmd = (f"cd {self.workspace} && gmx_MMPBSA {overwrite_flag} -i {input_file} "
               f"-cs {topology_file} -ci {index_file} -cg {protein_group} {ligand_group} "
               f"-ct {trajectory_file} -lm {ligand_mol_file} "
               f"-o {self.mmpbsa_dir}/FINAL_RESULTS_MMPBSA.dat -nogui")

        result = self.run_shell_command(cmd)

        if not result["success"]:
            return {
                "success": False,
                "error": f"MM-PBSA calculation failed: {result['stderr']}"
            }

        # Check if output file exists
        final_results = os.path.join(self.mmpbsa_dir, "FINAL_RESULTS_MMPBSA.dat")
        if not os.path.exists(final_results):
            return {
                "success": False,
                "error": "MM-PBSA calculation did not produce expected output file"
            }

        return {
            "success": True,
            "results_file": final_results,
            "output_dir": self.mmpbsa_dir
        }

    def parse_mmpbsa_results(self) -> Dict[str, Any]:
        """
        Parse MM-PBSA/GBSA results.

        Returns:
            Dictionary with the binding energy, its components, and the full
            parsed table.
        """
        # BUGFIX: run_mmpbsa_calculation() writes FINAL_RESULTS_MMPBSA.dat,
        # but this method originally looked for results_FINAL_RESULTS_MMPBSA.dat
        # and could therefore never find its own output.  Check the correct
        # name first and keep the old name as a fallback.
        candidates = [
            os.path.join(self.mmpbsa_dir, "FINAL_RESULTS_MMPBSA.dat"),
            os.path.join(self.mmpbsa_dir, "results_FINAL_RESULTS_MMPBSA.dat"),
        ]
        final_results = next((path for path in candidates if os.path.exists(path)), None)
        if final_results is None:
            return {
                "success": False,
                "error": "MM-PBSA results file not found"
            }

        try:
            # Read results file
            with open(final_results, "r") as f:
                lines = f.readlines()

            # Parse "key: mean std std_err" rows after the "DELTA TOTAL" marker
            results: Dict[str, Dict[str, float]] = {}
            data_block = False

            for line in lines:
                line = line.strip()

                # Skip empty lines and headers
                if not line or line.startswith("***") or line.startswith("==="):
                    continue

                # Start data block
                if line.startswith("DELTA TOTAL"):
                    data_block = True
                    continue

                if data_block and ":" in line:
                    parts = line.split(":")
                    if len(parts) >= 2:
                        key = parts[0].strip()
                        value_parts = parts[1].strip().split()

                        if len(value_parts) >= 3:
                            results[key] = {
                                "mean": float(value_parts[0]),
                                "std": float(value_parts[1]),
                                "std_err": float(value_parts[2])
                            }

            # Extract binding energy components (0.0 when a term is absent)
            binding_energy = results.get("DELTA TOTAL", {}).get("mean", 0)
            van_der_waals = results.get("VDWAALS", {}).get("mean", 0)
            electrostatic = results.get("EEL", {}).get("mean", 0)
            polar_solvation = results.get("EGB/EPB", {}).get("mean", 0)
            non_polar_solvation = results.get("ESURF", {}).get("mean", 0)

            return {
                "success": True,
                "binding_energy": binding_energy,
                "components": {
                    "van_der_waals": van_der_waals,
                    "electrostatic": electrostatic,
                    "polar_solvation": polar_solvation,
                    "non_polar_solvation": non_polar_solvation
                },
                "detailed_results": results
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error parsing MM-PBSA results: {str(e)}"
            }
Any, Optional, List 9 | 10 | from gromacs_copilot.protocols.base import BaseProtocol 11 | from gromacs_copilot.core.enums import SimulationStage 12 | from gromacs_copilot.config import FORCE_FIELDS 13 | 14 | 15 | class ProteinProtocol(BaseProtocol): 16 | """Protocol for protein-only simulations""" 17 | 18 | def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"): 19 | """ 20 | Initialize the protein simulation protocol 21 | 22 | Args: 23 | workspace: Directory to use as the working directory 24 | """ 25 | super().__init__(workspace) 26 | 27 | # Initialize protein-specific attributes 28 | self.protein_file = None 29 | self.topology_file = None 30 | self.box_file = None 31 | self.solvated_file = None 32 | self.minimized_file = None 33 | self.equilibrated_file = None 34 | self.production_file = None 35 | self.gmx_bin = gmx_bin 36 | 37 | logging.info(f"Protein protocol initialized with workspace: {self.workspace}") 38 | 39 | def get_state(self) -> Dict[str, Any]: 40 | """ 41 | Get the current state of the protocol 42 | 43 | Returns: 44 | Dictionary with protocol state information 45 | """ 46 | try: 47 | files = os.listdir(self.workspace) 48 | 49 | # Get file sizes and modification times 50 | file_info = [] 51 | for file in files: 52 | file_path = os.path.join(self.workspace, file) 53 | if os.path.isfile(file_path): 54 | stats = os.stat(file_path) 55 | file_info.append({ 56 | "name": file, 57 | "size_bytes": stats.st_size, 58 | "modified": time.ctime(stats.st_mtime), 59 | "is_directory": False 60 | }) 61 | elif os.path.isdir(file_path): 62 | file_info.append({ 63 | "name": file, 64 | "is_directory": True, 65 | "modified": time.ctime(os.path.getmtime(file_path)) 66 | }) 67 | 68 | return { 69 | "success": True, 70 | "workspace_path": self.workspace, 71 | "current_stage": self.stage.name, 72 | "files": file_info, 73 | "protein_file": self.protein_file, 74 | "topology_file": self.topology_file, 75 | "box_file": self.box_file, 76 | "solvated_file": 
def set_protein_file(self, file_path: str) -> Dict[str, Any]:
    """
    Set and prepare the protein file for simulation

    The file is copied into the workspace unless it already lives there.

    Args:
        file_path: Path to the protein structure file (PDB or GRO)

    Returns:
        Dictionary with result information
    """
    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"Protein file not found: {file_path}"
        }

    basename = os.path.basename(file_path)
    self.protein_file = basename

    # Copy only when the source is not already the workspace copy.
    # Bug fix: the old code compared os.path.abspath(file_path) against
    # os.path.join(self.workspace, basename) (possibly relative), which
    # never matched — so a file already in the workspace was copied onto
    # itself and `cp` failed with "are the same file".
    target = os.path.abspath(os.path.join(self.workspace, basename))
    if os.path.abspath(file_path) != target:
        # Quote the paths so names containing spaces survive the shell
        copy_result = self.run_shell_command(f'cp "{file_path}" "{self.workspace}/"')
        if not copy_result["success"]:
            return {
                "success": False,
                "error": f"Failed to copy protein file to workspace: {copy_result['stderr']}"
            }

    # Create directories for topologies (best-effort; result unused)
    self.run_shell_command("mkdir -p topologies")

    return {
        "success": True,
        "protein_file": self.protein_file,
        "file_path": os.path.join(self.workspace, self.protein_file)
    }
def generate_topology(self, force_field: str, water_model: str = "spc") -> Dict[str, Any]:
    """
    Generate topology for the protein

    Args:
        force_field: Name of the force field to use
        water_model: Water model to use

    Returns:
        Dictionary with result information
    """
    if not self.protein_file:
        return {"success": False, "error": "No protein file has been set"}

    # Translate the user-facing force-field name into GROMACS' internal one
    if force_field not in FORCE_FIELDS:
        return {
            "success": False,
            "error": f"Unknown force field: {force_field}. Available options: {list(FORCE_FIELDS.keys())}"
        }

    pdb2gmx_cmd = (
        f"{self.gmx_bin} pdb2gmx -f {self.protein_file} -o protein.gro "
        f"-p topology.top -i posre.itp -ff {FORCE_FIELDS[force_field]} -water {water_model}"
    )
    outcome = self.run_shell_command(pdb2gmx_cmd)
    if not outcome["success"]:
        return {"success": False, "error": f"Failed to generate topology: {outcome['stderr']}"}

    self.topology_file = "topology.top"
    self.box_file = "protein.gro"

    return {
        "success": True,
        "topology_file": self.topology_file,
        "box_file": self.box_file,
        "force_field": force_field,
        "water_model": water_model,
    }
def solvate_system(self) -> Dict[str, Any]:
    """
    Solvate the protein in water

    Returns:
        Dictionary with result information
    """
    if not (self.box_file and self.topology_file):
        return {"success": False, "error": "Box file or topology file not defined"}

    # spc216.gro is the pre-equilibrated water box shipped with GROMACS
    outcome = self.run_shell_command(
        f"{self.gmx_bin} solvate -cp {self.box_file} -cs spc216.gro -o solvated.gro -p {self.topology_file}"
    )
    if not outcome["success"]:
        return {"success": False, "error": f"Failed to solvate the protein: {outcome['stderr']}"}

    self.solvated_file = "solvated.gro"
    return {"success": True, "solvated_file": self.solvated_file}
def add_ions(self, concentration: float = .15, neutral: bool = True) -> Dict[str, Any]:
    """
    Add ions to the solvated system

    Args:
        concentration: Salt concentration in M
        neutral: Whether to neutralize the system

    Returns:
        Dictionary with result information
    """
    if not (self.solvated_file and self.topology_file):
        return {"success": False, "error": "Solvated file or topology file not defined"}

    # ions.mdp only drives the grompp preprocessing step
    ions_mdp = self.create_mdp_file("ions")
    if not ions_mdp["success"]:
        return ions_mdp

    grompp = self.run_shell_command(
        f"{self.gmx_bin} grompp -f ions.mdp -c {self.solvated_file} -p {self.topology_file} -o ions.tpr"
    )
    if not grompp["success"]:
        return {"success": False, "error": f"Failed to prepare for adding ions: {grompp['stderr']}"}

    # genion replaces solvent ('SOL') molecules with NA/CL ions
    neutral_flag = "-neutral" if neutral else ""
    genion = self.run_shell_command(
        f"echo 'SOL' | {self.gmx_bin} genion -s ions.tpr -o solvated_ions.gro -p {self.topology_file} -pname NA -nname CL {neutral_flag} -conc {concentration}"
    )
    if not genion["success"]:
        return {"success": False, "error": f"Failed to add ions: {genion['stderr']}"}

    self.solvated_file = "solvated_ions.gro"
    return {
        "success": True,
        "solvated_file": self.solvated_file,
        "concentration": concentration,
        "neutral": neutral,
    }
def run_nvt_equilibration(self) -> Dict[str, Any]:
    """
    Run NVT equilibration

    Returns:
        Dictionary with result information
    """
    if not self.minimized_file or not self.topology_file:
        return {
            "success": False,
            "error": "Minimized file or topology file not defined"
        }

    # Create nvt.mdp file
    nvt_mdp = self.create_mdp_file("nvt")
    if not nvt_mdp["success"]:
        return nvt_mdp

    # Generate tpr file for NVT equilibration; -r supplies the position
    # restraint reference coordinates
    cmd = f"{self.gmx_bin} grompp -f nvt.mdp -c {self.minimized_file} -r {self.minimized_file} -p {self.topology_file} -o nvt.tpr"
    result = self.run_shell_command(cmd)
    if not result["success"]:
        return {
            "success": False,
            "error": f"Failed to prepare NVT equilibration: {result['stderr']}"
        }

    # Run NVT equilibration
    result = self.run_shell_command(f"{self.gmx_bin} mdrun -v -deffnm nvt")
    if not result["success"]:
        # Some setups fail when mdrun auto-selects multiple thread-MPI
        # ranks; retry once with a single rank before reporting failure.
        # (Removed the dead commented-out early-return that used to sit here.)
        result = self.run_shell_command(f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm nvt")
        if not result["success"]:
            return {
                "success": False,
                "error": f"NVT equilibration failed: {result['stderr']}"
            }

    return {
        "success": True,
        "nvt_file": "nvt.gro",
        "nvt_checkpoint": "nvt.cpt",
        "log_file": "nvt.log",
        "energy_file": "nvt.edr"
    }
def run_npt_equilibration(self) -> Dict[str, Any]:
    """
    Run NPT equilibration

    Continues from the NVT stage output (nvt.gro / nvt.cpt on disk).

    Returns:
        Dictionary with result information
    """
    # Guard added for consistency: every other stage validates its inputs
    # before invoking GROMACS; this one previously did not.
    if not self.topology_file:
        return {
            "success": False,
            "error": "Topology file not defined"
        }

    # Create npt.mdp file
    npt_mdp = self.create_mdp_file("npt")
    if not npt_mdp["success"]:
        return npt_mdp

    # Generate tpr file for NPT equilibration, restarting from the NVT
    # checkpoint (-t) with restraint reference coordinates (-r)
    cmd = f"{self.gmx_bin} grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p {self.topology_file} -o npt.tpr"
    result = self.run_shell_command(cmd)
    if not result["success"]:
        return {
            "success": False,
            "error": f"Failed to prepare NPT equilibration: {result['stderr']}"
        }

    # Run NPT equilibration
    result = self.run_shell_command(f"{self.gmx_bin} mdrun -v -deffnm npt")
    if not result["success"]:
        # Retry once with a single thread-MPI rank (same fallback as the
        # NVT stage).  Removed the dead commented-out code here.
        result = self.run_shell_command(f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm npt")
        if not result["success"]:
            return {
                "success": False,
                "error": f"NPT equilibration failed: {result['stderr']}"
            }

    self.equilibrated_file = "npt.gro"

    return {
        "success": True,
        "equilibrated_file": self.equilibrated_file,
        "npt_checkpoint": "npt.cpt",
        "log_file": "npt.log",
        "energy_file": "npt.edr"
    }
def run_production_md(self, length_ns: float = 10.0) -> Dict[str, Any]:
    """
    Run production MD

    Args:
        length_ns: Length of the simulation in nanoseconds

    Returns:
        Dictionary with result information
    """
    if not self.equilibrated_file or not self.topology_file:
        return {
            "success": False,
            "error": "Equilibrated file or topology file not defined"
        }

    # 2 fs timestep -> 500,000 steps per simulated nanosecond
    nsteps = int(length_ns * 1000000 / 2)

    # Create md.mdp file with the custom step count
    md_mdp = self.create_mdp_file("md", {"nsteps": nsteps})
    if not md_mdp["success"]:
        return md_mdp

    # Generate tpr file for production MD, continuing from the NPT checkpoint
    cmd = f"{self.gmx_bin} grompp -f md.mdp -c {self.equilibrated_file} -t npt.cpt -p {self.topology_file} -o md.tpr"
    result = self.run_shell_command(cmd)
    if not result["success"]:
        return {
            "success": False,
            "error": f"Failed to prepare production MD: {result['stderr']}"
        }

    # Run production MD
    result = self.run_shell_command(f"{self.gmx_bin} mdrun -v -deffnm md")
    if not result["success"]:
        # Retry once with a single thread-MPI rank (same fallback as the
        # equilibration stages).  Removed the dead commented-out code here.
        result = self.run_shell_command(f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm md")
        if not result["success"]:
            return {
                "success": False,
                "error": f"Production MD failed: {result['stderr']}"
            }

    self.production_file = "md.gro"

    return {
        "success": True,
        "production_file": self.production_file,
        "trajectory_file": "md.xtc",
        "log_file": "md.log",
        "energy_file": "md.edr",
        "length_ns": length_ns
    }
def analyze_gyration(self) -> Dict[str, Any]:
    """
    Perform radius of gyration analysis

    Returns:
        Dictionary with result information
    """
    # Create analysis directory if it doesn't exist
    self.run_shell_command("mkdir -p analysis")

    # Bug fix: was `self.gmx_bins` (AttributeError at runtime); the
    # attribute set in __init__ and used everywhere else is `gmx_bin`.
    cmd = f"echo 'Protein' | {self.gmx_bin} gyrate -s md.tpr -f md.xtc -o analysis/gyrate.xvg"
    result = self.run_shell_command(cmd)

    if not result["success"]:
        return {
            "success": False,
            "error": f"Radius of gyration analysis failed: {result['stderr']}"
        }

    return {
        "success": True,
        "output_file": "analysis/gyrate.xvg",
        "analysis_type": "Radius of Gyration"
    }
def get_state(self) -> Dict[str, Any]:
    """
    Get the current state of the protocol, including ligand details

    Returns:
        Dictionary with protocol state information
    """
    state = super().get_state()

    # Only decorate a successful base state with ligand information
    if state["success"]:
        state["ligand_file"] = self.ligand_file
        state["ligand_name"] = self.ligand_name
        state["complex_file"] = self.complex_file
        state["has_ligand"] = self.has_ligand
        state["index_file"] = self.index_file

    return state
def check_for_ligands(self, pdb_file: str) -> Dict[str, Any]:
    """
    Check for potential ligands in the PDB file

    Args:
        pdb_file: Path to the PDB file

    Returns:
        Dictionary with ligand information
    """
    try:
        # List the unique residue names appearing in coordinate records
        listing = self.run_shell_command(
            f"grep '^ATOM\\|^HETATM' {pdb_file} | awk '{{print $4}}' | sort | uniq"
        )
        if not listing["success"]:
            return {
                "success": False,
                "error": f"Failed to analyze PDB file: {listing['stderr']}"
            }

        # Anything that is not a standard residue is a ligand candidate
        candidates = [
            name for name in listing["stdout"].strip().split()
            if name not in STANDARD_RESIDUES
        ]
        return {"success": True, "ligands": candidates}

    except Exception as e:
        return {
            "success": False,
            "error": f"Error checking for ligands: {str(e)}"
        }
def set_ligand(self, ligand_name: str) -> Dict[str, Any]:
    """
    Set the ligand for simulation

    Args:
        ligand_name: Residue name of the ligand in the PDB file

    Returns:
        Dictionary with result information
    """
    if not self.protein_file:
        return {"success": False, "error": "No protein file has been set"}

    self.ligand_name = ligand_name

    # Directory layout used for receptor/ligand parameterization
    dirs = self.run_shell_command("mkdir -p param/receptor param/ligand")
    if not dirs["success"]:
        return {"success": False, "error": f"Failed to create directories: {dirs['stderr']}"}

    # The receptor is the set of ATOM records from the input structure
    receptor = self.run_shell_command(
        f"grep '^ATOM' {self.protein_file} > param/receptor/receptor.pdb"
    )
    if not receptor["success"]:
        return {"success": False, "error": f"Failed to extract protein atoms: {receptor['stderr']}"}

    # Ligand extraction (which renames the residue to LIG) is done in Python
    extracted = self.extract_ligand(
        os.path.join(self.workspace, self.protein_file), ligand_name
    )
    if not extracted["success"]:
        return extracted

    self.ligand_file = "param/ligand/ligand.pdb"
    self.has_ligand = True

    return {
        "success": True,
        "ligand_name": ligand_name,
        "ligand_file": self.ligand_file,
        "receptor_file": "param/receptor/receptor.pdb"
    }
def extract_ligand(self, pdb_file: str, ligand_name: str) -> Dict[str, Any]:
    """
    Extract ligand from PDB file and rename it to LIG

    Records whose residue name matches are renamed to "LIG" and written,
    together with any CONECT records referencing the ligand atoms, to
    param/ligand/ligand.pdb.  The filtering is done directly in Python
    instead of writing a throwaway script and shelling out to
    `python extract_ligand.py`, which avoided quoting issues with the
    embedded path and left a stray temp file behind on failure.

    Args:
        pdb_file: Path to the PDB file
        ligand_name: Residue name of the ligand

    Returns:
        Dictionary with result information
    """
    try:
        ligand_atoms = []
        kept_lines = []
        with open(pdb_file, "r") as handle:
            for line in handle:
                # Columns 18-20 (0-based 17:20) hold the residue name
                if ligand_name in line[17:20]:
                    line = line[:17] + "LIG" + line[20:]
                    kept_lines.append(line)
                    ligand_atoms.append(int(line[6:11]))
                elif "CONECT" in line[0:6]:
                    # Keep connectivity records touching any ligand atom
                    serials = [int(x) for x in line.split()[1:]]
                    if any(serial in serials for serial in ligand_atoms):
                        kept_lines.append(line)

        # NOTE(review): path is relative to the current working directory,
        # matching the original script's behavior — confirm callers run
        # from the workspace.
        with open("param/ligand/ligand.pdb", "w") as handle:
            handle.writelines(kept_lines)

        return {
            "success": True,
            "ligand_file": "param/ligand/ligand.pdb"
        }

    except Exception as e:
        return {
            "success": False,
            "error": f"Error extracting ligand: {str(e)}"
        }
def prepare_ligand_topology(self) -> Dict[str, Any]:
    """
    Prepare ligand topology using OpenBabel and ACPYPE

    Returns:
        Dictionary with result information
    """
    if not self.has_ligand or not self.ligand_file:
        return {"success": False, "error": "No ligand has been set"}

    # Both external tools must be available before any work starts
    tools = self.check_prerequisites()
    if not tools["openbabel"]["installed"]:
        return {
            "success": False,
            "error": "OpenBabel is required for ligand preparation but is not installed"
        }
    if not tools["acpype"]["installed"]:
        return {
            "success": False,
            "error": "ACPYPE is required for ligand preparation but is not installed"
        }

    # PDB -> MOL2 via OpenBabel, adding hydrogens (-h)
    step = self.run_shell_command("cd param/ligand && obabel -ipdb ligand.pdb -omol2 -h > ligand.mol2")
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to convert ligand to MOL2 format: {step['stderr']}"
        }

    # Ligand parameters via ACPYPE
    step = self.run_shell_command("cd param/ligand && acpype -i ligand.mol2")
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to generate ligand topology with ACPYPE: {step['stderr']}"
        }

    step = self.run_shell_command("cp param/ligand/ligand.acpype/ligand_GMX.itp ligand.itp")
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to copy ligand topology: {step['stderr']}"
        }

    # Heavy-atom index group used for ligand restraints
    step = self.run_shell_command(
        f"echo $'r LIG & !a H*\nname 3 LIG-H\nq'| {self.gmx_bin} make_ndx -f param/ligand/ligand.acpype/ligand_NEW.pdb -o lig_noh.ndx"
    )
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to create index for ligand restraints: {step['stderr']}"
        }

    # ACPYPE already produced the position-restraint file; just copy it
    step = self.run_shell_command("cp param/ligand/ligand.acpype/posre_ligand.itp .")
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to generate position restraints for ligand: {step['stderr']}"
        }

    # Make the restraints switchable through the POSRES define
    step = self.run_shell_command('''echo '
; Include Position restraint file
#ifdef POSRES
#include "posre_ligand.itp"
#endif' >> ligand.itp''')
    if not step["success"]:
        return {
            "success": False,
            "error": f"Failed to update ligand.itp with position restraints: {step['stderr']}"
        }

    return {
        "success": True,
        "ligand_topology": "ligand.itp",
        "ligand_posre": "posre_ligand.itp"
    }
def prepare_receptor_topology(self, force_field: str, water_model: str = "spc") -> Dict[str, Any]:
    """
    Generate topology for the receptor

    Args:
        force_field: Name of the force field to use
        water_model: Water model to use

    Returns:
        Dictionary with result information
    """
    if not os.path.exists("param/receptor/receptor.pdb"):
        return {"success": False, "error": "Receptor file not found"}

    # Translate the user-facing force-field name into GROMACS' internal one
    if force_field not in FORCE_FIELDS:
        return {
            "success": False,
            "error": f"Unknown force field: {force_field}. Available options: {list(FORCE_FIELDS.keys())}"
        }

    pdb2gmx = self.run_shell_command(
        f"cd param/receptor && {self.gmx_bin} pdb2gmx -f receptor.pdb -o receptor_GMX.pdb -p topol.top -i posre.itp -ff {FORCE_FIELDS[force_field]} -water {water_model}"
    )
    if not pdb2gmx["success"]:
        return {
            "success": False,
            "error": f"Failed to generate receptor topology: {pdb2gmx['stderr']}"
        }

    # Bring the generated topology files next to the simulation inputs
    copied = self.run_shell_command("cp param/receptor/*.itp param/receptor/topol.top .")
    if not copied["success"]:
        return {
            "success": False,
            "error": f"Failed to copy receptor topology files: {copied['stderr']}"
        }

    return {"success": True, "receptor_topology": "topol.top"}
def merge_protein_ligand(self) -> Dict[str, Any]:
    """
    Merge protein and ligand structures and update the topology

    Returns:
        Dictionary with result information
    """
    if not self.has_ligand:
        return {"success": False, "error": "No ligand has been set"}

    # Concatenate the coordinate records of receptor and ligand
    merged = self.run_shell_command(
        "grep -h ATOM param/receptor/receptor_GMX.pdb param/ligand/ligand.acpype/ligand_NEW.pdb > complex.pdb"
    )
    if not merged["success"]:
        return {
            "success": False,
            "error": f"Failed to merge protein and ligand structures: {merged['stderr']}"
        }

    # Include the ligand topology right after the force-field include line
    updated = self.run_shell_command("""sed -i '/forcefield\\.itp"/a\\
#include "ligand.itp"' topol.top""")
    if not updated["success"]:
        return {
            "success": False,
            "error": f"Failed to update topology file: {updated['stderr']}"
        }

    # Register one ligand molecule in the [ molecules ] section
    added = self.run_shell_command("""echo "ligand 1" >> topol.top""")
    if not added["success"]:
        return {
            "success": False,
            "error": f"Failed to add ligand to topology molecules: {added['stderr']}"
        }

    self.complex_file = "complex.pdb"
    self.topology_file = "topol.top"
    self.box_file = self.complex_file

    return {
        "success": True,
        "complex_file": self.complex_file,
        "topology_file": self.topology_file
    }
def create_index_groups(self) -> Dict[str, Any]:
    """
    Create custom index groups for protein-ligand simulation

    Builds index.ndx with a combined Protein+LIG group and a
    solvent/ions group, renames them to Protein_Ligand / Water_Ions,
    and rewrites the equilibration/production MDP files to use them.

    Returns:
        Dictionary with result information
    """
    if not self.has_ligand:
        return {"success": False, "error": "No ligand has been set"}
    if not self.solvated_file:
        return {"success": False, "error": "System must be solvated first"}

    # Create index groups
    ndx_cmd = f"""echo -e "1 | r LIG\\nr SOL | r CL | r NA\\nq" | {self.gmx_bin} make_ndx -f {self.solvated_file} -o index.ndx"""
    ndx_result = self.run_shell_command(ndx_cmd)
    if not ndx_result["success"]:
        return {
            "success": False,
            "error": f"Failed to create index groups: {ndx_result['stderr']}"
        }

    # Rename the two groups make_ndx just appended (the last two group
    # headers in the file).  Done inline instead of writing a temporary
    # rename_groups.py script and shelling out to `python` for it.
    try:
        import re
        with open('index.ndx', 'r') as file:
            content = file.read()
        matches = re.findall(r'\[ \w+ \]', content)
        if matches:
            content = content.replace(matches[-1], '[ Water_Ions ]')
            content = content.replace(matches[-2], '[ Protein_Ligand ]')
        with open('index.ndx', 'w') as file:
            file.write(content)
    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to rename index groups: {e}"
        }

    # Re-generate the MDP files and switch their coupling groups to the
    # new names
    self.create_mdp_file("nvt")
    nvt_result = self.run_shell_command("sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' nvt.mdp")
    self.create_mdp_file("npt")
    npt_result = self.run_shell_command("sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' npt.mdp")
    self.create_mdp_file("md")
    md_result = self.run_shell_command("sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' md.mdp")

    if not (nvt_result["success"] and npt_result["success"] and md_result["success"]):
        return {
            "success": False,
            "error": "Failed to update MDP files with new index groups"
        }

    self.index_file = "index.ndx"

    return {
        "success": True,
        "index_file": self.index_file,
        "groups": ["Protein_Ligand", "Water_Ions"]
    }
receptor_result 531 | 532 | # Prepare ligand topology 533 | ligand_result = self.prepare_ligand_topology() 534 | if not ligand_result["success"]: 535 | return ligand_result 536 | 537 | # Merge protein and ligand 538 | merge_result = self.merge_protein_ligand() 539 | if not merge_result["success"]: 540 | return merge_result 541 | 542 | return { 543 | "success": True, 544 | "topology_file": self.topology_file, 545 | "complex_file": self.complex_file, 546 | "force_field": force_field, 547 | "water_model": water_model, 548 | "has_ligand": self.has_ligand 549 | } 550 | else: 551 | # Standard protein-only topology generation 552 | return super().generate_topology(force_field, water_model) 553 | 554 | def solvate_system(self) -> Dict[str, Any]: 555 | """ 556 | Solvate the protein-ligand complex in water 557 | 558 | Returns: 559 | Dictionary with result information 560 | """ 561 | # Use the parent class solvate_system method 562 | result = super().solvate_system() 563 | 564 | if not result["success"]: 565 | return result 566 | 567 | # If this is a protein-ligand system, create index groups 568 | if self.has_ligand: 569 | index_result = self.create_index_groups() 570 | if not index_result["success"]: 571 | return { 572 | "success": False, 573 | "error": f"Failed to create index groups: {index_result['error']}" 574 | } 575 | 576 | return { 577 | "success": True, 578 | "solvated_file": self.solvated_file, 579 | "has_ligand": self.has_ligand, 580 | "index_file": self.index_file if self.has_ligand else None 581 | } 582 | 583 | def add_ions(self, concentration: float = .15, neutral: bool = True) -> Dict[str, Any]: 584 | """ 585 | Add ions to the solvated system 586 | 587 | Args: 588 | concentration: Salt concentration in M 589 | neutral: Whether to neutralize the system 590 | 591 | Returns: 592 | Dictionary with result information 593 | """ 594 | # Use the parent class add_ions method 595 | result = super().add_ions(concentration, neutral) 596 | 597 | if not result["success"]: 
598 | return result 599 | 600 | # If this is a protein-ligand system, update index groups 601 | if self.has_ligand: 602 | index_result = self.create_index_groups() 603 | if not index_result["success"]: 604 | return { 605 | "success": False, 606 | "error": f"Failed to update index groups: {index_result['error']}" 607 | } 608 | 609 | return { 610 | "success": True, 611 | "solvated_file": self.solvated_file, 612 | "concentration": concentration, 613 | "neutral": neutral, 614 | "has_ligand": self.has_ligand, 615 | "index_file": self.index_file if self.has_ligand else None 616 | } 617 | 618 | def run_energy_minimization(self) -> Dict[str, Any]: 619 | """ 620 | Run energy minimization 621 | 622 | Returns: 623 | Dictionary with result information 624 | """ 625 | if not self.solvated_file or not self.topology_file: 626 | return { 627 | "success": False, 628 | "error": "Solvated file or topology file not defined" 629 | } 630 | 631 | # Create em.mdp file 632 | em_mdp = self.create_mdp_file("em") 633 | if not em_mdp["success"]: 634 | return em_mdp 635 | 636 | # Generate tpr file for minimization, using index file if available 637 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else "" 638 | cmd = f"{self.gmx_bin} grompp -f em.mdp -c {self.solvated_file} -p {self.topology_file} -o em.tpr {index_option}" 639 | result = self.run_shell_command(cmd) 640 | 641 | if not result["success"]: 642 | return { 643 | "success": False, 644 | "error": f"Failed to prepare energy minimization: {result['stderr']}" 645 | } 646 | 647 | # Run energy minimization 648 | cmd = f"{self.gmx_bin} mdrun -v -deffnm em" 649 | result = self.run_shell_command(cmd) 650 | 651 | if not result["success"]: 652 | return { 653 | "success": False, 654 | "error": f"Energy minimization failed: {result['stderr']}" 655 | } 656 | 657 | self.minimized_file = "em.gro" 658 | 659 | return { 660 | "success": True, 661 | "minimized_file": self.minimized_file, 662 | "log_file": "em.log", 663 | 
"energy_file": "em.edr" 664 | } 665 | 666 | # Override run_nvt_equilibration to use index file if available 667 | def run_nvt_equilibration(self) -> Dict[str, Any]: 668 | """ 669 | Run NVT equilibration 670 | 671 | Returns: 672 | Dictionary with result information 673 | """ 674 | if not self.minimized_file or not self.topology_file: 675 | return { 676 | "success": False, 677 | "error": "Minimized file or topology file not defined" 678 | } 679 | 680 | # Create nvt.mdp file 681 | nvt_mdp = self.create_mdp_file("nvt") 682 | if not nvt_mdp["success"]: 683 | return nvt_mdp 684 | 685 | # Generate tpr file for NVT equilibration, using index file if available 686 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else "" 687 | cmd = f"{self.gmx_bin} grompp -f nvt.mdp -c {self.minimized_file} -r {self.minimized_file} -p {self.topology_file} -o nvt.tpr -maxwarn 2 {index_option}" 688 | result = self.run_shell_command(cmd) 689 | 690 | if not result["success"]: 691 | return { 692 | "success": False, 693 | "error": f"Failed to prepare NVT equilibration: {result['stderr']}" 694 | } 695 | 696 | # Run NVT equilibration 697 | cmd = f"{self.gmx_bin} mdrun -v -deffnm nvt" 698 | result = self.run_shell_command(cmd) 699 | 700 | if not result["success"]: 701 | return { 702 | "success": False, 703 | "error": f"NVT equilibration failed: {result['stderr']}" 704 | } 705 | 706 | return { 707 | "success": True, 708 | "nvt_file": "nvt.gro", 709 | "nvt_checkpoint": "nvt.cpt", 710 | "log_file": "nvt.log", 711 | "energy_file": "nvt.edr" 712 | } 713 | 714 | # Override run_npt_equilibration to use index file if available 715 | def run_npt_equilibration(self) -> Dict[str, Any]: 716 | """ 717 | Run NPT equilibration 718 | 719 | Returns: 720 | Dictionary with result information 721 | """ 722 | # Create npt.mdp file 723 | npt_mdp = self.create_mdp_file("npt") 724 | if not npt_mdp["success"]: 725 | return npt_mdp 726 | 727 | # Generate tpr file for NPT equilibration, using 
index file if available 728 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else "" 729 | cmd = f"{self.gmx_bin} grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p {self.topology_file} -o npt.tpr -maxwarn 2 {index_option}" 730 | result = self.run_shell_command(cmd) 731 | 732 | if not result["success"]: 733 | return { 734 | "success": False, 735 | "error": f"Failed to prepare NPT equilibration: {result['stderr']}" 736 | } 737 | 738 | # Run NPT equilibration 739 | cmd = f"{self.gmx_bin} mdrun -v -deffnm npt" 740 | result = self.run_shell_command(cmd) 741 | 742 | if not result["success"]: 743 | return { 744 | "success": False, 745 | "error": f"NPT equilibration failed: {result['stderr']}" 746 | } 747 | 748 | self.equilibrated_file = "npt.gro" 749 | 750 | return { 751 | "success": True, 752 | "equilibrated_file": self.equilibrated_file, 753 | "npt_checkpoint": "npt.cpt", 754 | "log_file": "npt.log", 755 | "energy_file": "npt.edr" 756 | } 757 | 758 | # Override run_production_md to use index file if available 759 | def run_production_md(self, length_ns: float = 10.0) -> Dict[str, Any]: 760 | """ 761 | Run production MD 762 | 763 | Args: 764 | length_ns: Length of the simulation in nanoseconds 765 | 766 | Returns: 767 | Dictionary with result information 768 | """ 769 | if not self.equilibrated_file or not self.topology_file: 770 | return { 771 | "success": False, 772 | "error": "Equilibrated file or topology file not defined" 773 | } 774 | 775 | # Calculate number of steps (2 fs timestep) 776 | nsteps = int(length_ns * 1000000 / 2) 777 | 778 | # Create md.mdp file with custom steps 779 | md_mdp = self.create_mdp_file("md", {"nsteps": nsteps}) 780 | if not md_mdp["success"]: 781 | return md_mdp 782 | 783 | # Generate tpr file for production MD, using index file if available 784 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else "" 785 | cmd = f"{self.gmx_bin} grompp -f md.mdp -c {self.equilibrated_file} 
-t npt.cpt -p {self.topology_file} -o md.tpr -maxwarn 2 {index_option}" 786 | result = self.run_shell_command(cmd) 787 | 788 | if not result["success"]: 789 | return { 790 | "success": False, 791 | "error": f"Failed to prepare production MD: {result['stderr']}" 792 | } 793 | 794 | # Run production MD 795 | cmd = f"{self.gmx_bin} mdrun -v -deffnm md" 796 | result = self.run_shell_command(cmd) 797 | 798 | if not result["success"]: 799 | return { 800 | "success": False, 801 | "error": f"Production MD failed: {result['stderr']}" 802 | } 803 | 804 | self.production_file = "md.gro" 805 | 806 | return { 807 | "success": True, 808 | "production_file": self.production_file, 809 | "trajectory_file": "md.xtc", 810 | "log_file": "md.log", 811 | "energy_file": "md.edr", 812 | "length_ns": length_ns 813 | } 814 | 815 | # Add protein-ligand specific analysis methods 816 | def analyze_ligand_rmsd(self) -> Dict[str, Any]: 817 | """ 818 | Perform RMSD analysis focused on the ligand 819 | 820 | Returns: 821 | Dictionary with result information 822 | """ 823 | if not self.has_ligand: 824 | return { 825 | "success": False, 826 | "error": "No ligand has been set" 827 | } 828 | 829 | # Create analysis directory if it doesn't exist 830 | mkdir_result = self.run_shell_command("mkdir -p analysis") 831 | 832 | cmd = f"echo 'LIG LIG' | {self.gmx_bin} rms -s md.tpr -f md.xtc -o analysis/ligand_rmsd.xvg -tu ns" 833 | result = self.run_shell_command(cmd) 834 | 835 | if not result["success"]: 836 | return { 837 | "success": False, 838 | "error": f"Ligand RMSD analysis failed: {result['stderr']}" 839 | } 840 | 841 | return { 842 | "success": True, 843 | "output_file": "analysis/ligand_rmsd.xvg", 844 | "analysis_type": "Ligand RMSD" 845 | } 846 | 847 | def analyze_protein_ligand_contacts(self) -> Dict[str, Any]: 848 | """ 849 | Analyze contacts between protein and ligand 850 | 851 | Returns: 852 | Dictionary with result information 853 | """ 854 | if not self.has_ligand: 855 | return { 856 | 
"success": False, 857 | "error": "No ligand has been set" 858 | } 859 | 860 | # Create analysis directory if it doesn't exist 861 | mkdir_result = self.run_shell_command("mkdir -p analysis") 862 | 863 | cmd = f"echo -e 'Protein\\nLIG' | {self.gmx_bin} mindist -s md.tpr -f md.xtc -od analysis/protein_ligand_mindist.xvg -tu ns" 864 | result = self.run_shell_command(cmd) 865 | 866 | if not result["success"]: 867 | return { 868 | "success": False, 869 | "error": f"Protein-ligand contacts analysis failed: {result['stderr']}" 870 | } 871 | 872 | return { 873 | "success": True, 874 | "output_file": "analysis/protein_ligand_mindist.xvg", 875 | "analysis_type": "Protein-Ligand Minimum Distance" 876 | } -------------------------------------------------------------------------------- /gromacs_copilot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility modules for GROMACS Copilot 3 | """ 4 | 5 | from gromacs_copilot.utils.terminal import Colors, print_message, prompt_user 6 | from gromacs_copilot.utils.shell import run_shell_command, check_command_exists, find_executable 7 | from gromacs_copilot.utils.logging_utils import setup_logging, TerminalLogHandler 8 | 9 | __all__ = [ 10 | 'Colors', 11 | 'print_message', 12 | 'prompt_user', 13 | 'run_shell_command', 14 | 'check_command_exists', 15 | 'find_executable', 16 | 'setup_logging', 17 | 'TerminalLogHandler' 18 | ] -------------------------------------------------------------------------------- /gromacs_copilot/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging utilities for GROMACS Copilot 3 | """ 4 | 5 | import logging 6 | import sys 7 | from typing import Optional 8 | 9 | from gromacs_copilot.utils.terminal import print_message 10 | from gromacs_copilot.core.enums import MessageType 11 | 12 | class TerminalLogHandler(logging.Handler): 13 | """Custom logging handler that formats log 
def setup_logging(log_file: Optional[str] = "md_agent.log", level: int = logging.INFO):
    """
    Set up logging for GROMACS Copilot

    Replaces whatever handlers are installed on the root logger with a
    file handler (when log_file is given) and the custom terminal handler.

    Args:
        log_file: Path to log file
        level: Logging level
    """
    root = logging.getLogger()
    root.setLevel(level)

    # Remove pre-existing handlers so repeated calls don't duplicate output.
    for existing in list(root.handlers):
        root.removeHandler(existing)

    # File handler gets timestamps and levels; the terminal handler
    # formats via print_message, so it only needs the raw message.
    if log_file:
        to_file = logging.FileHandler(log_file)
        to_file.setLevel(level)
        to_file.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
        root.addHandler(to_file)

    to_terminal = TerminalLogHandler()
    to_terminal.setLevel(level)
    to_terminal.setFormatter(logging.Formatter("%(message)s"))
    root.addHandler(to_terminal)

    logging.info(f"Logging initialized with level {logging.getLevelName(level)}")
def run_shell_command(command: str, capture_output: bool = True,
                      suppress_output: bool = False) -> Dict[str, Any]:
    """
    Run a shell command with proper error handling

    Args:
        command: Shell command to run
        capture_output: Whether to capture stdout/stderr
        suppress_output: Whether to suppress terminal output

    Returns:
        Dictionary with command result information
    """
    logging.info(f"Running command: {command}")

    if not suppress_output:
        print_message(command, MessageType.COMMAND)

    try:
        if capture_output:
            proc = subprocess.run(
                command,
                shell=True,
                check=False,
                text=True,
                capture_output=True
            )
            succeeded = proc.returncode == 0

            if not suppress_output:
                if not succeeded:
                    print_message(f"Command failed with error:\n{proc.stderr}", MessageType.ERROR)
                elif len(proc.stdout) > 500:
                    # Trim very long output so the terminal stays readable.
                    trimmed_output = proc.stdout[:500] + "...\n[Output trimmed for brevity]"
                    print_message(f"Command succeeded with output:\n{trimmed_output}", MessageType.SUCCESS)
                elif proc.stdout.strip():
                    print_message(f"Command succeeded with output:\n{proc.stdout}", MessageType.SUCCESS)
                else:
                    print_message("Command succeeded with no output", MessageType.SUCCESS)

            return {
                "success": succeeded,
                "return_code": proc.returncode,
                "stdout": proc.stdout,
                "stderr": proc.stderr,
                "command": command
            }

        # Uncaptured mode: output goes straight to the terminal.
        proc = subprocess.run(command, shell=True, check=False)
        succeeded = proc.returncode == 0

        if not suppress_output:
            if succeeded:
                print_message("Command succeeded", MessageType.SUCCESS)
            else:
                print_message("Command failed", MessageType.ERROR)

        return {
            "success": succeeded,
            "return_code": proc.returncode,
            "stdout": "Output not captured",
            "stderr": "Error output not captured",
            "command": command
        }
    except Exception as e:
        # Boundary handler: callers consume failures from the result dict,
        # so execution errors are reported there rather than raised.
        error_msg = str(e)
        logging.error(f"Command execution failed: {error_msg}")

        if not suppress_output:
            print_message(f"Command execution failed: {error_msg}", MessageType.ERROR)

        return {
            "success": False,
            "return_code": 1,
            "stdout": "",
            "stderr": error_msg,
            "command": command,
            "error": error_msg
        }
def print_message(message: str, msg_type: MessageType = MessageType.INFO,
                  style: Optional[str] = None, width: Optional[int] = None):
    """
    Print a formatted message to the console

    Args:
        message: The message to print
        msg_type: Type of message (info, success, warning, error, etc.)
        style: Optional additional styling (box, divider)
        width: Width of the message box (defaults to terminal width)
    """
    # Get terminal width if not specified
    if not width:
        try:
            width = shutil.get_terminal_size().columns
        except (OSError, ValueError):
            # OSError: no usable terminal; ValueError: a non-numeric
            # COLUMNS/LINES environment value. (Was a bare except, which
            # also swallowed KeyboardInterrupt/SystemExit.)
            width = 80

    # Configure colors and prefixes based on message type
    if msg_type == MessageType.INFO:
        color = Colors.CYAN
        prefix = "ℹ️ INFO │ "
    elif msg_type == MessageType.SUCCESS:
        color = Colors.GREEN
        prefix = "✓ SUCCESS │ "
    elif msg_type == MessageType.WARNING:
        color = Colors.YELLOW
        prefix = "⚠️ WARNING │ "
    elif msg_type == MessageType.ERROR:
        color = Colors.RED
        prefix = "✗ ERROR │ "
    elif msg_type == MessageType.TITLE:
        color = Colors.BRIGHT_BLUE + Colors.BOLD
        prefix = "🧪 "
    elif msg_type == MessageType.SYSTEM:
        color = Colors.BRIGHT_MAGENTA
        prefix = "🤖 SYSTEM │ "
    elif msg_type == MessageType.USER:
        color = Colors.BRIGHT_CYAN
        prefix = "👤 USER │ "
    elif msg_type == MessageType.COMMAND:
        color = Colors.BRIGHT_BLACK
        prefix = "$ "
    elif msg_type == MessageType.TOOL:
        color = Colors.BRIGHT_GREEN
        prefix = "🔧 TOOL │ "
    elif msg_type == MessageType.FINAL:
        color = Colors.BRIGHT_GREEN + Colors.BOLD
        prefix = "🏁 FINAL │ "
    else:
        color = ""
        prefix = ""

    # Apply styling
    if style == "box":
        box_width = width - 4  # Account for side margins
        print(f"{color}┌{'─' * box_width}┐{Colors.RESET}")

        # Naive word-wrap: split message into lines that fit the box.
        # NOTE(review): this collapses newlines/whitespace in the message.
        lines = []
        curr_line = ""

        for word in message.split():
            if len(curr_line) + len(word) + 1 <= box_width - 4:  # -4 for margins
                curr_line += word + " "
            else:
                lines.append(curr_line)
                curr_line = word + " "
        if curr_line:
            lines.append(curr_line)

        # Print each line within the box
        for line in lines:
            # max() guards against a single word longer than the box, which
            # previously produced negative padding and a malformed border.
            padding = max(0, box_width - len(line) - 2)
            print(f"{color}│ {line}{' ' * padding} │{Colors.RESET}")

        print(f"{color}└{'─' * box_width}┘{Colors.RESET}")

    elif style == "divider":
        print(f"{color}{'═' * width}{Colors.RESET}")
        print(f"{color}{prefix}{message}{Colors.RESET}")
        print(f"{color}{'═' * width}{Colors.RESET}")

    else:
        # Basic formatting with prefix
        print(f"{color}{prefix}{message}{Colors.RESET}")
"""Packaging configuration for GROMACS Copilot (installs the gmx_copilot CLI)."""
from setuptools import setup, find_packages

# PyPI long description is taken verbatim from the README.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="gromacs_copilot",
    version="0.2.3",
    packages=find_packages(),  # picks up gromacs_copilot and its subpackages
    # Third-party runtime dependencies.
    install_requires=[
        "requests>=2.25.0",
        "mcp>=1.4.1",
    ],
    # Console script: `gmx_copilot` dispatches to gromacs_copilot.cli:main.
    entry_points={
        "console_scripts": [
            "gmx_copilot=gromacs_copilot.cli:main",
        ],
    },
    author="ChatMol Team",
    author_email="jinyuansun@chatmol.org",
    description="A molecular dynamics simulation assistant powered by AI using GROMACS.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ChatMol/gromacs_copilot",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
)