├── .gitignore
├── COMMERCIAL_LICENSE.md
├── LICENSE
├── README.md
├── assets
├── 1pga_gmx_copilot_animation.gif
├── prot_lig.gif
├── prot_lig_rmsf.pdf
└── report.pdf
├── gromacs_copilot
├── __init__.py
├── __main__.py
├── cli.py
├── config.py
├── core
│ ├── __init__.py
│ ├── enums.py
│ └── md_agent.py
├── mcp_server.py
├── protocols
│ ├── __init__.py
│ ├── analysis.py
│ ├── base.py
│ ├── mmpbsa.py
│ ├── protein.py
│ └── protein_ligand.py
└── utils
│ ├── __init__.py
│ ├── logging_utils.py
│ ├── shell.py
│ └── terminal.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # PyPI configuration file
171 | .pypirc
172 |
173 | experimental/
174 | examples/
--------------------------------------------------------------------------------
/COMMERCIAL_LICENSE.md:
--------------------------------------------------------------------------------
1 | # Commercial License for gromacs_copilot
2 |
3 | This software is dual-licensed under:
4 |
5 | 1. **GNU General Public License v3.0 (GPLv3)**
6 | - Free to use, modify, and distribute under **GPL terms**.
7 | - Any derivative work **must also be open-sourced** under the same GPL license.
8 |
9 | 2. **Commercial License**
10 | - If you wish to use this software **without GPL restrictions** (e.g., for proprietary software, SaaS products, or internal business applications), a commercial license is available.
11 | - Contact us at jinyuansun_at_chatmol.org to discuss licensing options.
12 |
13 | ## Benefits of the Commercial License:
14 | ✅ Use in closed-source or proprietary projects.
15 | ✅ No obligation to disclose your modifications.
16 | ✅ Official support and priority updates.
17 |
18 | For inquiries, please email **jinyuansun_at_chatmol.org** or visit **[our website](https://chatmol.org/)**.
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This project is licensed under the terms of the GNU General Public License v3.0.
2 |
3 | You can redistribute and modify this project under the terms of the GNU General Public License as published by the Free Software Foundation.
4 |
5 | However, if you wish to use this software without the restrictions of the GPL (e.g., for proprietary or commercial use), please contact us for a commercial license.
6 |
7 | See the full GPL-3.0 license at: https://www.gnu.org/licenses/gpl-3.0.html
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GROMACS Copilot
2 | **Let LLM run your MDs.**
3 |
4 | The good news: 🎉 You now have more time to hang out with your cat! 🐱💖
5 | The bad news: 😢 You'll miss out on GROMACS' legendary wisdom... 🧙♂️💬
6 |
7 | ## Introduction
8 | This agent automates **MD simulations** for proteins in water using **GROMACS**. It sets up the system, runs simulations, and analyzes **RMSD, RMSF, Rg, H-bonds**, etc.
9 |
10 |
32 |
33 | ## How to Run
34 |
35 | ### Before using an LLM
36 | 1. Install the package
37 | ```bash
38 | pip install git+https://github.com/ChatMol/gromacs_copilot.git
39 | conda install -c conda-forge acpype # for protein-ligand complex
40 | conda install -c conda-forge gmx_mmpbsa # for MM-PBSA/GBSA analysis
41 | ```
42 | 2. Prepare a working directory and an input PDB file
43 | ```bash
44 | mkdir md_workspace && cd md_workspace
45 | wget https://files.rcsb.org/download/1PGA.pdb
46 | grep -v HOH 1PGA.pdb > 1pga_protein.pdb
47 | cd ..
48 | ```
49 |
50 | ### Using DeepSeek
51 | ```bash
52 | gmx_copilot --workspace md_workspace/ \
53 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \
54 | --api-key $DEEPSEEK_API_KEY \
55 | --model deepseek-chat \
56 | --url https://api.deepseek.com/chat/completions
57 | ```
58 |
59 | ### Using OpenAI
60 | ```bash
61 | gmx_copilot --workspace md_workspace/ \
62 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \
63 | --api-key $OPENAI_API_KEY \
64 | --model gpt-4o \
65 | --url https://api.openai.com/v1/chat/completions
66 | ```
67 |
68 | ### Using Gemini
69 | ```bash
70 | gmx_copilot --workspace md_workspace/ \
71 | --prompt "setup simulation system for 1pga_protein.pdb in the workspace" \
72 | --api-key $GEMINI_API_KEY \
73 | --model gemini-2.0-flash \
74 | --url https://generativelanguage.googleapis.com/v1beta/chat/completions
75 | ```
76 |
77 | 3. Agent mode
78 | Agent mode is well suited to automating long-running sequences of tool calls.
79 | ```bash
80 | gmx_copilot --workspace md_workspace/ \
81 | --prompt "run 1 ns production md for 1pga_protein.pdb in the workspace, and analyze rmsd" \
82 | --mode agent
83 | ```
84 |
85 | The agent handles **system setup, simulation execution, and result analysis** automatically. 🚀
86 |
87 |
88 | ## License
89 | This project is dual-licensed under:
90 | - **GPLv3** (Open Source License)
91 | - **Commercial License** (For proprietary use)
92 |
93 | For commercial licensing, [read this](COMMERCIAL_LICENSE.md).
94 |
95 | ## Known issues
96 | 1. 🤖 LLM sometimes struggles with selecting the correct group index. Double-checking the selection is recommended.
97 | 2. ⚡ The interaction between LLM and `gmx` prompt input isn't always seamless. Running commands based on suggestions can help you get the correct results more easily.
98 |
99 | ## Disclaimer
100 |
101 | GROMACS Copilot is provided "as is" without warranty of any kind, express or implied. The authors and contributors disclaim all warranties including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. Users employ this software at their own risk.
102 |
103 | The authors bear no responsibility for any consequences arising from the use, misuse, or misinterpretation of this software or its outputs. Results obtained through GROMACS Copilot should be independently validated prior to use in research, publications, or decision-making processes.
104 |
105 | This software is intended for research and educational purposes only. Users are solely responsible for ensuring compliance with applicable laws, regulations, and ethical standards in their jurisdiction.
--------------------------------------------------------------------------------
/assets/1pga_gmx_copilot_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/1pga_gmx_copilot_animation.gif
--------------------------------------------------------------------------------
/assets/prot_lig.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/prot_lig.gif
--------------------------------------------------------------------------------
/assets/prot_lig_rmsf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/prot_lig_rmsf.pdf
--------------------------------------------------------------------------------
/assets/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChatMol/gromacs_copilot/c0d75a5eea8d6cc0a00b523ef8455380ade237a7/assets/report.pdf
--------------------------------------------------------------------------------
/gromacs_copilot/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | GROMACS Copilot - AI-assisted molecular dynamics simulations
3 | ============================================================
4 |
5 | A tool for automating GROMACS molecular dynamics simulations
6 | with AI assistance to guide setup, execution, and analysis.
7 |
8 | Created by the ChatMol Team
9 | """
10 |
11 | __version__ = "0.2.0"
12 | __author__ = "ChatMol Team"
13 | __email__ = "jinyuansun@chatmol.org"
--------------------------------------------------------------------------------
/gromacs_copilot/__main__.py:
--------------------------------------------------------------------------------
1 | """
2 | Entry point for running GROMACS Copilot as a module
3 | """
4 |
5 | from gromacs_copilot.cli import main
6 |
7 | if __name__ == "__main__":
8 | main()
--------------------------------------------------------------------------------
/gromacs_copilot/cli.py:
--------------------------------------------------------------------------------
1 | """
2 | Command-line interface for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import sys
7 | import argparse
8 | import logging
9 |
10 | from gromacs_copilot.core.md_agent import MDLLMAgent
11 | from gromacs_copilot.utils.terminal import Colors, print_message
12 | from gromacs_copilot.utils.logging_utils import setup_logging
13 | from gromacs_copilot.core.enums import MessageType
14 | from gromacs_copilot.config import DEFAULT_WORKSPACE, DEFAULT_MODEL, DEFAULT_OPENAI_URL
15 |
16 |
def parse_arguments():
    """
    Parse command-line arguments for the GROMACS Copilot CLI.

    Returns:
        argparse.Namespace: Parsed arguments with attributes api_key, url,
        model, workspace, prompt, no_color, log_file, log_level and mode.
    """
    parser = argparse.ArgumentParser(description="GROMACS Copilot")
    parser.add_argument("--api-key", help="API key for LLM service")
    parser.add_argument("--url",
                        help=(
                            "The url of the LLM service, "
                            "\ndeepseek: https://api.deepseek.com/chat/completions"
                            "\nopenai: https://api.openai.com/v1/chat/completions"
                        ),
                        default=DEFAULT_OPENAI_URL)
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Model to use for LLM")
    parser.add_argument("--workspace", default=DEFAULT_WORKSPACE, help="Workspace directory")
    parser.add_argument("--prompt", help="Starting prompt for the LLM")
    parser.add_argument("--no-color", action="store_true", help="Disable colored output")
    parser.add_argument("--log-file", default="md_agent.log", help="Log file path")
    parser.add_argument("--log-level", default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                        help="Logging level")
    # Help text grammar fixed: "a advisor" -> "an advisor".
    parser.add_argument("--mode", default="copilot", choices=['copilot', 'agent'],
                        help="The copilot mode or agent mode, copilot will be more like an advisor."
                        )

    return parser.parse_args()
46 |
47 |
def main():
    """
    CLI entry point: configure logging and colors, show the splash screen,
    resolve the API key for the chosen endpoint, then launch the MD LLM agent.
    """
    args = parse_arguments()

    # Configure logging as early as possible so startup issues are captured.
    setup_logging(args.log_file, level=getattr(logging, args.log_level))

    # Skip ANSI colors when disabled explicitly or when output is piped.
    if args.no_color or not sys.stdout.isatty():
        Colors.disable_colors()

    # Splash screen.
    print_message("", style="divider")
    print_message("GROMACS Copilot", MessageType.TITLE, style="box")
    print_message("A molecular dynamics simulation assistant powered by AI, created by the ChatMol Team.", MessageType.INFO)
    print_message("", style="divider")

    # Known provider endpoints fall back to their environment variables;
    # any other endpoint requires an explicit --api-key.
    env_fallbacks = {
        "https://api.openai.com/v1/chat/completions": "OPENAI_API_KEY",
        "https://api.deepseek.com/chat/completions": "DEEPSEEK_API_KEY",
    }

    try:
        env_var = env_fallbacks.get(args.url)
        api_key = args.api_key or (os.environ.get(env_var) if env_var else None)

        if not api_key:
            print_message(
                "API key not found. Please provide an API key using --api-key or set the "
                "OPENAI_API_KEY or DEEPSEEK_API_KEY environment variable.",
                MessageType.ERROR
            )
            sys.exit(1)

        print_message(f"Initializing with model: {args.model}", MessageType.INFO)
        print_message(f"Using workspace: {args.workspace}", MessageType.INFO)

        agent = MDLLMAgent(
            api_key=api_key,
            model=args.model,
            workspace=args.workspace,
            url=args.url,
            mode=args.mode,
        )
        agent.run(starting_prompt=args.prompt)

    except KeyboardInterrupt:
        print_message("\nExiting the MD agent. Thank you for using GROMACS Copilot!",
                      MessageType.SUCCESS, style="box")
    except Exception as e:
        error_msg = str(e)
        logging.error(f"Error running MD LLM agent: {error_msg}")
        print_message(f"Error running MD LLM agent: {error_msg}",
                      MessageType.ERROR, style="box")
--------------------------------------------------------------------------------
/gromacs_copilot/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration constants and settings for GROMACS Copilot
3 | """
4 |
5 | # Default settings
6 | DEFAULT_WORKSPACE = "./md_workspace"
7 | DEFAULT_MODEL = "gpt-4o"
8 | DEFAULT_OPENAI_URL = "https://api.openai.com/v1/chat/completions"
9 | DEFAULT_DEEPSEEK_URL = "https://api.deepseek.com/chat/completions"
10 |
11 | # Force fields
12 | FORCE_FIELDS = {
13 | "AMBER99SB-ILDN": "amber99sb-ildn",
14 | "CHARMM36": "charmm36-feb2021",
15 | "GROMOS96 53a6": "gromos53a6",
16 | "OPLS-AA/L": "oplsaa"
17 | }
18 |
19 | # Water models
20 | WATER_MODELS = ["spc", "tip3p", "tip4p"]
21 |
22 | # Box types
23 | BOX_TYPES = ["cubic", "dodecahedron", "octahedron"]
24 |
25 | # MDP file types
26 | MDP_TYPES = ["ions", "em", "nvt", "npt", "md"]
27 |
# Default MDP parameters, keyed by the entries of MDP_TYPES above.
# NOTE(review): the nvt/npt/md titles say "Protein-ligand complex" but these
# defaults appear to be shared defaults rather than complex-specific — confirm
# before relying on the titles.
DEFAULT_MDP_PARAMS = {
    # Ion placement: steepest-descent settings with plain cutoff
    # electrostatics.
    "ions": {
        "integrator": "steep",
        "emtol": 1000.0,
        "emstep": 0.01,
        "nsteps": 50000,
        "nstlist": 1,
        "cutoff-scheme": "Verlet",
        "ns_type": "grid",
        "coulombtype": "cutoff",
        "rcoulomb": 1.0,
        "rvdw": 1.0,
        "pbc": "xyz"
    },
    # Energy minimization: identical to "ions" except PME electrostatics.
    "em": {
        "integrator": "steep",
        "emtol": 1000.0,
        "emstep": 0.01,
        "nsteps": 50000,
        "nstlist": 1,
        "cutoff-scheme": "Verlet",
        "ns_type": "grid",
        "coulombtype": "PME",
        "rcoulomb": 1.0,
        "rvdw": 1.0,
        "pbc": "xyz"
    },
    # NVT equilibration: 50000 steps at dt = 0.002 ps (100 ps), velocities
    # generated at 300 K, no pressure coupling.
    "nvt": {
        "title": "Protein-ligand complex NVT equilibration",
        "define": "-DPOSRES",
        "integrator": "md",
        "nsteps": 50000,
        "dt": 0.002,
        "nstxout": 500,
        "nstvout": 500,
        "nstenergy": 500,
        "nstlog": 500,
        "continuation": "no",
        "constraint_algorithm": "lincs",
        "constraints": "h-bonds",
        "lincs_iter": 1,
        "lincs_order": 4,
        "cutoff-scheme": "Verlet",
        "ns_type": "grid",
        "nstlist": 10,
        "rcoulomb": 1.0,
        "rvdw": 1.0,
        "DispCorr": "EnerPres",
        "coulombtype": "PME",
        "pme_order": 4,
        "fourierspacing": 0.16,
        "tcoupl": "V-rescale",
        "tc-grps": "Protein Non-Protein",
        "tau_t": "0.1 0.1",
        "ref_t": "300 300",
        "pcoupl": "no",
        "pbc": "xyz",
        "gen_vel": "yes",
        "gen_temp": 300,
        "gen_seed": -1
    },
    # NPT equilibration: continues from NVT (no velocity generation) and adds
    # Parrinello-Rahman pressure coupling at 1 bar.
    "npt": {
        "title": "Protein-ligand complex NPT equilibration",
        "define": "-DPOSRES",
        "integrator": "md",
        "nsteps": 50000,
        "dt": 0.002,
        "nstxout": 500,
        "nstvout": 500,
        "nstenergy": 500,
        "nstlog": 500,
        "continuation": "yes",
        "constraint_algorithm": "lincs",
        "constraints": "h-bonds",
        "lincs_iter": 1,
        "lincs_order": 4,
        "cutoff-scheme": "Verlet",
        "ns_type": "grid",
        "nstlist": 10,
        "rcoulomb": 1.0,
        "rvdw": 1.0,
        "DispCorr": "EnerPres",
        "coulombtype": "PME",
        "pme_order": 4,
        "fourierspacing": 0.16,
        "tcoupl": "V-rescale",
        "tc-grps": "Protein Non-Protein",
        "tau_t": "0.1 0.1",
        "ref_t": "300 300",
        "pcoupl": "Parrinello-Rahman",
        "pcoupltype": "isotropic",
        "tau_p": 2.0,
        "ref_p": 1.0,
        "compressibility": 4.5e-5,
        "refcoord_scaling": "com",
        "pbc": "xyz",
        "gen_vel": "no"
    },
    # Production MD: no position restraints; writes compressed trajectory
    # output (.xtc) in addition to full-precision frames.
    "md": {
        "title": "Protein-ligand complex MD simulation",
        "integrator": "md",
        "nsteps": 5000000,  # Default 10 ns (5,000,000 steps x 0.002 ps)
        "dt": 0.002,
        "nstxout": 5000,
        "nstvout": 5000,
        "nstenergy": 5000,
        "nstlog": 5000,
        "nstxout-compressed": 5000,
        "compressed-x-grps": "System",
        "continuation": "yes",
        "constraint_algorithm": "lincs",
        "constraints": "h-bonds",
        "lincs_iter": 1,
        "lincs_order": 4,
        "cutoff-scheme": "Verlet",
        "ns_type": "grid",
        "nstlist": 10,
        "rcoulomb": 1.0,
        "rvdw": 1.0,
        "DispCorr": "EnerPres",
        "coulombtype": "PME",
        "pme_order": 4,
        "fourierspacing": 0.16,
        "tcoupl": "V-rescale",
        "tc-grps": "Protein Non-Protein",
        "tau_t": "0.1 0.1",
        "ref_t": "300 300",
        "pcoupl": "Parrinello-Rahman",
        "pcoupltype": "isotropic",
        "tau_p": 2.0,
        "ref_p": 1.0,
        "compressibility": 4.5e-5,
        "pbc": "xyz",
        "gen_vel": "no"
    }
}
165 |
# Standard residues list: the 20 canonical amino acids plus common water
# residue names and monatomic ions. Presumably used to classify anything
# else in a PDB as a potential ligand — confirm against check_for_ligands.
STANDARD_RESIDUES = [
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL",
    "HOH", "WAT", "TIP", "SOL", "NA", "CL", "K", "CA", "MG", "ZN"
]
172 |
# System messages for the LLM. Which one is used presumably depends on the
# MDLLMAgent "mode" setting ("copilot" vs "agent") — confirm in md_agent.py.

# Advisor/copilot persona: guides the user stage by stage and asks for input.
SYSTEM_MESSAGE_ADVISOR = """You are an expert molecular dynamics (MD) assistant that helps run GROMACS simulations.

Your primary goal is to guide the user through setting up and running MD simulations for protein systems.
You have access to various functions to interact with GROMACS and manage simulations.

1. First, you should check if GROMACS is installed using check_gromacs_installation()
2. Guide the user through the entire MD workflow in these stages:
- Setup: Get protein file and prepare workspace
- Prepare Protein: Generate topology with appropriate force field
- Solvation: Add water and ions to the system
- Energy Minimization: Remove bad contacts
- Equilibration: Equilibrate the system (NVT and NPT)
- Production: Run the actual MD simulation
- Analysis: Analyze results (RMSD, RMSF, etc.)
3. The default protocol is protein only, for other functions, switch to corresponding protocol first.
- MM/GBSA: switch_to_mmpbsa_protocol
- Protein-Ligand complex: set_ligand


IMPORTANT: When running GROMACS commands that require interactive group selection, ALWAYS use echo commands to pipe the selection to the GROMACS command. For example:
- Instead of: gmx rms -s md.tpr -f md.xtc -o rmsd.xvg
- Use: echo "Protein Protein" | gmx rms -s md.tpr -f md.xtc -o rmsd.xvg


For each step:
1. Explain what you're doing and why
2. Execute the necessary functions to perform the actions
3. Check the results and handle any errors
4. Ask the user for input when needed


When you reach a point where you're waiting for the user's response or you've completed
the current stage of the workflow, end your response with: "This is the final answer at this stage."

Always provide clear explanations for technical concepts, and guide the user through the
entire process from start to finish.
"""

# Autonomous agent persona: executes the workflow with minimal confirmation.
SYSTEM_MESSAGE_AGENT = """You are an autonomous MD agent that runs GROMACS simulations for the user.

Your primary goal is to execute molecular dynamics simulations of proteins and protein-ligand systems as requested by the user. Take direct action, making reasonable default choices when parameters aren't specified.

1. First, check if GROMACS is installed using check_gromacs_installation()
2. Execute the MD workflow efficiently
3. The default protocol is protein only, for other functions, switch to corresponding protocol first.
- MM/GBSA: switch_to_mmpbsa_protocol
- Protein-Ligand complex: set_ligand

IMPORTANT: When running GROMACS commands that require interactive group selection, use echo commands:
- Use: echo "Protein Protein" | gmx rms -s md.tpr -f md.xtc -o rmsd.xvg

For each action:
1. Execute the necessary functions without asking for confirmation
2. Check results and solve problems autonomously
3. Explain what you're doing briefly but focus on execution
4. Only ask for input when absolutely necessary

Keep in mind:
- Select reasonable default parameters when not specified
- Handle protein-ligand systems automatically when detected

When you complete a stage or need user input, end with: "This is the final answer at this stage."

Focus on efficiently completing the requested simulation with minimal user intervention.
"""
--------------------------------------------------------------------------------
/gromacs_copilot/core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Core module for GROMACS Copilot
3 | """
4 |
5 | from gromacs_copilot.core.enums import SimulationStage, MessageType
6 | from gromacs_copilot.core.md_agent import MDLLMAgent
7 |
8 | __all__ = [
9 | 'SimulationStage',
10 | 'MessageType',
11 | 'MDLLMAgent'
12 | ]
--------------------------------------------------------------------------------
/gromacs_copilot/core/enums.py:
--------------------------------------------------------------------------------
1 | """
2 | Enumerations for GROMACS Copilot
3 | """
4 |
5 | from enum import Enum, auto
6 |
class SimulationStage(Enum):
    """Workflow checkpoints of an MD run, listed in execution order."""

    SETUP = 1                   # workspace and input file preparation
    PREPARE_PROTEIN = 2         # topology generation
    PREPARE_LIGAND = 3          # For protein-ligand simulations
    PREPARE_COMPLEX = 4         # For protein-ligand simulations
    SOLVATION = 5               # add water and ions
    ENERGY_MINIMIZATION = 6
    EQUILIBRATION = 7           # NVT / NPT equilibration
    PRODUCTION = 8
    ANALYSIS = 9
    COMPLETED = 10
19 |
class MessageType(Enum):
    """Categories used to style messages printed to the terminal."""

    INFO = 1
    SUCCESS = 2
    WARNING = 3
    ERROR = 4
    TITLE = 5
    SYSTEM = 6
    USER = 7
    COMMAND = 8
    TOOL = 9
    FINAL = 10
--------------------------------------------------------------------------------
/gromacs_copilot/core/md_agent.py:
--------------------------------------------------------------------------------
1 | """
2 | Main MD Agent class for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import json
7 | import logging
8 | import requests
9 | from typing import List, Dict, Any, Optional, Union
10 |
11 | from gromacs_copilot.protocols.protein import ProteinProtocol
12 | from gromacs_copilot.protocols.protein_ligand import ProteinLigandProtocol
13 | from gromacs_copilot.protocols.mmpbsa import MMPBSAProtocol
14 | from gromacs_copilot.protocols.analysis import AnalysisProtocol
15 |
16 | from gromacs_copilot.utils.terminal import print_message, prompt_user
17 | from gromacs_copilot.core.enums import MessageType, SimulationStage
18 | from gromacs_copilot.config import SYSTEM_MESSAGE_ADVISOR, SYSTEM_MESSAGE_AGENT
19 |
20 |
21 | class MDLLMAgent:
22 | """LLM-based agent for running molecular dynamics simulations with GROMACS"""
23 |
    def __init__(self, api_key: str = None, model: str = "gpt-4o",
                workspace: str = "./md_workspace",
                url: str = "https://api.openai.com/v1/chat/completions", mode: str = "copilot", gmx_bin: str = "gmx"):
        """
        Initialize the MD LLM agent

        Args:
            api_key: API key for LLM service (falls back to the
                OPENAI_API_KEY environment variable when omitted)
            model: Model to use for LLM
            workspace: Directory to use as the working directory
            url: URL of the LLM API endpoint
            mode: "copilot" (advisor-style) or "agent" (autonomous) behavior
            gmx_bin: Name or path of the GROMACS binary to invoke

        Raises:
            ValueError: If no API key is given and OPENAI_API_KEY is unset
        """
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        self.url = url
        if not self.api_key:
            raise ValueError("API key is required. Provide as parameter or set OPENAI_API_KEY environment variable")

        self.model = model
        self.conversation_history = []
        self.workspace = workspace
        self.gmx_bin = gmx_bin

        # Default protocol; the switch_to_* methods replace this with
        # protein-ligand / MM-PBSA / analysis protocols as needed.
        self.protocol = ProteinProtocol(workspace, self.gmx_bin)
        self.mode = mode

        logging.info(f"MD LLM Agent initialized with model: {model}")
51 |
52 | def switch_to_mmpbsa_protocol(self) -> Dict[str, Any]:
53 | """
54 | Switch to MM-PBSA protocol
55 |
56 | Returns:
57 | Dictionary with result information
58 | """
59 | try:
60 | # Create new MM-PBSA protocol
61 | old_protocol = self.protocol
62 | self.protocol = MMPBSAProtocol(self.workspace)
63 |
64 | # Copy relevant state from the old protocol if possible
65 | if hasattr(old_protocol, 'topology_file'):
66 | self.protocol.topology_file = old_protocol.topology_file
67 |
68 | if hasattr(old_protocol, 'trajectory_file'):
69 | self.protocol.trajectory_file = old_protocol.trajectory_file
70 |
71 | logging.info("Switched to MM-PBSA protocol")
72 |
73 | return {
74 | "success": True,
75 | "message": "Switched to MM-PBSA protocol successfully",
76 | "previous_protocol": old_protocol.__class__.__name__,
77 | "current_protocol": "MMPBSAProtocol"
78 | }
79 | except Exception as e:
80 | return {
81 | "success": False,
82 | "error": f"Failed to switch to MM-PBSA protocol: {str(e)}"
83 | }
84 |
85 | def switch_to_protein_ligand_protocol(self) -> Dict[str, Any]:
86 | """
87 | Switch to Protein-Ligand protocol
88 |
89 | Returns:
90 | Dictionary with result information
91 | """
92 | try:
93 | # Create new Protein-Ligand protocol
94 | old_protocol = self.protocol
95 | self.protocol = ProteinLigandProtocol(self.workspace)
96 |
97 | # Copy relevant state from the old protocol if possible
98 | if hasattr(old_protocol, 'topology_file'):
99 | self.protocol.topology_file = old_protocol.topology_file
100 |
101 | if hasattr(old_protocol, 'trajectory_file'):
102 | self.protocol.trajectory_file = old_protocol.trajectory_file
103 |
104 | logging.info("Switched to Protein-Ligand protocol")
105 |
106 | return {
107 | "success": True,
108 | "message": "Switched to Protein-Ligand protocol successfully",
109 | "previous_protocol": old_protocol.__class__.__name__,
110 | "current_protocol": "ProteinLigandProtocol"
111 | }
112 | except Exception as e:
113 | return {
114 | "success": False,
115 | "error": f"Failed to switch to Protein-Ligand protocol: {str(e)}"
116 | }
117 |
118 | def switch_to_analysis_protocol(self) -> Dict[str, Any]:
119 | """
120 | Switch to Analysis protocol
121 |
122 | Returns:
123 | Dictionary with result information
124 | """
125 | try:
126 | # Create new Analysis protocol
127 | old_protocol = self.protocol
128 | self.protocol = AnalysisProtocol(self.workspace)
129 |
130 | # Copy relevant state from the old protocol if possible
131 | if hasattr(old_protocol, 'topology_file'):
132 | self.protocol.topology_file = old_protocol.topology_file
133 |
134 | if hasattr(old_protocol, 'trajectory_file'):
135 | self.protocol.trajectory_file = old_protocol.trajectory_file
136 |
137 | logging.info("Switched to Analysis protocol")
138 |
139 | return {
140 | "success": True,
141 | "message": "Switched to Analysis protocol successfully",
142 | "previous_protocol": old_protocol.__class__.__name__,
143 | "current_protocol": "AnalysisProtocol"
144 | }
145 | except Exception as e:
146 | return {
147 | "success": False,
148 | "error": f"Failed to switch to Analysis protocol: {str(e)}"
149 | }
150 |
151 | def get_tool_schema(self) -> List[Dict[str, Any]]:
152 | """
153 | Get the schema for the tools available to the LLM
154 |
155 | Returns:
156 | List of tool schema dictionaries
157 | """
158 | tools = [
159 | {
160 | "type": "function",
161 | "function": {
162 | "name": "run_shell_command",
163 | "description": "Run a shell command",
164 | "parameters": {
165 | "type": "object",
166 | "properties": {
167 | "command": {
168 | "type": "string",
169 | "description": "Shell command to run"
170 | },
171 | "capture_output": {
172 | "type": "boolean",
173 | "description": "Whether to capture stdout/stderr"
174 | }
175 | },
176 | "required": ["command"]
177 | }
178 | }
179 | },
180 | {
181 | "type": "function",
182 | "function": {
183 | "name": "get_workspace_info",
184 | "description": "Get information about the current workspace",
185 | "parameters": {
186 | "type": "object",
187 | "properties": {},
188 | "required": []
189 | }
190 | }
191 | },
192 | {
193 | "type": "function",
194 | "function": {
195 | "name": "check_gromacs_installation",
196 | "description": "Check if GROMACS is installed and available",
197 | "parameters": {
198 | "type": "object",
199 | "properties": {},
200 | "required": []
201 | }
202 | }
203 | },
204 | {
205 | "type": "function",
206 | "function": {
207 | "name": "set_protein_file",
208 | "description": "Set and prepare the protein file for simulation",
209 | "parameters": {
210 | "type": "object",
211 | "properties": {
212 | "file_path": {
213 | "type": "string",
214 | "description": "Path to the protein structure file (PDB or GRO)"
215 | }
216 | },
217 | "required": ["file_path"]
218 | }
219 | }
220 | },
221 | {
222 | "type": "function",
223 | "function": {
224 | "name": "check_for_ligands",
225 | "description": "Check for potential ligands in the PDB file",
226 | "parameters": {
227 | "type": "object",
228 | "properties": {
229 | "pdb_file": {
230 | "type": "string",
231 | "description": "Path to the PDB file"
232 | }
233 | },
234 | "required": ["pdb_file"]
235 | }
236 | }
237 | },
238 | {
239 | "type": "function",
240 | "function": {
241 | "name": "set_ligand",
242 | "description": "Set the ligand for simulation",
243 | "parameters": {
244 | "type": "object",
245 | "properties": {
246 | "ligand_name": {
247 | "type": "string",
248 | "description": "Residue name of the ligand in the PDB file"
249 | }
250 | },
251 | "required": ["ligand_name"]
252 | }
253 | }
254 | },
255 | {
256 | "type": "function",
257 | "function": {
258 | "name": "generate_topology",
259 | "description": "Generate topology for the protein",
260 | "parameters": {
261 | "type": "object",
262 | "properties": {
263 | "force_field": {
264 | "type": "string",
265 | "description": "Name of the force field to use",
266 | "enum": ["AMBER99SB-ILDN", "CHARMM36", "GROMOS96 53a6", "OPLS-AA/L"]
267 | },
268 | "water_model": {
269 | "type": "string",
270 | "description": "Water model to use",
271 | "enum": ["spc", "tip3p", "tip4p"]
272 | }
273 | },
274 | "required": ["force_field"]
275 | }
276 | }
277 | },
278 | {
279 | "type": "function",
280 | "function": {
281 | "name": "define_simulation_box",
282 | "description": "Define the simulation box",
283 | "parameters": {
284 | "type": "object",
285 | "properties": {
286 | "distance": {
287 | "type": "number",
288 | "description": "Minimum distance between protein and box edge (nm)"
289 | },
290 | "box_type": {
291 | "type": "string",
292 | "description": "Type of box",
293 | "enum": ["cubic", "dodecahedron", "octahedron"]
294 | }
295 | },
296 | "required": []
297 | }
298 | }
299 | },
300 | {
301 | "type": "function",
302 | "function": {
303 | "name": "solvate_system",
304 | "description": "Solvate the protein in water",
305 | "parameters": {
306 | "type": "object",
307 | "properties": {},
308 | "required": []
309 | }
310 | }
311 | },
312 | {
313 | "type": "function",
314 | "function": {
315 | "name": "create_mdp_file",
316 | "description": "Create an MDP parameter file for GROMACS",
317 | "parameters": {
318 | "type": "object",
319 | "properties": {
320 | "mdp_type": {
321 | "type": "string",
322 | "description": "Type of MDP file",
323 | "enum": ["ions", "em", "nvt", "npt", "md"]
324 | },
325 | "params": {
326 | "type": "object",
327 | "description": "Optional override parameters",
328 | "properties": {
329 | "nsteps": {
330 | "type": "integer",
331 | "description": "Number of steps"
332 | },
333 | "dt": {
334 | "type": "number",
335 | "description": "Time step (fs)"
336 | }
337 | }
338 | }
339 | },
340 | "required": ["mdp_type"]
341 | }
342 | }
343 | },
344 | {
345 | "type": "function",
346 | "function": {
347 | "name": "add_ions",
348 | "description": "Add ions to the solvated system",
349 | "parameters": {
350 | "type": "object",
351 | "properties": {
352 | "concentration": {
353 | "type": "number",
354 | "description": "Salt concentration in M, default is 0.15"
355 | },
356 | "neutral": {
357 | "type": "boolean",
358 | "description": "Whether to neutralize the system"
359 | }
360 | },
361 | "required": []
362 | }
363 | }
364 | },
365 | {
366 | "type": "function",
367 | "function": {
368 | "name": "run_energy_minimization",
369 | "description": "Run energy minimization",
370 | "parameters": {
371 | "type": "object",
372 | "properties": {},
373 | "required": []
374 | }
375 | }
376 | },
377 | {
378 | "type": "function",
379 | "function": {
380 | "name": "run_nvt_equilibration",
381 | "description": "Run NVT equilibration",
382 | "parameters": {
383 | "type": "object",
384 | "properties": {},
385 | "required": []
386 | }
387 | }
388 | },
389 | {
390 | "type": "function",
391 | "function": {
392 | "name": "run_npt_equilibration",
393 | "description": "Run NPT equilibration",
394 | "parameters": {
395 | "type": "object",
396 | "properties": {},
397 | "required": []
398 | }
399 | }
400 | },
401 | {
402 | "type": "function",
403 | "function": {
404 | "name": "run_production_md",
405 | "description": "Run production MD",
406 | "parameters": {
407 | "type": "object",
408 | "properties": {
409 | "length_ns": {
410 | "type": "number",
411 | "description": "Length of the simulation in nanoseconds"
412 | }
413 | },
414 | "required": []
415 | }
416 | }
417 | },
418 | {
419 | "type": "function",
420 | "function": {
421 | "name": "analyze_rmsd",
422 | "description": "Perform RMSD analysis",
423 | "parameters": {
424 | "type": "object",
425 | "properties": {},
426 | "required": []
427 | }
428 | }
429 | },
430 | {
431 | "type": "function",
432 | "function": {
433 | "name": "analyze_rmsf",
434 | "description": "Perform RMSF analysis",
435 | "parameters": {
436 | "type": "object",
437 | "properties": {},
438 | "required": []
439 | }
440 | }
441 | },
442 | {
443 | "type": "function",
444 | "function": {
445 | "name": "analyze_gyration",
446 | "description": "Perform radius of gyration analysis",
447 | "parameters": {
448 | "type": "object",
449 | "properties": {},
450 | "required": []
451 | }
452 | }
453 | },
454 | {
455 | "type": "function",
456 | "function": {
457 | "name": "analyze_ligand_rmsd",
458 | "description": "Perform RMSD analysis focused on the ligand",
459 | "parameters": {
460 | "type": "object",
461 | "properties": {},
462 | "required": []
463 | }
464 | }
465 | },
466 | {
467 | "type": "function",
468 | "function": {
469 | "name": "analyze_protein_ligand_contacts",
470 | "description": "Analyze contacts between protein and ligand",
471 | "parameters": {
472 | "type": "object",
473 | "properties": {},
474 | "required": []
475 | }
476 | }
477 | },
478 | {
479 | "type": "function",
480 | "function": {
481 | "name": "set_simulation_stage",
482 | "description": "Set the current simulation stage",
483 | "parameters": {
484 | "type": "object",
485 | "properties": {
486 | "stage": {
487 | "type": "string",
488 | "description": "Name of the stage to set",
489 | "enum": [s.name for s in SimulationStage]
490 | }
491 | },
492 | "required": ["stage"]
493 | }
494 | }
495 | },
496 | {
497 | "type": "function",
498 | "function": {
499 | "name": "create_mmpbsa_index_file",
500 | "description": "Create index file for MM-PBSA analysis",
501 | "parameters": {
502 | "type": "object",
503 | "properties": {
504 | "protein_selection": {
505 | "type": "string",
506 | "description": "Selection for protein group"
507 | },
508 | "ligand_selection": {
509 | "type": "string",
510 | "description": "Selection for ligand group"
511 | }
512 | },
513 | "required": []
514 | }
515 | }
516 | },
517 | {
518 | "type": "function",
519 | "function": {
520 | "name": "create_mmpbsa_input",
521 | "description": "Create input file for MM-PBSA/GBSA calculation",
522 | "parameters": {
523 | "type": "object",
524 | "properties": {
525 | "method": {
526 | "type": "string",
527 | "description": "Method to use (pb or gb)",
528 | "enum": ["pb", "gb"]
529 | },
530 | "startframe": {
531 | "type": "integer",
532 | "description": "First frame to analyze"
533 | },
534 | "endframe": {
535 | "type": "integer",
536 | "description": "Last frame to analyze"
537 | },
538 | "interval": {
539 | "type": "integer",
540 | "description": "Interval between frames"
541 | },
542 | "ionic_strength": {
543 | "type": "number",
544 | "description": "Ionic strength for calculation"
545 | },
546 | "with_entropy": {
547 | "type": "boolean",
548 | "description": "Whether to include entropy calculation"
549 | }
550 | },
551 | "required": []
552 | }
553 | }
554 | },
555 | {
556 | "type": "function",
557 | "function": {
558 | "name": "run_mmpbsa_calculation",
559 | "description": "Run MM-PBSA/GBSA calculation for protein-ligand binding free energy",
560 | "parameters": {
561 | "type": "object",
562 | "properties": {
563 | "ligand_mol_file": {
564 | "type": "string",
565 | "description": "The Antechamber output mol2 file of ligand parametrization"
566 | },
567 | "index_file": {
568 | "type": "string",
569 | "description": "GROMACS index file containing protein and ligand groups"
570 | },
571 | "topology_file": {
572 | "type": "string",
573 | "description": "GROMACS topology file (tpr) for the system"
574 | },
575 | "protein_group": {
576 | "type": "string",
577 | "description": "Name or index of the protein group in the index file"
578 | },
579 | "ligand_group": {
580 | "type": "string",
581 | "description": "Name or index of the ligand group in the index file"
582 | },
583 | "trajectory_file": {
584 | "type": "string",
585 | "description": "GROMACS trajectory file (xtc) for analysis"
586 | },
587 | "overwrite": {
588 | "type": "boolean",
589 | "description": "Whether to overwrite existing output files",
590 | },
591 | "verbose": {
592 | "type": "boolean",
593 | "description": "Whether to print verbose output",
594 | }
595 | },
596 | "required": ["ligand_mol_file", "index_file", "topology_file", "protein_group", "ligand_group", "trajectory_file"]
597 | }
598 | }
599 | },
600 | {
601 | "type": "function",
602 | "function": {
603 | "name": "parse_mmpbsa_results",
604 | "description": "Parse MM-PBSA/GBSA results",
605 | "parameters": {
606 | "type": "object",
607 | "properties": {},
608 | "required": []
609 | }
610 | }
611 | },
612 | {
613 | "type": "function",
614 | "function": {
615 | "name": "switch_to_mmpbsa_protocol",
616 | "description": "Switch to MM-PBSA protocol for binding free energy calculations",
617 | "parameters": {
618 | "type": "object",
619 | "properties": {},
620 | "required": []
621 | }
622 | }
623 | }
624 | ]
625 |
626 | return tools
627 |
628 | def call_llm(self, messages: List[Dict[str, str]], tools: List[Dict[str, Any]] = None) -> Dict[str, Any]:
629 | """
630 | Call the LLM with messages and tools
631 |
632 | Args:
633 | messages: List of message dictionaries
634 | tools: List of tool schema dictionaries
635 |
636 | Returns:
637 | LLM response
638 | """
639 | tools = tools or self.get_tool_schema()
640 |
641 | headers = {
642 | "Authorization": f"Bearer {self.api_key}",
643 | "Content-Type": "application/json"
644 | }
645 |
646 | data = {
647 | "model": self.model,
648 | "messages": messages,
649 | "tools": tools
650 | }
651 |
652 | response = requests.post(
653 | self.url,
654 | headers=headers,
655 | json=data
656 | )
657 |
658 | if response.status_code != 200:
659 | logging.error(f"LLM API error: {response.status_code} - {response.text}")
660 | raise Exception(f"LLM API error: {response.status_code} - {response.text}")
661 |
662 | return response.json()
663 |
664 | def execute_tool_call(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
665 | """
666 | Execute a tool call
667 |
668 | Args:
669 | tool_call: Tool call dictionary
670 |
671 | Returns:
672 | Result of the tool call
673 | """
674 | function_name = tool_call["function"]["name"]
675 | arguments = json.loads(tool_call["function"]["arguments"])
676 |
677 | if function_name == "set_ligand" and not isinstance(self.protocol, ProteinLigandProtocol):
678 | # Switch to protein-ligand protocol
679 | old_protocol = self.protocol
680 | self.protocol = ProteinLigandProtocol(self.workspace)
681 |
682 | # Copy relevant state from the old protocol
683 | self.protocol.protein_file = old_protocol.protein_file
684 | self.protocol.stage = old_protocol.stage
685 |
686 | logging.info("Switched to protein-ligand protocol")
687 | elif function_name == "switch_to_mmpbsa_protocol":
688 | return self.switch_to_mmpbsa_protocol()
689 |
690 | # Get the method from the protocol class
691 | if hasattr(self.protocol, function_name):
692 | method = getattr(self.protocol, function_name)
693 | result = method(**arguments)
694 | return result
695 | else:
696 | return {
697 | "success": False,
698 | "error": f"Unknown function: {function_name}"
699 | }
700 |
701 | def run(self, starting_prompt: str = None) -> None:
702 | """
703 | Run the MD LLM agent
704 |
705 | Args:
706 | starting_prompt: Optional starting prompt for the LLM
707 | """
708 | # Initialize conversation with system message
709 | if self.mode == "copilot":
710 | system_message = {
711 | "role": "system",
712 | "content": SYSTEM_MESSAGE_ADVISOR
713 | }
714 | else:
715 | system_message = {
716 | "role": "system",
717 | "content": SYSTEM_MESSAGE_AGENT
718 | }
719 |
720 | self.conversation_history = [system_message]
721 |
722 | # Add starting prompt if provided
723 | if starting_prompt:
724 | self.conversation_history.append({
725 | "role": "user",
726 | "content": starting_prompt
727 | })
728 |
729 | # Get initial response from LLM
730 | response = self.call_llm(self.conversation_history)
731 |
732 | # Main conversation loop
733 | while True:
734 | assistant_message = response["choices"][0]["message"]
735 | self.conversation_history.append(assistant_message)
736 |
737 | # Process tool calls if any
738 | if "tool_calls" in assistant_message:
739 | for tool_call in assistant_message["tool_calls"]:
740 | # Execute the tool call
741 | print_message(f"Executing: {tool_call['function']['name']}", MessageType.TOOL)
742 | result = self.execute_tool_call(tool_call)
743 |
744 | # Add the tool call result to the conversation
745 | self.conversation_history.append({
746 | "role": "tool",
747 | "tool_call_id": tool_call["id"],
748 | "name": tool_call["function"]["name"],
749 | "content": json.dumps(result)
750 | })
751 |
752 | # Get next response from LLM
753 | response = self.call_llm(self.conversation_history)
754 | continue
755 |
756 | # Display the assistant's message
757 | content = assistant_message["content"]
758 |
759 | # Check if it's a final answer
760 | if "This is the final answer at this stage." in content:
761 | # Split at the final answer marker
762 | parts = content.split("This is the final answer at this stage.")
763 |
764 | # Print the main content normally
765 | print_message(parts[0].strip(), MessageType.INFO)
766 |
767 | # Print the final answer part with special formatting
768 | final_part = "This is the final answer at this stage." + parts[1]
769 | print_message(final_part.strip(), MessageType.FINAL, style="box")
770 | else:
771 | # Regular message
772 | print_message(content, MessageType.INFO)
773 |
774 | # Check if we've reached a stopping point
775 | if "This is the final answer at this stage." in content:
776 | # Ask if the user wants to continue
777 | user_input = prompt_user("Do you want to continue with the next stage?", default="yes")
778 | if user_input.lower() not in ["yes", "y", "continue", ""]:
779 | print_message("Exiting the MD agent. Thank you for using GROMACS Copilot!", MessageType.SUCCESS, style="box")
780 | break
781 |
782 | # Ask for the next user prompt
783 | user_input = prompt_user("What would you like to do next?")
784 | else:
785 | # Normal user input
786 | user_input = prompt_user("Your response")
787 |
788 | # Check for exit command
789 | if user_input.lower() in ["exit", "quit", "bye"]:
790 | print_message("Exiting the MD agent. Thank you for using GROMACS Copilot!", MessageType.SUCCESS, style="box")
791 | break
792 |
793 | # Add user input to conversation
794 | self.conversation_history.append({
795 | "role": "user",
796 | "content": user_input
797 | })
798 |
799 | # Get next response from LLM
800 | response = self.call_llm(self.conversation_history)
--------------------------------------------------------------------------------
/gromacs_copilot/mcp_server.py:
--------------------------------------------------------------------------------
1 | from mcp.server.fastmcp import FastMCP
2 | import os
3 | import logging
4 | from typing import Dict, Any, Optional, Union
5 | from gromacs_copilot.core.md_agent import MDLLMAgent
6 |
7 |
# Initialize FastMCP server
mcp = FastMCP("gromacs-copilot")

# Agent instance shared by all MCP tools; stays None until init_gromacs_copilot
# runs. NOTE: the original used a bare `global agent` statement here, which is a
# no-op at module scope and never defines the name — every `if agent is None`
# check in the tools below would have raised NameError before initialization.
agent = None
13 |
@mcp.tool()
async def init_gromacs_copilot(workspace: str, gmx_bin: str) -> Dict[str, Any]:
    """
    Initialize the GROMACS Copilot server with a specific workspace and GROMACS binary

    Args:
        workspace: Path to the workspace directory
        gmx_bin: Path to the GROMACS binary

    Returns:
        Dictionary with result information
    """
    global agent
    # api_key is not used by the MCP tools (the MCP client drives the LLM),
    # so a placeholder value is sufficient here.
    agent = MDLLMAgent(workspace=workspace, api_key="dummy", gmx_bin=gmx_bin)

    return {"success": True, "message": f"Initialized GROMACS Copilot with workspace: {workspace}"}
28 |
29 |
@mcp.tool()
async def check_gromacs_installation() -> Dict[str, Any]:
    """
    Check if GROMACS is installed and available

    Returns:
        Dictionary with GROMACS installation information
    """
    global agent
    if agent is not None:
        return agent.protocol.check_gromacs_installation()
    return {"success": False, "error": "agent not initialized"}
43 |
@mcp.tool()
async def set_protein_file(file_path: str) -> Dict[str, Any]:
    """
    Set and prepare the protein file for simulation, only use for protein-ligand complex

    Args:
        file_path: Path to the protein structure file (PDB or GRO)

    Returns:
        Dictionary with result information
    """
    global agent
    # Guard clauses: require an initialized agent whose protocol supports this step
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    if not hasattr(agent.protocol, "set_protein_file"):
        return {"success": False, "error": "set_protein_file method not available in agent, is not needed for protein only simulation."}
    return agent.protocol.set_protein_file(file_path)
63 |
@mcp.tool()
async def check_for_ligands(pdb_file: str) -> Dict[str, Any]:
    """
    Check for potential ligands in the PDB file, only use for protein-ligand complex

    Args:
        pdb_file: Path to the PDB file

    Returns:
        Dictionary with ligand information
    """
    global agent
    if agent is not None:
        return agent.protocol.check_for_ligands(pdb_file)
    return {"success": False, "error": "agent not initialized"}
80 |
@mcp.tool()
async def set_ligand(ligand_name: str) -> Dict[str, Any]:
    """
    Set the ligand for simulation, only use for protein-ligand complex

    Args:
        ligand_name: Residue name of the ligand in the PDB file

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.set_ligand(ligand_name)
    return {"success": False, "error": "agent not initialized"}
97 |
@mcp.tool()
async def generate_topology(force_field: str, water_model: str = "spc") -> Dict[str, Any]:
    """
    Generate topology for the protein

    Args:
        force_field: Name of the force field to use
        water_model: Water model to use

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.generate_topology(force_field, water_model)
    return {"success": False, "error": "agent not initialized"}
114 |
@mcp.tool()
async def define_simulation_box(distance: float = 1.0, box_type: str = "cubic") -> Dict[str, Any]:
    """
    Define the simulation box

    Args:
        distance: Minimum distance between protein and box edge (nm)
        box_type: Type of box (cubic, dodecahedron, octahedron)

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.define_simulation_box(distance, box_type)
    return {"success": False, "error": "agent not initialized"}
132 |
@mcp.tool()
async def solvate_system() -> Dict[str, Any]:
    """
    Solvate the protein in water

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.solvate_system()
    return {"success": False, "error": "agent not initialized"}
145 |
@mcp.tool()
async def create_mdp_file(mdp_type: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Create an MDP parameter file for GROMACS

    Args:
        mdp_type: Type of MDP file
        params: Optional override parameters

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.create_mdp_file(mdp_type, params)
    return {"success": False, "error": "agent not initialized"}
163 |
@mcp.tool()
async def add_ions(concentration: float = 0.15, neutral: bool = True) -> Dict[str, Any]:
    """
    Add ions to the solvated system

    Args:
        concentration: Salt concentration in M
        neutral: Whether to neutralize the system

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.add_ions(concentration, neutral)
    return {"success": False, "error": "agent not initialized"}
181 |
@mcp.tool()
async def run_energy_minimization() -> Dict[str, Any]:
    """
    Run energy minimization

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.run_energy_minimization()
    return {"success": False, "error": "agent not initialized"}
195 |
@mcp.tool()
async def run_nvt_equilibration() -> Dict[str, Any]:
    """
    Run NVT equilibration

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.run_nvt_equilibration()
    return {"success": False, "error": "agent not initialized"}
209 |
@mcp.tool()
async def run_npt_equilibration() -> Dict[str, Any]:
    """
    Run NPT equilibration

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.run_npt_equilibration()
    return {"success": False, "error": "agent not initialized"}
223 |
@mcp.tool()
async def run_production_md(length_ns: float = 10.0) -> Dict[str, Any]:
    """
    Run production MD

    Args:
        length_ns: Length of the simulation in nanoseconds

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.run_production_md(length_ns)
    return {"success": False, "error": "agent not initialized"}
240 |
@mcp.tool()
async def analyze_rmsd() -> Dict[str, Any]:
    """
    Perform RMSD analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.analyze_rmsd()
    return {"success": False, "error": "agent not initialized"}
254 |
@mcp.tool()
async def analyze_rmsf() -> Dict[str, Any]:
    """
    Perform RMSF analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.analyze_rmsf()
    return {"success": False, "error": "agent not initialized"}
268 |
@mcp.tool()
async def analyze_gyration() -> Dict[str, Any]:
    """
    Perform radius of gyration analysis

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.analyze_gyration()
    return {"success": False, "error": "agent not initialized"}
282 |
@mcp.tool()
async def analyze_ligand_rmsd() -> Dict[str, Any]:
    """
    Perform RMSD analysis focused on the ligand

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.analyze_ligand_rmsd()
    return {"success": False, "error": "agent not initialized"}
296 |
@mcp.tool()
async def analyze_protein_ligand_contacts() -> Dict[str, Any]:
    """
    Analyze contacts between protein and ligand

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.analyze_protein_ligand_contacts()
    return {"success": False, "error": "agent not initialized"}
310 |
@mcp.tool()
async def set_simulation_stage(stage: str) -> Dict[str, Any]:
    """
    Set the current simulation stage

    Args:
        stage: Name of the stage to set

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.set_simulation_stage(stage)
    return {"success": False, "error": "agent not initialized"}
327 |
@mcp.tool()
async def run_shell_command(command: str, capture_output: bool = True) -> Dict[str, Any]:
    """
    Run a shell command

    Args:
        command: Shell command to run
        capture_output: Whether to capture stdout/stderr

    Returns:
        Dictionary with command result information
    """
    global agent
    if agent is not None:
        return agent.protocol.run_shell_command(command, capture_output)
    return {"success": False, "error": "agent not initialized"}
345 |
@mcp.tool()
async def get_workspace_info() -> Dict[str, Any]:
    """
    Get information about the current workspace

    Returns:
        Dictionary with workspace information
    """
    global agent
    if agent is not None:
        return agent.protocol.get_state()
    return {"success": False, "error": "agent not initialized"}
359 |
# Add additional tools for MM-PBSA functionality
@mcp.tool()
async def switch_agent_protocol(protocol: str) -> Dict[str, Any]:
    """
    Switch to another protocol

    Args:
        protocol: Name of the protocol to switch to, [ligand, mmpbsa, analysis]

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is None:
        return {"success": False, "error": "agent not initialized"}
    # Bug fix: the original allowed-list was ["ligand", "mmpbsa, analysis"] —
    # a single fused string — so "mmpbsa" and "analysis" were always rejected.
    if protocol not in ["ligand", "mmpbsa", "analysis"]:
        return {"success": False, "error": "protocol not supported"}
    elif protocol == "mmpbsa":
        agent.switch_to_mmpbsa_protocol()
        return {"success": True, "message": "switched to mmpbsa protocol"}
    elif protocol == "ligand":
        agent.switch_to_protein_ligand_protocol()
        return {"success": True, "message": "switched to ligand protocol"}
    elif protocol == "analysis":
        agent.switch_to_analysis_protocol()
        return {"success": True, "message": "switched to analysis protocol"}
385 |
386 |
387 |
@mcp.tool()
async def create_mmpbsa_index_file(protein_selection: str = "Protein",
                                   ligand_selection: str = "LIG") -> Dict[str, Any]:
    """
    Create index file for MM-PBSA analysis

    Args:
        protein_selection: Selection for protein group
        ligand_selection: Selection for ligand group

    Returns:
        Dictionary with result information
    """
    global agent
    if agent is not None:
        return agent.protocol.create_mmpbsa_index_file(protein_selection, ligand_selection)
    return {"success": False, "error": "agent not initialized"}
406 |
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Protocol modules for GROMACS Copilot
3 | """
4 |
5 | from gromacs_copilot.protocols.base import BaseProtocol
6 | from gromacs_copilot.protocols.protein import ProteinProtocol
7 | from gromacs_copilot.protocols.protein_ligand import ProteinLigandProtocol
8 | from gromacs_copilot.protocols.analysis import AnalysisProtocol
9 |
10 | __all__ = [
11 | 'BaseProtocol',
12 | 'ProteinProtocol',
13 | 'ProteinLigandProtocol',
14 | 'AnalysisProtocol',
15 | 'MMPBSAProtocol'
16 | ]
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/analysis.py:
--------------------------------------------------------------------------------
1 | """
2 | Analysis protocol for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import logging
7 | from typing import Dict, Any, List, Optional
8 |
9 | from gromacs_copilot.protocols.base import BaseProtocol
10 | from gromacs_copilot.utils.shell import check_command_exists
11 |
12 |
class AnalysisProtocol(BaseProtocol):
    """Protocol for analysis of MD simulation results.

    Operates on the production outputs (md.tpr / md.xtc / md.edr) expected in
    the workspace and writes every derived file into the ``analysis/``
    subdirectory.
    """

    # Menu numbers that "gmx energy" typically assigns to common terms.
    # NOTE(review): these indices can shift with the system contents and
    # GROMACS version -- confirm against the interactive menu for a given run.
    ENERGY_TERM_MAP = {
        "Potential": "10",
        "Kinetic": "11",
        "Total": "12",
        "Temperature": "16",
        "Pressure": "17",
        "Volume": "22",
    }

    def __init__(self, workspace: str = "./md_workspace", has_ligand: bool = False, gmx_bin: str = "gmx"):
        """
        Initialize the analysis protocol

        Args:
            workspace: Directory to use as the working directory
            has_ligand: Whether the system includes a ligand
            gmx_bin: GROMACS executable to invoke (e.g. "gmx", "gmx_mpi")
        """
        # Forward gmx_bin to the base class instead of re-assigning it after.
        super().__init__(workspace, gmx_bin=gmx_bin)
        self.has_ligand = has_ligand
        self.production_file = None     # structure file, set by check_prerequisites
        self.trajectory_file = None     # trajectory file (md.xtc)
        self.topology_file = None       # topology file (topol.top)
        self.energy_file = None         # energy file (md.edr)
        # Join against the *absolute* workspace resolved by the base class:
        # the base __init__ chdirs into the workspace, so joining the raw
        # (possibly relative) ``workspace`` argument would nest the analysis
        # directory one level too deep (e.g. ws/md_workspace/analysis).
        self.analysis_dir = os.path.join(self.workspace, "analysis")

        os.makedirs(self.analysis_dir, exist_ok=True)

        logging.info(f"Analysis protocol initialized with workspace: {self.workspace}")

    def _fail(self, message: str, result: Dict[str, Any]) -> Dict[str, Any]:
        """Build the standard failure dictionary from a shell-command result."""
        return {
            "success": False,
            "error": f"{message}: {result['stderr']}"
        }

    def get_state(self) -> Dict[str, Any]:
        """
        Get the current state of the protocol

        Returns:
            Dictionary with workspace/analysis paths, the tracked input file
            names, and the files currently present in the analysis directory.
        """
        try:
            analysis_files = []
            if os.path.exists(self.analysis_dir):
                analysis_files = os.listdir(self.analysis_dir)

            return {
                "success": True,
                "workspace_path": self.workspace,
                "analysis_directory": self.analysis_dir,
                "has_ligand": self.has_ligand,
                "production_file": self.production_file,
                "trajectory_file": self.trajectory_file,
                "topology_file": self.topology_file,
                "energy_file": self.energy_file,
                "analysis_files": analysis_files
            }
        except Exception as e:
            logging.error(f"Error getting analysis state: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "workspace_path": self.workspace
            }

    def check_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for analysis are met

        Returns:
            Dictionary with prerequisite check information (tool availability
            and any missing input files).
        """
        # Check GROMACS installation
        gromacs_result = self.run_shell_command(f"{self.gmx_bin} --version", capture_output=True)
        gromacs_installed = gromacs_result["success"]

        # Check DSSP installation (optional, only needed for secondary structure)
        dssp_installed = check_command_exists("dssp") or check_command_exists("mkdssp")

        # Check for required files
        required_files = ["md.xtc", "md.tpr", "md.edr"]
        missing_files = [file for file in required_files if not os.path.exists(os.path.join(self.workspace, file))]

        if missing_files:
            return {
                "success": False,
                "installed": {
                    "gromacs": gromacs_installed,
                    "dssp": dssp_installed
                },
                "missing_files": missing_files,
                "error": f"Missing required files: {', '.join(missing_files)}"
            }

        # Set file paths if all required files exist.
        # NOTE(review): md.gro is recorded here but its existence is not
        # verified above -- confirm the production run always writes it.
        self.production_file = "md.gro"
        self.trajectory_file = "md.xtc"
        self.topology_file = "topol.top"
        self.energy_file = "md.edr"

        return {
            "success": True,
            "installed": {
                "gromacs": gromacs_installed,
                "dssp": dssp_installed
            }
        }

    def clean_trajectory(self) -> Dict[str, Any]:
        """
        Clean the trajectory file by removing PBC effects and centering

        Produces a PBC-corrected full trajectory, a fitted no-water
        trajectory, and the last frame as a PDB file.

        Returns:
            Dictionary with result information
        """
        # Step 1: remove PBC jumps and center on the protein
        cmd = f"echo 'Protein System' | {self.gmx_bin} trjconv -s md.tpr -f md.xtc -o analysis/clean_full.xtc -pbc nojump -ur compact -center"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Failed to clean trajectory", result)

        # Step 2: fit (rot+trans) and strip water
        cmd = f"echo 'Protein non-Water' | {self.gmx_bin} trjconv -s md.tpr -f analysis/clean_full.xtc -o analysis/clean_nowat.xtc -fit rot+trans"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Failed to create no-water trajectory", result)

        # Step 3: dump the last frame as a PDB (huge -dump time selects the end)
        cmd = f"echo 'Protein Protein' | {self.gmx_bin} trjconv -s md.tpr -f analysis/clean_nowat.xtc -o analysis/protein_lastframe.pdb -pbc nojump -ur compact -center -dump 9999999999999999"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Failed to extract last frame", result)

        return {
            "success": True,
            "clean_trajectory": "analysis/clean_full.xtc",
            "nowat_trajectory": "analysis/clean_nowat.xtc",
            "last_frame": "analysis/protein_lastframe.pdb"
        }

    def analyze_rmsd(self, selection: str = "Backbone", reference: str = "Backbone") -> Dict[str, Any]:
        """
        Perform RMSD analysis

        Args:
            selection: Selection to analyze
            reference: Reference selection for fitting

        Returns:
            Dictionary with result information
        """
        output_file = f"analysis/rmsd_{selection.lower()}.xvg"

        cmd = f"echo '{reference} {selection}' | {self.gmx_bin} rms -s md.tpr -f analysis/clean_nowat.xtc -o {output_file} -tu ns"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("RMSD analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "RMSD",
            "selection": selection,
            "reference": reference
        }

    def analyze_rmsf(self, selection: str = "Backbone") -> Dict[str, Any]:
        """
        Perform RMSF analysis (per-residue fluctuations)

        Args:
            selection: Selection to analyze

        Returns:
            Dictionary with result information
        """
        output_file = f"analysis/rmsf_{selection.lower()}.xvg"

        cmd = f"echo '{selection}' | {self.gmx_bin} rmsf -s md.tpr -f analysis/clean_nowat.xtc -o {output_file} -res"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("RMSF analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "RMSF",
            "selection": selection
        }

    def analyze_gyration(self, selection: str = "Protein") -> Dict[str, Any]:
        """
        Perform radius of gyration analysis

        Args:
            selection: Selection to analyze

        Returns:
            Dictionary with result information
        """
        output_file = f"analysis/gyrate_{selection.lower()}.xvg"

        cmd = f"echo '{selection}' | {self.gmx_bin} gyrate -s md.tpr -f analysis/clean_nowat.xtc -o {output_file}"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Radius of gyration analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "Radius of Gyration",
            "selection": selection
        }

    def analyze_hydrogen_bonds(self, selection1: str = "Protein", selection2: str = "Protein") -> Dict[str, Any]:
        """
        Perform hydrogen bond analysis between two selections

        Args:
            selection1: First selection
            selection2: Second selection

        Returns:
            Dictionary with result information
        """
        output_file = f"analysis/hbnum_{selection1.lower()}_{selection2.lower()}.xvg"

        cmd = f"echo -e '{selection1}\\n{selection2}' | {self.gmx_bin} hbond -s md.tpr -f analysis/clean_nowat.xtc -num {output_file}"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Hydrogen bond analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "Hydrogen Bonds",
            "selection1": selection1,
            "selection2": selection2
        }

    def analyze_secondary_structure(self) -> Dict[str, Any]:
        """
        Perform secondary structure analysis using DSSP

        Returns:
            Dictionary with result information
        """
        # Locate a DSSP executable ("dssp" or the newer "mkdssp")
        dssp_executable = None
        if check_command_exists("dssp"):
            dssp_executable = "dssp"
        elif check_command_exists("mkdssp"):
            dssp_executable = "mkdssp"

        if not dssp_executable:
            return {
                "success": False,
                "error": "DSSP is not installed. Please install DSSP or mkdssp."
            }

        # GROMACS locates DSSP via this environment variable.
        # NOTE(review): GROMACS expects a path here; a bare command name works
        # only when it resolves on PATH -- confirm for the target setup.
        os.environ["DSSP"] = dssp_executable

        cmd = f"echo 'Protein' | {self.gmx_bin} do_dssp -s md.tpr -f analysis/clean_nowat.xtc -o analysis/ss.xpm -ver 3 -tu ns -dt 0.05"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Secondary structure analysis failed", result)

        # Convert XPM to PS for better visualization (best effort: failure
        # here does not fail the analysis, it just drops the PS output).
        cmd = f"{self.gmx_bin} xpm2ps -f analysis/ss.xpm -o analysis/ss.ps -by 10 -bx 3"
        ps_result = self.run_shell_command(cmd)

        return {
            "success": True,
            "output_file": "analysis/ss.xpm",
            "ps_file": "analysis/ss.ps" if ps_result["success"] else None,
            "analysis_type": "Secondary Structure"
        }

    def analyze_energy(self, terms: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Perform energy analysis

        Args:
            terms: Energy terms to analyze; defaults to
                ["Potential", "Temperature", "Pressure"].

        Returns:
            Dictionary with per-term results; overall "success" is True only
            when every requested term succeeded.
        """
        # Avoid a mutable default argument.
        if terms is None:
            terms = ["Potential", "Temperature", "Pressure"]

        results = {}

        for term in terms:
            if term not in self.ENERGY_TERM_MAP:
                results[term] = {
                    "success": False,
                    "error": f"Unknown energy term: {term}"
                }
                continue

            output_file = f"analysis/energy_{term.lower()}.xvg"

            # "0" terminates the interactive term selection of gmx energy
            cmd = f"echo '{self.ENERGY_TERM_MAP[term]} 0' | {self.gmx_bin} energy -f md.edr -o {output_file}"
            result = self.run_shell_command(cmd)

            if not result["success"]:
                results[term] = {
                    "success": False,
                    "error": f"Energy analysis for {term} failed: {result['stderr']}"
                }
            else:
                results[term] = {
                    "success": True,
                    "output_file": output_file,
                    "analysis_type": "Energy",
                    "term": term
                }

        return {
            "success": all(entry["success"] for entry in results.values()),
            "results": results
        }

    def analyze_ligand_rmsd(self) -> Dict[str, Any]:
        """
        Perform RMSD analysis focused on the ligand (residue name LIG)

        Returns:
            Dictionary with result information
        """
        if not self.has_ligand:
            return {
                "success": False,
                "error": "No ligand in the system"
            }

        output_file = "analysis/ligand_rmsd.xvg"

        cmd = f"echo 'LIG LIG' | {self.gmx_bin} rms -s md.tpr -f analysis/clean_nowat.xtc -o {output_file} -tu ns"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Ligand RMSD analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "Ligand RMSD"
        }

    def analyze_protein_ligand_contacts(self) -> Dict[str, Any]:
        """
        Analyze contacts (minimum distance) between protein and ligand

        Returns:
            Dictionary with result information
        """
        if not self.has_ligand:
            return {
                "success": False,
                "error": "No ligand in the system"
            }

        output_file = "analysis/protein_ligand_mindist.xvg"

        cmd = f"echo -e 'Protein\\nLIG' | {self.gmx_bin} mindist -s md.tpr -f analysis/clean_nowat.xtc -od {output_file} -tu ns"
        result = self.run_shell_command(cmd)
        if not result["success"]:
            return self._fail("Protein-ligand contacts analysis failed", result)

        return {
            "success": True,
            "output_file": output_file,
            "analysis_type": "Protein-Ligand Minimum Distance"
        }

    def generate_analysis_report(self) -> Dict[str, Any]:
        """
        Generate a comprehensive analysis report

        Runs trajectory cleaning followed by the standard analyses (plus
        ligand analyses and DSSP when available) and summarizes the outcome.

        Returns:
            Dictionary with result information; "success" is True when at
            least one analysis succeeded.
        """
        os.makedirs(self.analysis_dir, exist_ok=True)

        # Cleaning is a hard prerequisite for every downstream analysis.
        clean_result = self.clean_trajectory()
        if not clean_result["success"]:
            return clean_result

        # Perform various analyses
        analyses = [
            self.analyze_rmsd(selection="Backbone", reference="Backbone"),
            self.analyze_rmsd(selection="Protein", reference="Backbone"),
            self.analyze_rmsf(selection="C-alpha"),
            self.analyze_gyration(selection="Protein"),
            self.analyze_energy(terms=["Potential", "Temperature", "Pressure"]),
            self.analyze_hydrogen_bonds(selection1="Protein", selection2="Protein")
        ]

        # Add ligand-specific analyses if applicable
        if self.has_ligand:
            analyses.extend([
                self.analyze_ligand_rmsd(),
                self.analyze_protein_ligand_contacts()
            ])

        # Try to do secondary structure analysis if DSSP is available
        if check_command_exists("dssp") or check_command_exists("mkdssp"):
            analyses.append(self.analyze_secondary_structure())

        successful_analyses = sum(1 for analysis in analyses if analysis["success"])

        return {
            "success": successful_analyses > 0,
            "total_analyses": len(analyses),
            "successful_analyses": successful_analyses,
            "analyses": analyses,
            "report_directory": self.analysis_dir
        }
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/base.py:
--------------------------------------------------------------------------------
1 | """
2 | Base protocol class for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import logging
7 | from abc import ABC, abstractmethod
8 | from typing import Dict, Any, Optional
9 |
10 | from gromacs_copilot.utils.shell import run_shell_command
11 | from gromacs_copilot.core.enums import SimulationStage
12 |
13 |
class BaseProtocol(ABC):
    """Base class for simulation protocols.

    Provides workspace creation, working-directory setup, shell-command
    execution, MDP parameter-file generation and stage tracking shared by
    all concrete protocols.
    """

    def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
        """
        Initialize the base protocol

        Args:
            workspace: Directory to use as the working directory
            gmx_bin: GROMACS executable to invoke (e.g. "gmx", "gmx_mpi")
        """
        # Resolve to an absolute path *before* chdir so later joins are stable.
        self.workspace = os.path.abspath(workspace)
        self.stage = SimulationStage.SETUP
        self.gmx_bin = gmx_bin

        os.makedirs(self.workspace, exist_ok=True)

        # NOTE(review): chdir is process-global -- instantiating a second
        # protocol with a different workspace silently moves the first one too.
        os.chdir(self.workspace)

        logging.info(f"Protocol initialized with workspace: {self.workspace}")

    def check_gromacs_installation(self) -> Dict[str, Any]:
        """
        Check if GROMACS is installed and available

        Returns:
            Dictionary with GROMACS installation information
        """
        result = self.run_shell_command(f"{self.gmx_bin} --version", capture_output=True)

        if result["success"]:
            version_info = result["stdout"].strip()
            return {
                "success": True,
                "installed": True,
                "version": version_info
            }
        return {
            "success": False,
            "installed": False,
            "error": "GROMACS is not installed or not in PATH"
        }

    def run_shell_command(self, command: str, capture_output: bool = True,
                        suppress_output: bool = False) -> Dict[str, Any]:
        """
        Run a shell command

        Args:
            command: Shell command to run
            capture_output: Whether to capture stdout/stderr
            suppress_output: Whether to suppress terminal output

        Returns:
            Dictionary with command result information
        """
        # Thin delegation so subclasses/tests can override command execution.
        return run_shell_command(command, capture_output, suppress_output)

    @abstractmethod
    def get_state(self) -> Dict[str, Any]:
        """
        Get the current state of the protocol

        Returns:
            Dictionary with protocol state information
        """
        pass

    @abstractmethod
    def check_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for the protocol are met

        Returns:
            Dictionary with prerequisite check information
        """
        pass

    def create_mdp_file(self, mdp_type: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Create an MDP parameter file for GROMACS

        Args:
            mdp_type: Type of MDP file (ions, em, nvt, npt, md)
            params: Optional override parameters

        Returns:
            Dictionary with result information
        """
        # Imported lazily to avoid a circular import at module load time.
        from gromacs_copilot.config import DEFAULT_MDP_PARAMS, MDP_TYPES

        if mdp_type not in MDP_TYPES:
            return {
                "success": False,
                "error": f"Unknown MDP type: {mdp_type}. Available types: {MDP_TYPES}"
            }

        # Start with default parameters, then apply user overrides on top.
        mdp_params = DEFAULT_MDP_PARAMS[mdp_type].copy()
        if params:
            mdp_params.update(params)

        # Render "key = value" lines with aligned keys.
        mdp_content = f"; {mdp_type}.mdp - Generated by GROMACS Copilot\n"
        for key, value in mdp_params.items():
            mdp_content += f"{key:<20} = {value}\n"

        file_path = f"{mdp_type}.mdp"
        try:
            with open(file_path, "w") as f:
                f.write(mdp_content)
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to write MDP file: {str(e)}"
            }

        return {
            "success": True,
            "file_path": file_path,
            "mdp_type": mdp_type,
            "params": mdp_params
        }

    def set_simulation_stage(self, stage: str) -> Dict[str, Any]:
        """
        Set the current simulation stage

        Args:
            stage: Name of the stage to set

        Returns:
            Dictionary with the new stage and the stage that was active
            before the change.
        """
        try:
            # Bug fix: capture the stage *before* reassigning, otherwise
            # "previous_stage" just echoes the new stage.
            previous_stage = self.stage
            self.stage = SimulationStage[stage]
            return {
                "success": True,
                "stage": self.stage.name,
                "previous_stage": previous_stage.name
            }
        except KeyError:
            return {
                "success": False,
                "error": f"Unknown stage: {stage}. Available stages: {[s.name for s in SimulationStage]}"
            }
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/mmpbsa.py:
--------------------------------------------------------------------------------
1 | """
2 | MM-PBSA/GBSA binding free energy calculation protocol for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import logging
7 | from typing import Dict, Any, Optional, List
8 |
9 | from gromacs_copilot.protocols.base import BaseProtocol
10 | from gromacs_copilot.utils.shell import check_command_exists, run_shell_command
11 |
12 |
class MMPBSAProtocol(BaseProtocol):
    """Protocol for MM-PBSA/GBSA binding free energy calculations.

    Wraps the external ``gmx_MMPBSA`` tool: builds the index and input files,
    launches the calculation and parses the resulting energy summary.
    """

    def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
        """
        Initialize the MM-PBSA protocol

        Args:
            workspace: Directory to use as the working directory
            gmx_bin: GROMACS executable to invoke (e.g. "gmx", "gmx_mpi")
        """
        # Forward gmx_bin to the base class instead of re-assigning it after.
        super().__init__(workspace, gmx_bin=gmx_bin)

        # Initialize MM-PBSA specific attributes
        self.trajectory_file = None   # md.xtc, set by check_mmpbsa_prerequisites
        self.topology_file = None     # md.tpr, set by check_mmpbsa_prerequisites
        self.index_file = None
        self.protein_group = None
        self.ligand_group = None
        self.complex_group = None
        # Join against the *absolute* workspace resolved by the base class:
        # the base __init__ chdirs into the workspace, so joining the raw
        # (possibly relative) ``workspace`` argument would nest the mmpbsa
        # directory one level too deep.
        self.mmpbsa_dir = os.path.join(self.workspace, "mmpbsa")

        os.makedirs(self.mmpbsa_dir, exist_ok=True)

        logging.info(f"MM-PBSA protocol initialized with workspace: {self.workspace}")

    def get_state(self) -> Dict[str, Any]:
        """
        Get the current state of the protocol

        Returns:
            Dictionary with workspace/mmpbsa paths, tracked files and group
            numbers, and the files currently present in the mmpbsa directory.
        """
        try:
            mmpbsa_files = []
            if os.path.exists(self.mmpbsa_dir):
                mmpbsa_files = os.listdir(self.mmpbsa_dir)

            return {
                "success": True,
                "workspace_path": self.workspace,
                "mmpbsa_directory": self.mmpbsa_dir,
                "trajectory_file": self.trajectory_file,
                "topology_file": self.topology_file,
                "index_file": self.index_file,
                "protein_group": self.protein_group,
                "ligand_group": self.ligand_group,
                "complex_group": self.complex_group,
                "mmpbsa_files": mmpbsa_files
            }
        except Exception as e:
            logging.error(f"Error getting MM-PBSA state: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "workspace_path": self.workspace
            }

    def check_mmpbsa_prerequisites(self) -> Dict[str, Any]:
        """
        Check if prerequisites for MM-PBSA analysis are met

        Returns:
            Dictionary with prerequisite check information (tool availability
            and any missing input files).
        """
        # Check GROMACS installation
        gromacs_result = run_shell_command(f"{self.gmx_bin} --version", capture_output=True)
        gromacs_installed = gromacs_result["success"]

        # Check gmx_MMPBSA installation
        gmx_mmpbsa_installed = check_command_exists("gmx_MMPBSA")

        # Check for required files
        required_files = ["md.tpr", "md.xtc"]
        missing_files = [file for file in required_files if not os.path.exists(os.path.join(self.workspace, file))]

        if missing_files:
            return {
                "success": False,
                "installed": {
                    "gromacs": gromacs_installed,
                    "gmx_mmpbsa": gmx_mmpbsa_installed
                },
                "missing_files": missing_files,
                "error": f"Missing required files: {', '.join(missing_files)}"
            }

        # Set file paths if all required files exist
        self.trajectory_file = "md.xtc"
        self.topology_file = "md.tpr"

        return {
            "success": True,
            "installed": {
                "gromacs": gromacs_installed,
                "gmx_mmpbsa": gmx_mmpbsa_installed
            }
        }

    def create_mmpbsa_index_file(self, protein_selection: str = "Protein",
                            ligand_selection: str = "LIG") -> Dict[str, Any]:
        """
        Create index file for MM-PBSA analysis

        Args:
            protein_selection: Selection for protein group
            ligand_selection: Selection for ligand group

        Returns:
            Dictionary mapping group names to 0-based group numbers (plus a
            "success" key), or an error dictionary on failure.
        """
        if not os.path.exists(os.path.join(self.workspace, "md.tpr")):
            return {
                "success": False,
                "error": "Topology file not found"
            }

        # Create index file with protein and ligand groups.
        # Use the configured binary instead of a hard-coded "gmx".
        cmd = f"""echo -e "name {protein_selection}\\nname {ligand_selection}\\n\\nq" | {self.gmx_bin} make_ndx -f md.tpr -o mmpbsa/mmpbsa.ndx"""
        result = self.run_shell_command(cmd)

        if not result["success"]:
            return {
                "success": False,
                "error": f"Failed to create index file: {result['stderr']}"
            }

        # List the "[ group ]" headers with their line numbers so the group
        # index (order of appearance) can be recovered.
        groups_cmd = "grep '\\[' mmpbsa/mmpbsa.ndx | grep -n '\\[' | awk '{print $1, $2, $3}'"
        groups_result = self.run_shell_command(groups_cmd)

        if not groups_result["success"]:
            return {
                "success": False,
                "error": f"Failed to extract group numbers: {groups_result['stderr']}"
            }

        # Parse "N:[ name ..." lines into {name: N-1} (0-based group numbers,
        # as expected by GROMACS interactive selection).
        try:
            group_dict = {}
            for line in groups_result["stdout"].strip().split('\n'):
                if ':' not in line:
                    continue
                parts = line.split(':')
                if len(parts) >= 2:
                    group_num = int(parts[0]) - 1  # grep -n counts from 1
                    group_name = parts[1].strip()
                    group_dict[group_name] = group_num

            # NOTE(review): the returned mapping mixes group names with the
            # "success" flag; callers must skip the "success" key.
            group_dict["success"] = True
            return group_dict

        except Exception as e:
            return {
                "success": False,
                "error": f"Error parsing group numbers: {str(e)}"
            }

    def create_mmpbsa_input(self, method: str = "pb",
                        startframe: int = 1,
                        endframe: int = 1000,
                        interval: int = 10,
                        ionic_strength: float = 0.15,
                        with_entropy: bool = False) -> Dict[str, Any]:
        """
        Create input file for MM-PBSA/GBSA calculation

        Args:
            method: Method to use (pb or gb)
            startframe: First frame to analyze
            endframe: Last frame to analyze
            interval: Interval between frames
            ionic_strength: Ionic strength for PB/GB calculation
            with_entropy: Whether to include entropy calculation

        Returns:
            Dictionary with result information
        """
        try:
            # &general section shared by both methods
            lines = [
                "&general",
                " sys_name = Protein_Ligand",
                f" startframe = {startframe}",
                f" endframe = {endframe}",
                f" interval = {interval}",
            ]

            if with_entropy:
                lines.append(" entropy = 1")
                lines.append(" entropy_seg = 25")  # frames used per entropy segment

            lines.append("/")
            lines.append("")

            if method.lower() == "pb":
                lines += [
                    "&pb",
                    f" istrng = {ionic_strength}",
                    " fillratio = 4.0",
                    " inp = 2",
                    " radiopt = 0",
                    "/",
                ]
            elif method.lower() == "gb":
                lines += [
                    "&gb",
                    f" saltcon = {ionic_strength}",
                    " igb = 5",  # GB model (5 = OBC2)
                    "/",
                ]

            input_file_path = os.path.join(self.mmpbsa_dir, "mmpbsa.in")
            with open(input_file_path, "w") as f:
                f.write("\n".join(lines) + "\n")

            return {
                "success": True,
                "input_file": input_file_path,
                "method": method,
                "startframe": startframe,
                "endframe": endframe,
                "interval": interval,
                "with_entropy": with_entropy
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error creating MM-PBSA input file: {str(e)}"
            }

    def run_mmpbsa_calculation(self,
                            ligand_mol_file: str,
                            index_file: str,
                            topology_file: str,
                            protein_group: str,
                            ligand_group: str,
                            trajectory_file: str,
                            overwrite: bool = True,
                            verbose: bool = True) -> Dict[str, Any]:
        """
        Run MM-PBSA/GBSA calculation via gmx_MMPBSA

        Args:
            ligand_mol_file: Ligand mol2/structure file for -lm
            index_file: Index file with protein/ligand groups
            topology_file: Complex structure/topology file for -cs
            protein_group: Protein group number (as a string)
            ligand_group: Ligand group number (as a string)
            trajectory_file: Trajectory file for -ct
            overwrite: Whether to overwrite existing output files
            verbose: Accepted for backward compatibility; currently unused

        Returns:
            Dictionary with result information
        """
        if not index_file or not os.path.exists(os.path.join(self.workspace, index_file)):
            return {
                "success": False,
                "error": "Index file not found"
            }

        input_file = os.path.join(self.mmpbsa_dir, "mmpbsa.in")
        if not os.path.exists(input_file):
            return {
                "success": False,
                "error": "MM-PBSA input file not found. Run create_mmpbsa_input() first."
            }

        overwrite_flag = "-O" if overwrite else ""

        cmd = f"cd {self.workspace} && gmx_MMPBSA {overwrite_flag} -i {input_file} -cs {topology_file} -ci {index_file} -cg {protein_group} {ligand_group} -ct {trajectory_file} -lm {ligand_mol_file} -o {self.mmpbsa_dir}/FINAL_RESULTS_MMPBSA.dat -nogui"

        result = self.run_shell_command(cmd)

        if not result["success"]:
            return {
                "success": False,
                "error": f"MM-PBSA calculation failed: {result['stderr']}"
            }

        # Check if output file exists
        final_results = os.path.join(self.mmpbsa_dir, "FINAL_RESULTS_MMPBSA.dat")
        if not os.path.exists(final_results):
            return {
                "success": False,
                "error": "MM-PBSA calculation did not produce expected output file"
            }

        return {
            "success": True,
            "results_file": final_results,
            "output_dir": self.mmpbsa_dir
        }

    def check_prerequisites(self):
        # Abstract-method stub; the real check lives in
        # check_mmpbsa_prerequisites() and is invoked explicitly by callers.
        pass

    def parse_mmpbsa_results(self) -> Dict[str, Any]:
        """
        Parse MM-PBSA/GBSA results

        Returns:
            Dictionary with the total binding energy, its components, and the
            full per-term statistics (mean / std / std_err).
        """
        # run_mmpbsa_calculation() writes FINAL_RESULTS_MMPBSA.dat; accept the
        # "results_" prefixed variant too for compatibility with older runs.
        candidates = [
            os.path.join(self.mmpbsa_dir, "FINAL_RESULTS_MMPBSA.dat"),
            os.path.join(self.mmpbsa_dir, "results_FINAL_RESULTS_MMPBSA.dat"),
        ]
        final_results = next((path for path in candidates if os.path.exists(path)), None)
        if final_results is None:
            return {
                "success": False,
                "error": "MM-PBSA results file not found"
            }

        try:
            with open(final_results, "r") as f:
                lines = f.readlines()

            results = {}
            data_block = False

            for line in lines:
                line = line.strip()

                # Skip empty lines and decorative headers
                if not line or line.startswith("***") or line.startswith("==="):
                    continue

                # Statistics start after the "DELTA TOTAL" marker line
                if line.startswith("DELTA TOTAL"):
                    data_block = True
                    continue

                if data_block and ":" in line:
                    parts = line.split(":")
                    if len(parts) >= 2:
                        key = parts[0].strip()
                        value_parts = parts[1].strip().split()

                        # Expect "mean std std_err" triples per energy term
                        if len(value_parts) >= 3:
                            results[key] = {
                                "mean": float(value_parts[0]),
                                "std": float(value_parts[1]),
                                "std_err": float(value_parts[2])
                            }

            # Extract binding energy components (0 if a term is absent)
            binding_energy = results.get("DELTA TOTAL", {}).get("mean", 0)
            van_der_waals = results.get("VDWAALS", {}).get("mean", 0)
            electrostatic = results.get("EEL", {}).get("mean", 0)
            polar_solvation = results.get("EGB/EPB", {}).get("mean", 0)
            non_polar_solvation = results.get("ESURF", {}).get("mean", 0)

            return {
                "success": True,
                "binding_energy": binding_energy,
                "components": {
                    "van_der_waals": van_der_waals,
                    "electrostatic": electrostatic,
                    "polar_solvation": polar_solvation,
                    "non_polar_solvation": non_polar_solvation
                },
                "detailed_results": results
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Error parsing MM-PBSA results: {str(e)}"
            }
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/protein.py:
--------------------------------------------------------------------------------
1 | """
2 | Protein simulation protocol for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import time
7 | import logging
8 | from typing import Dict, Any, Optional, List
9 |
10 | from gromacs_copilot.protocols.base import BaseProtocol
11 | from gromacs_copilot.core.enums import SimulationStage
12 | from gromacs_copilot.config import FORCE_FIELDS
13 |
14 |
15 | class ProteinProtocol(BaseProtocol):
16 | """Protocol for protein-only simulations"""
17 |
18 | def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
19 | """
20 | Initialize the protein simulation protocol
21 |
22 | Args:
23 | workspace: Directory to use as the working directory
24 | """
25 | super().__init__(workspace)
26 |
27 | # Initialize protein-specific attributes
28 | self.protein_file = None
29 | self.topology_file = None
30 | self.box_file = None
31 | self.solvated_file = None
32 | self.minimized_file = None
33 | self.equilibrated_file = None
34 | self.production_file = None
35 | self.gmx_bin = gmx_bin
36 |
37 | logging.info(f"Protein protocol initialized with workspace: {self.workspace}")
38 |
39 | def get_state(self) -> Dict[str, Any]:
40 | """
41 | Get the current state of the protocol
42 |
43 | Returns:
44 | Dictionary with protocol state information
45 | """
46 | try:
47 | files = os.listdir(self.workspace)
48 |
49 | # Get file sizes and modification times
50 | file_info = []
51 | for file in files:
52 | file_path = os.path.join(self.workspace, file)
53 | if os.path.isfile(file_path):
54 | stats = os.stat(file_path)
55 | file_info.append({
56 | "name": file,
57 | "size_bytes": stats.st_size,
58 | "modified": time.ctime(stats.st_mtime),
59 | "is_directory": False
60 | })
61 | elif os.path.isdir(file_path):
62 | file_info.append({
63 | "name": file,
64 | "is_directory": True,
65 | "modified": time.ctime(os.path.getmtime(file_path))
66 | })
67 |
68 | return {
69 | "success": True,
70 | "workspace_path": self.workspace,
71 | "current_stage": self.stage.name,
72 | "files": file_info,
73 | "protein_file": self.protein_file,
74 | "topology_file": self.topology_file,
75 | "box_file": self.box_file,
76 | "solvated_file": self.solvated_file,
77 | "minimized_file": self.minimized_file,
78 | "equilibrated_file": self.equilibrated_file,
79 | "production_file": self.production_file
80 | }
81 | except Exception as e:
82 | logging.error(f"Error getting protocol state: {str(e)}")
83 | return {
84 | "success": False,
85 | "error": str(e),
86 | "workspace_path": self.workspace,
87 | "current_stage": self.stage.name
88 | }
89 |
90 | def check_prerequisites(self) -> Dict[str, Any]:
91 | """
92 | Check if GROMACS is installed and available
93 |
94 | Returns:
95 | Dictionary with prerequisite check information
96 | """
97 | result = self.run_shell_command(f"{self.gmx_bin} --version", capture_output=True)
98 |
99 | if result["success"]:
100 | version_info = result["stdout"].strip()
101 | return {
102 | "success": True,
103 | "installed": True,
104 | "version": version_info
105 | }
106 | else:
107 | return {
108 | "success": False,
109 | "installed": False,
110 | "error": "GROMACS is not installed or not in PATH"
111 | }
112 |
113 | def set_protein_file(self, file_path: str) -> Dict[str, Any]:
114 | """
115 | Set and prepare the protein file for simulation
116 |
117 | Args:
118 | file_path: Path to the protein structure file (PDB or GRO)
119 |
120 | Returns:
121 | Dictionary with result information
122 | """
123 | if not os.path.exists(file_path):
124 | return {
125 | "success": False,
126 | "error": f"Protein file not found: {file_path}"
127 | }
128 |
129 | # Copy the protein file to the workspace if it's not already there
130 | basename = os.path.basename(file_path)
131 | self.protein_file = basename
132 |
133 | if os.path.abspath(file_path) != os.path.join(self.workspace, basename):
134 | copy_result = self.run_shell_command(f"cp {file_path} {self.workspace}/")
135 | if not copy_result["success"]:
136 | return {
137 | "success": False,
138 | "error": f"Failed to copy protein file to workspace: {copy_result['stderr']}"
139 | }
140 |
141 | # Create directories for topologies
142 | mkdir_result = self.run_shell_command("mkdir -p topologies")
143 |
144 | return {
145 | "success": True,
146 | "protein_file": self.protein_file,
147 | "file_path": os.path.join(self.workspace, self.protein_file)
148 | }
149 |
150 | def generate_topology(self, force_field: str, water_model: str = "spc") -> Dict[str, Any]:
151 | """
152 | Generate topology for the protein
153 |
154 | Args:
155 | force_field: Name of the force field to use
156 | water_model: Water model to use
157 |
158 | Returns:
159 | Dictionary with result information
160 | """
161 | if not self.protein_file:
162 | return {
163 | "success": False,
164 | "error": "No protein file has been set"
165 | }
166 |
167 | # Map user-friendly force field names to GROMACS internal names
168 | if force_field not in FORCE_FIELDS:
169 | return {
170 | "success": False,
171 | "error": f"Unknown force field: {force_field}. Available options: {list(FORCE_FIELDS.keys())}"
172 | }
173 |
174 | ff_name = FORCE_FIELDS[force_field]
175 |
176 | # Generate topology
177 | cmd = f"{self.gmx_bin} pdb2gmx -f {self.protein_file} -o protein.gro -p topology.top -i posre.itp -ff {ff_name} -water {water_model}"
178 | result = self.run_shell_command(cmd)
179 |
180 | if not result["success"]:
181 | return {
182 | "success": False,
183 | "error": f"Failed to generate topology: {result['stderr']}"
184 | }
185 |
186 | self.topology_file = "topology.top"
187 | self.box_file = "protein.gro"
188 |
189 | return {
190 | "success": True,
191 | "topology_file": self.topology_file,
192 | "box_file": self.box_file,
193 | "force_field": force_field,
194 | "water_model": water_model
195 | }
196 |
197 | def define_simulation_box(self, distance: float = 1.0, box_type: str = "cubic") -> Dict[str, Any]:
198 | """
199 | Define the simulation box
200 |
201 | Args:
202 | distance: Minimum distance between protein and box edge (nm)
203 | box_type: Type of box (cubic, dodecahedron, octahedron)
204 |
205 | Returns:
206 | Dictionary with result information
207 | """
208 | if not self.box_file:
209 | return {
210 | "success": False,
211 | "error": "No protein structure file has been processed"
212 | }
213 |
214 | cmd = f"{self.gmx_bin} editconf -f {self.box_file} -o box.gro -c -d {distance} -bt {box_type}"
215 | result = self.run_shell_command(cmd)
216 |
217 | if not result["success"]:
218 | return {
219 | "success": False,
220 | "error": f"Failed to define simulation box: {result['stderr']}"
221 | }
222 |
223 | self.box_file = "box.gro"
224 |
225 | return {
226 | "success": True,
227 | "box_file": self.box_file,
228 | "distance": distance,
229 | "box_type": box_type
230 | }
231 |
232 | def solvate_system(self) -> Dict[str, Any]:
233 | """
234 | Solvate the protein in water
235 |
236 | Returns:
237 | Dictionary with result information
238 | """
239 | if not self.box_file or not self.topology_file:
240 | return {
241 | "success": False,
242 | "error": "Box file or topology file not defined"
243 | }
244 |
245 | cmd = f"{self.gmx_bin} solvate -cp {self.box_file} -cs spc216.gro -o solvated.gro -p {self.topology_file}"
246 | result = self.run_shell_command(cmd)
247 |
248 | if not result["success"]:
249 | return {
250 | "success": False,
251 | "error": f"Failed to solvate the protein: {result['stderr']}"
252 | }
253 |
254 | self.solvated_file = "solvated.gro"
255 |
256 | return {
257 | "success": True,
258 | "solvated_file": self.solvated_file
259 | }
260 |
261 | def add_ions(self, concentration: float = .15, neutral: bool = True) -> Dict[str, Any]:
262 | """
263 | Add ions to the solvated system
264 |
265 | Args:
266 | concentration: Salt concentration in M
267 | neutral: Whether to neutralize the system
268 |
269 | Returns:
270 | Dictionary with result information
271 | """
272 | if not self.solvated_file or not self.topology_file:
273 | return {
274 | "success": False,
275 | "error": "Solvated file or topology file not defined"
276 | }
277 |
278 | # Create ions.mdp file
279 | ions_mdp = self.create_mdp_file("ions")
280 | if not ions_mdp["success"]:
281 | return ions_mdp
282 |
283 | # Prepare for adding ions
284 | cmd = f"{self.gmx_bin} grompp -f ions.mdp -c {self.solvated_file} -p {self.topology_file} -o ions.tpr"
285 | result = self.run_shell_command(cmd)
286 |
287 | if not result["success"]:
288 | return {
289 | "success": False,
290 | "error": f"Failed to prepare for adding ions: {result['stderr']}"
291 | }
292 |
293 | # Add ions
294 | neutral_flag = "-neutral" if neutral else ""
295 | cmd = f"echo 'SOL' | {self.gmx_bin} genion -s ions.tpr -o solvated_ions.gro -p {self.topology_file} -pname NA -nname CL {neutral_flag} -conc {concentration}"
296 | result = self.run_shell_command(cmd)
297 |
298 | if not result["success"]:
299 | return {
300 | "success": False,
301 | "error": f"Failed to add ions: {result['stderr']}"
302 | }
303 |
304 | self.solvated_file = "solvated_ions.gro"
305 |
306 | return {
307 | "success": True,
308 | "solvated_file": self.solvated_file,
309 | "concentration": concentration,
310 | "neutral": neutral
311 | }
312 |
313 | def run_energy_minimization(self) -> Dict[str, Any]:
314 | """
315 | Run energy minimization
316 |
317 | Returns:
318 | Dictionary with result information
319 | """
320 | if not self.solvated_file or not self.topology_file:
321 | return {
322 | "success": False,
323 | "error": "Solvated file or topology file not defined"
324 | }
325 |
326 | # Create em.mdp file
327 | em_mdp = self.create_mdp_file("em")
328 | if not em_mdp["success"]:
329 | return em_mdp
330 |
331 | # Generate tpr file for minimization
332 | cmd = f"{self.gmx_bin} grompp -f em.mdp -c {self.solvated_file} -p {self.topology_file} -o em.tpr"
333 | result = self.run_shell_command(cmd)
334 |
335 | if not result["success"]:
336 | return {
337 | "success": False,
338 | "error": f"Failed to prepare energy minimization: {result['stderr']}"
339 | }
340 |
341 | # Run energy minimization
342 | cmd = f"{self.gmx_bin} mdrun -v -deffnm em"
343 | result = self.run_shell_command(cmd)
344 |
345 | if not result["success"]:
346 | # return {
347 | # "success": False,
348 | # "error": f"Energy minimization failed: {result['stderr']}"
349 | # }
350 | cmd = f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm em"
351 | result = self.run_shell_command(cmd)
352 | if not result["success"]:
353 | return {
354 | "success": False,
355 | "error": f"Energy minimization failed: {result['stderr']}"
356 | }
357 |
358 | self.minimized_file = "em.gro"
359 |
360 | return {
361 | "success": True,
362 | "minimized_file": self.minimized_file,
363 | "log_file": "em.log",
364 | "energy_file": "em.edr"
365 | }
366 |
367 | def run_nvt_equilibration(self) -> Dict[str, Any]:
368 | """
369 | Run NVT equilibration
370 |
371 | Returns:
372 | Dictionary with result information
373 | """
374 | if not self.minimized_file or not self.topology_file:
375 | return {
376 | "success": False,
377 | "error": "Minimized file or topology file not defined"
378 | }
379 |
380 | # Create nvt.mdp file
381 | nvt_mdp = self.create_mdp_file("nvt")
382 | if not nvt_mdp["success"]:
383 | return nvt_mdp
384 |
385 | # Generate tpr file for NVT equilibration
386 | cmd = f"{self.gmx_bin} grompp -f nvt.mdp -c {self.minimized_file} -r {self.minimized_file} -p {self.topology_file} -o nvt.tpr"
387 | # print(f"Running command: {cmd}")
388 | result = self.run_shell_command(cmd)
389 |
390 | if not result["success"]:
391 | return {
392 | "success": False,
393 | "error": f"Failed to prepare NVT equilibration: {result['stderr']}"
394 | }
395 |
396 | # Run NVT equilibration
397 | cmd = f"{self.gmx_bin} mdrun -v -deffnm nvt"
398 | result = self.run_shell_command(cmd)
399 |
400 | if not result["success"]:
401 | # return {
402 | # "success": False,
403 | # "error": f"NVT equilibration failed: {result['stderr']}"
404 | # }
405 | cmd = f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm nvt"
406 | result = self.run_shell_command(cmd)
407 | if not result["success"]:
408 | return {
409 | "success": False,
410 | "error": f"NVT equilibration failed: {result['stderr']}"
411 | }
412 |
413 | return {
414 | "success": True,
415 | "nvt_file": "nvt.gro",
416 | "nvt_checkpoint": "nvt.cpt",
417 | "log_file": "nvt.log",
418 | "energy_file": "nvt.edr"
419 | }
420 |
421 | def run_npt_equilibration(self) -> Dict[str, Any]:
422 | """
423 | Run NPT equilibration
424 |
425 | Returns:
426 | Dictionary with result information
427 | """
428 | # Create npt.mdp file
429 | npt_mdp = self.create_mdp_file("npt")
430 | if not npt_mdp["success"]:
431 | return npt_mdp
432 |
433 | # Generate tpr file for NPT equilibration
434 | cmd = f"{self.gmx_bin} grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p {self.topology_file} -o npt.tpr"
435 | result = self.run_shell_command(cmd)
436 |
437 | if not result["success"]:
438 | return {
439 | "success": False,
440 | "error": f"Failed to prepare NPT equilibration: {result['stderr']}"
441 | }
442 |
443 | # Run NPT equilibration
444 | cmd = f"{self.gmx_bin} mdrun -v -deffnm npt"
445 | result = self.run_shell_command(cmd)
446 |
447 | if not result["success"]:
448 | # return {
449 | # "success": False,
450 | # "error": f"NPT equilibration failed: {result['stderr']}"
451 | # }
452 | cmd = f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm npt"
453 | result = self.run_shell_command(cmd)
454 | if not result["success"]:
455 | return {
456 | "success": False,
457 | "error": f"NPT equilibration failed: {result['stderr']}"
458 | }
459 |
460 | self.equilibrated_file = "npt.gro"
461 |
462 | return {
463 | "success": True,
464 | "equilibrated_file": self.equilibrated_file,
465 | "npt_checkpoint": "npt.cpt",
466 | "log_file": "npt.log",
467 | "energy_file": "npt.edr"
468 | }
469 |
470 | def run_production_md(self, length_ns: float = 10.0) -> Dict[str, Any]:
471 | """
472 | Run production MD
473 |
474 | Args:
475 | length_ns: Length of the simulation in nanoseconds
476 |
477 | Returns:
478 | Dictionary with result information
479 | """
480 | if not self.equilibrated_file or not self.topology_file:
481 | return {
482 | "success": False,
483 | "error": "Equilibrated file or topology file not defined"
484 | }
485 |
486 | # Calculate number of steps (2 fs timestep)
487 | nsteps = int(length_ns * 1000000 / 2)
488 |
489 | # Create md.mdp file with custom steps
490 | md_mdp = self.create_mdp_file("md", {"nsteps": nsteps})
491 | if not md_mdp["success"]:
492 | return md_mdp
493 |
494 | # Generate tpr file for production MD
495 | cmd = f"{self.gmx_bin} grompp -f md.mdp -c {self.equilibrated_file} -t npt.cpt -p {self.topology_file} -o md.tpr"
496 | result = self.run_shell_command(cmd)
497 |
498 | if not result["success"]:
499 | return {
500 | "success": False,
501 | "error": f"Failed to prepare production MD: {result['stderr']}"
502 | }
503 |
504 | # Run production MD
505 | cmd = f"{self.gmx_bin} mdrun -v -deffnm md"
506 | result = self.run_shell_command(cmd)
507 |
508 | if not result["success"]:
509 | # return {
510 | # "success": False,
511 | # "error": f"Production MD failed: {result['stderr']}"
512 | # }
513 | cmd = f"{self.gmx_bin} mdrun -ntmpi 1 -v -deffnm md"
514 | result = self.run_shell_command(cmd)
515 | if not result["success"]:
516 | return {
517 | "success": False,
518 | "error": f"Production MD failed: {result['stderr']}"
519 | }
520 |
521 | self.production_file = "md.gro"
522 |
523 | return {
524 | "success": True,
525 | "production_file": self.production_file,
526 | "trajectory_file": "md.xtc",
527 | "log_file": "md.log",
528 | "energy_file": "md.edr",
529 | "length_ns": length_ns
530 | }
531 |
532 | def analyze_rmsd(self) -> Dict[str, Any]:
533 | """
534 | Perform RMSD analysis
535 |
536 | Returns:
537 | Dictionary with result information
538 | """
539 | # Create analysis directory if it doesn't exist
540 | mkdir_result = self.run_shell_command("mkdir -p analysis")
541 |
542 | cmd = f"echo 'Protein Protein' | {self.gmx_bin} rms -s md.tpr -f md.xtc -o analysis/rmsd.xvg -tu ns"
543 | result = self.run_shell_command(cmd)
544 |
545 | if not result["success"]:
546 | return {
547 | "success": False,
548 | "error": f"RMSD analysis failed: {result['stderr']}"
549 | }
550 |
551 | return {
552 | "success": True,
553 | "output_file": "analysis/rmsd.xvg",
554 | "analysis_type": "RMSD"
555 | }
556 |
557 | def analyze_rmsf(self) -> Dict[str, Any]:
558 | """
559 | Perform RMSF analysis
560 |
561 | Returns:
562 | Dictionary with result information
563 | """
564 | # Create analysis directory if it doesn't exist
565 | mkdir_result = self.run_shell_command("mkdir -p analysis")
566 |
567 | cmd = f"echo 'C-alpha' | {self.gmx_bin} rmsf -s md.tpr -f md.xtc -o analysis/rmsf.xvg -res"
568 | result = self.run_shell_command(cmd)
569 |
570 | if not result["success"]:
571 | return {
572 | "success": False,
573 | "error": f"RMSF analysis failed: {result['stderr']}"
574 | }
575 |
576 | return {
577 | "success": True,
578 | "output_file": "analysis/rmsf.xvg",
579 | "analysis_type": "RMSF"
580 | }
581 |
582 | def analyze_gyration(self) -> Dict[str, Any]:
583 | """
584 | Perform radius of gyration analysis
585 |
586 | Returns:
587 | Dictionary with result information
588 | """
589 | # Create analysis directory if it doesn't exist
590 | mkdir_result = self.run_shell_command("mkdir -p analysis")
591 |
592 | cmd = f"echo 'Protein' | {self.gmx_bins} gyrate -s md.tpr -f md.xtc -o analysis/gyrate.xvg"
593 | result = self.run_shell_command(cmd)
594 |
595 | if not result["success"]:
596 | return {
597 | "success": False,
598 | "error": f"Radius of gyration analysis failed: {result['stderr']}"
599 | }
600 |
601 | return {
602 | "success": True,
603 | "output_file": "analysis/gyrate.xvg",
604 | "analysis_type": "Radius of Gyration"
605 | }
--------------------------------------------------------------------------------
/gromacs_copilot/protocols/protein_ligand.py:
--------------------------------------------------------------------------------
1 | """
2 | Protein-ligand simulation protocol for GROMACS Copilot
3 | """
4 |
5 | import os
6 | import logging
7 | from typing import Dict, Any, Optional, List
8 |
9 | from gromacs_copilot.protocols.protein import ProteinProtocol
10 | from gromacs_copilot.core.enums import SimulationStage
11 | from gromacs_copilot.config import FORCE_FIELDS, STANDARD_RESIDUES
12 | from gromacs_copilot.utils.shell import check_command_exists
13 |
14 |
15 | class ProteinLigandProtocol(ProteinProtocol):
16 | """Protocol for protein-ligand simulations"""
17 |
18 | def __init__(self, workspace: str = "./md_workspace", gmx_bin: str = "gmx"):
19 | """
20 | Initialize the protein-ligand simulation protocol
21 |
22 | Args:
23 | workspace: Directory to use as the working directory
24 | """
25 | super().__init__(workspace)
26 |
27 | # Initialize protein-ligand specific attributes
28 | self.ligand_file = None
29 | self.ligand_name = None
30 | self.complex_file = None
31 | self.has_ligand = False
32 | self.index_file = None
33 | self.gmx_bin = gmx_bin
34 |
35 | logging.info(f"Protein-ligand protocol initialized with workspace: {self.workspace}")
36 |
37 | def get_state(self) -> Dict[str, Any]:
38 | """
39 | Get the current state of the protocol
40 |
41 | Returns:
42 | Dictionary with protocol state information
43 | """
44 | # Get base state from parent class
45 | state = super().get_state()
46 |
47 | # Add protein-ligand specific information
48 | if state["success"]:
49 | state.update({
50 | "ligand_file": self.ligand_file,
51 | "ligand_name": self.ligand_name,
52 | "complex_file": self.complex_file,
53 | "has_ligand": self.has_ligand,
54 | "index_file": self.index_file
55 | })
56 |
57 | return state
58 |
59 | def check_prerequisites(self) -> Dict[str, Any]:
60 | """
61 | Check if prerequisites for protein-ligand simulation are met
62 |
63 | Returns:
64 | Dictionary with prerequisite check information
65 | """
66 | # Check GROMACS installation
67 | gromacs_check = super().check_prerequisites()
68 | if not gromacs_check["success"]:
69 | return gromacs_check
70 |
71 | # Check OpenBabel installation
72 | openbabel_installed = check_command_exists("obabel")
73 |
74 | # Check ACPYPE installation
75 | acpype_installed = check_command_exists("acpype")
76 |
77 | return {
78 | "success": gromacs_check["success"],
79 | "gromacs": gromacs_check,
80 | "openbabel": {
81 | "installed": openbabel_installed,
82 | "required": True
83 | },
84 | "acpype": {
85 | "installed": acpype_installed,
86 | "required": True
87 | }
88 | }
89 |
90 | def check_for_ligands(self, pdb_file: str) -> Dict[str, Any]:
91 | """
92 | Check for potential ligands in the PDB file
93 |
94 | Args:
95 | pdb_file: Path to the PDB file
96 |
97 | Returns:
98 | Dictionary with ligand information
99 | """
100 | try:
101 | # Extract unique residue names from the PDB file that aren't standard amino acids or water
102 | cmd = f"grep '^ATOM\\|^HETATM' {pdb_file} | awk '{{print $4}}' | sort | uniq"
103 | result = self.run_shell_command(cmd)
104 |
105 | if not result["success"]:
106 | return {
107 | "success": False,
108 | "error": f"Failed to analyze PDB file: {result['stderr']}"
109 | }
110 |
111 | # Extract potential ligands (non-standard residues)
112 | residues = result["stdout"].strip().split()
113 | potential_ligands = [res for res in residues if res not in STANDARD_RESIDUES]
114 |
115 | return {
116 | "success": True,
117 | "ligands": potential_ligands
118 | }
119 |
120 | except Exception as e:
121 | return {
122 | "success": False,
123 | "error": f"Error checking for ligands: {str(e)}"
124 | }
125 |
126 | def set_ligand(self, ligand_name: str) -> Dict[str, Any]:
127 | """
128 | Set the ligand for simulation
129 |
130 | Args:
131 | ligand_name: Residue name of the ligand in the PDB file
132 |
133 | Returns:
134 | Dictionary with result information
135 | """
136 | if not self.protein_file:
137 | return {
138 | "success": False,
139 | "error": "No protein file has been set"
140 | }
141 |
142 | self.ligand_name = ligand_name
143 |
144 | # Create directory structure for protein-ligand preparation
145 | mkdir_cmd = "mkdir -p param/receptor param/ligand"
146 | mkdir_result = self.run_shell_command(mkdir_cmd)
147 | if not mkdir_result["success"]:
148 | return {
149 | "success": False,
150 | "error": f"Failed to create directories: {mkdir_result['stderr']}"
151 | }
152 |
153 | # Extract protein atoms to receptor.pdb
154 | extract_protein_cmd = f"grep '^ATOM' {self.protein_file} > param/receptor/receptor.pdb"
155 | protein_result = self.run_shell_command(extract_protein_cmd)
156 | if not protein_result["success"]:
157 | return {
158 | "success": False,
159 | "error": f"Failed to extract protein atoms: {protein_result['stderr']}"
160 | }
161 |
162 | # Extract ligand using Python to handle renaming
163 | extract_result = self.extract_ligand(os.path.join(self.workspace, self.protein_file), ligand_name)
164 | if not extract_result["success"]:
165 | return extract_result
166 |
167 | self.ligand_file = "param/ligand/ligand.pdb"
168 | self.has_ligand = True
169 |
170 | return {
171 | "success": True,
172 | "ligand_name": ligand_name,
173 | "ligand_file": self.ligand_file,
174 | "receptor_file": "param/receptor/receptor.pdb"
175 | }
176 |
177 | def extract_ligand(self, pdb_file: str, ligand_name: str) -> Dict[str, Any]:
178 | """
179 | Extract ligand from PDB file and rename it to LIG
180 |
181 | Args:
182 | pdb_file: Path to the PDB file
183 | ligand_name: Residue name of the ligand
184 |
185 | Returns:
186 | Dictionary with result information
187 | """
188 | try:
189 | # Create a Python script to extract the ligand
190 | script_content = f"""
191 | ligand_atom = []
192 | keepLine = []
193 | with open("{pdb_file}","r") as file:
194 | lines = file.readlines()
195 | for line in lines:
196 | if '{ligand_name}' in line[17:20]:
197 | line = line[:17]+"LIG"+line[20:]
198 | keepLine.append(line)
199 | ligand_atom.append(int(line[6:11]))
200 | elif "CONECT" in line[0:6]:
201 | idx = [int(x) for x in line.split()[1:]]
202 | if any(id in idx for id in ligand_atom):
203 | keepLine.append(line)
204 | with open("param/ligand/ligand.pdb","w") as file:
205 | for line in keepLine:
206 | file.write(line)
207 | """
208 | with open("extract_ligand.py", "w") as f:
209 | f.write(script_content)
210 |
211 | # Run the Python script
212 | result = self.run_shell_command("python extract_ligand.py")
213 | if not result["success"]:
214 | return {
215 | "success": False,
216 | "error": f"Failed to extract ligand: {result['stderr']}"
217 | }
218 |
219 | # Clean up the temporary script
220 | os.remove("extract_ligand.py")
221 |
222 | return {
223 | "success": True,
224 | "ligand_file": "param/ligand/ligand.pdb"
225 | }
226 |
227 | except Exception as e:
228 | return {
229 | "success": False,
230 | "error": f"Error extracting ligand: {str(e)}"
231 | }
232 |
233 | def prepare_ligand_topology(self) -> Dict[str, Any]:
234 | """
235 | Prepare ligand topology using OpenBabel and ACPYPE
236 |
237 | Returns:
238 | Dictionary with result information
239 | """
240 | if not self.has_ligand or not self.ligand_file:
241 | return {
242 | "success": False,
243 | "error": "No ligand has been set"
244 | }
245 |
246 | # Check if OpenBabel and ACPYPE are installed
247 | prerequisites = self.check_prerequisites()
248 | if not prerequisites["openbabel"]["installed"]:
249 | return {
250 | "success": False,
251 | "error": "OpenBabel is required for ligand preparation but is not installed"
252 | }
253 |
254 | if not prerequisites["acpype"]["installed"]:
255 | return {
256 | "success": False,
257 | "error": "ACPYPE is required for ligand preparation but is not installed"
258 | }
259 |
260 | # Convert to MOL2 format with OpenBabel (adding hydrogens)
261 | babel_cmd = "cd param/ligand && obabel -ipdb ligand.pdb -omol2 -h > ligand.mol2"
262 | babel_result = self.run_shell_command(babel_cmd)
263 | if not babel_result["success"]:
264 | return {
265 | "success": False,
266 | "error": f"Failed to convert ligand to MOL2 format: {babel_result['stderr']}"
267 | }
268 |
269 | # Run ACPYPE to generate ligand topology
270 | acpype_cmd = "cd param/ligand && acpype -i ligand.mol2"
271 | acpype_result = self.run_shell_command(acpype_cmd)
272 | if not acpype_result["success"]:
273 | return {
274 | "success": False,
275 | "error": f"Failed to generate ligand topology with ACPYPE: {acpype_result['stderr']}"
276 | }
277 |
278 | # Copy necessary files to workspace
279 | copy_cmd = "cp param/ligand/ligand.acpype/ligand_GMX.itp ligand.itp"
280 | copy_result = self.run_shell_command(copy_cmd)
281 | if not copy_result["success"]:
282 | return {
283 | "success": False,
284 | "error": f"Failed to copy ligand topology: {copy_result['stderr']}"
285 | }
286 |
287 | # Generate restraints for ligand
288 | ndx_cmd = f"echo $'r LIG & !a H*\nname 3 LIG-H\nq'| {self.gmx_bin} make_ndx -f param/ligand/ligand.acpype/ligand_NEW.pdb -o lig_noh.ndx"
289 | ndx_result = self.run_shell_command(ndx_cmd)
290 | if not ndx_result["success"]:
291 | return {
292 | "success": False,
293 | "error": f"Failed to create index for ligand restraints: {ndx_result['stderr']}"
294 | }
295 |
296 | # Generate position restraints for ligand
297 | # posre_cmd = """echo "LIG-H" | gmx genrestr -f param/ligand/ligand.acpype/ligand_NEW.pdb -o posre_ligand.itp -n lig_noh.ndx -fc 1000 1000 1000"""
298 | # copying position restrained
299 | posre_cmd = "cp param/ligand/ligand.acpype/posre_ligand.itp ."
300 | posre_result = self.run_shell_command(posre_cmd)
301 | if not posre_result["success"]:
302 | return {
303 | "success": False,
304 | "error": f"Failed to generate position restraints for ligand: {posre_result['stderr']}"
305 | }
306 |
307 | # Append posre_ligand.itp include directive to ligand.itp
308 | append_cmd = '''echo '
309 | ; Include Position restraint file
310 | #ifdef POSRES
311 | #include "posre_ligand.itp"
312 | #endif' >> ligand.itp'''
313 | append_result = self.run_shell_command(append_cmd)
314 | if not append_result["success"]:
315 | return {
316 | "success": False,
317 | "error": f"Failed to update ligand.itp with position restraints: {append_result['stderr']}"
318 | }
319 |
320 | return {
321 | "success": True,
322 | "ligand_topology": "ligand.itp",
323 | "ligand_posre": "posre_ligand.itp"
324 | }
325 |
326 | def prepare_receptor_topology(self, force_field: str, water_model: str = "spc") -> Dict[str, Any]:
327 | """
328 | Generate topology for the receptor
329 |
330 | Args:
331 | force_field: Name of the force field to use
332 | water_model: Water model to use
333 |
334 | Returns:
335 | Dictionary with result information
336 | """
337 | if not os.path.exists("param/receptor/receptor.pdb"):
338 | return {
339 | "success": False,
340 | "error": "Receptor file not found"
341 | }
342 |
343 | # Map user-friendly force field names to GROMACS internal names
344 | if force_field not in FORCE_FIELDS:
345 | return {
346 | "success": False,
347 | "error": f"Unknown force field: {force_field}. Available options: {list(FORCE_FIELDS.keys())}"
348 | }
349 |
350 | ff_name = FORCE_FIELDS[force_field]
351 |
352 | # Generate topology for receptor
353 | cmd = f"cd param/receptor && {self.gmx_bin} pdb2gmx -f receptor.pdb -o receptor_GMX.pdb -p topol.top -i posre.itp -ff {ff_name} -water {water_model}"
354 | result = self.run_shell_command(cmd)
355 |
356 | if not result["success"]:
357 | return {
358 | "success": False,
359 | "error": f"Failed to generate receptor topology: {result['stderr']}"
360 | }
361 |
362 | # Copy files to workspace
363 | copy_cmd = "cp param/receptor/*.itp param/receptor/topol.top ."
364 | copy_result = self.run_shell_command(copy_cmd)
365 | if not copy_result["success"]:
366 | return {
367 | "success": False,
368 | "error": f"Failed to copy receptor topology files: {copy_result['stderr']}"
369 | }
370 |
371 | return {
372 | "success": True,
373 | "receptor_topology": "topol.top"
374 | }
375 |
    def merge_protein_ligand(self) -> Dict[str, Any]:
        """
        Merge protein and ligand structures and update the topology

        Returns:
            Dictionary with result information
        """
        if not self.has_ligand:
            return {
                "success": False,
                "error": "No ligand has been set"
            }

        # Merge protein and ligand PDB files (grep -h suppresses filename prefixes)
        merge_cmd = "grep -h ATOM param/receptor/receptor_GMX.pdb param/ligand/ligand.acpype/ligand_NEW.pdb > complex.pdb"
        merge_result = self.run_shell_command(merge_cmd)
        if not merge_result["success"]:
            return {
                "success": False,
                "error": f"Failed to merge protein and ligand structures: {merge_result['stderr']}"
            }

        # Insert the ligand topology include right after the force-field
        # include line (sed 'a' appends a line after the matched pattern)
        update_cmd = """sed -i '/forcefield\\.itp"/a\\
#include "ligand.itp"' topol.top"""
        update_result = self.run_shell_command(update_cmd)
        if not update_result["success"]:
            return {
                "success": False,
                "error": f"Failed to update topology file: {update_result['stderr']}"
            }

        # Register one ligand molecule in the topology.
        # NOTE(review): a plain append assumes [ molecules ] is the last
        # section of topol.top — confirm for non-standard topologies.
        add_cmd = """echo "ligand 1" >> topol.top"""
        add_result = self.run_shell_command(add_cmd)
        if not add_result["success"]:
            return {
                "success": False,
                "error": f"Failed to add ligand to topology molecules: {add_result['stderr']}"
            }

        # The merged complex becomes the structure for later box/solvation steps.
        self.complex_file = "complex.pdb"
        self.topology_file = "topol.top"
        self.box_file = self.complex_file

        return {
            "success": True,
            "complex_file": self.complex_file,
            "topology_file": self.topology_file
        }
426 |
427 | def create_index_groups(self) -> Dict[str, Any]:
428 | """
429 | Create custom index groups for protein-ligand simulation
430 |
431 | Returns:
432 | Dictionary with result information
433 | """
434 | if not self.has_ligand:
435 | return {
436 | "success": False,
437 | "error": "No ligand has been set"
438 | }
439 |
440 | if not self.solvated_file:
441 | return {
442 | "success": False,
443 | "error": "System must be solvated first"
444 | }
445 |
446 | # Create index groups
447 | ndx_cmd = f"""echo -e "1 | r LIG\\nr SOL | r CL | r NA\\nq" | {self.gmx_bin} make_ndx -f {self.solvated_file} -o index.ndx"""
448 | ndx_result = self.run_shell_command(ndx_cmd)
449 | if not ndx_result["success"]:
450 | return {
451 | "success": False,
452 | "error": f"Failed to create index groups: {ndx_result['stderr']}"
453 | }
454 |
455 | # Rename the groups using Python
456 | script_content = """
457 | import re
458 | with open('index.ndx', 'r') as file:
459 | content = file.read()
460 | matches = re.findall(r'\\[ \\w+ \\]', content)
461 | if matches:
462 | content = content.replace(matches[-1], '[ Water_Ions ]')
463 | content = content.replace(matches[-2], '[ Protein_Ligand ]')
464 | with open('index.ndx', 'w') as file:
465 | file.write(content)
466 | """
467 | with open("rename_groups.py", "w") as f:
468 | f.write(script_content)
469 |
470 | # Run the Python script
471 | rename_result = self.run_shell_command("python rename_groups.py")
472 | if not rename_result["success"]:
473 | return {
474 | "success": False,
475 | "error": f"Failed to rename index groups: {rename_result['stderr']}"
476 | }
477 |
478 | # Clean up the temporary script
479 | os.remove("rename_groups.py")
480 |
481 | # Update MDP files
482 | self.create_mdp_file("nvt")
483 | update_nvt_cmd = "sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' nvt.mdp"
484 | nvt_result = self.run_shell_command(update_nvt_cmd)
485 |
486 | self.create_mdp_file("npt")
487 | update_npt_cmd = "sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' npt.mdp"
488 | npt_result = self.run_shell_command(update_npt_cmd)
489 |
490 | self.create_mdp_file("md")
491 | update_md_cmd = "sed -i 's/Protein Non-Protein/Protein_Ligand Water_Ions/g' md.mdp"
492 | md_result = self.run_shell_command(update_md_cmd)
493 |
494 | if not (nvt_result["success"] and npt_result["success"] and md_result["success"]):
495 | return {
496 | "success": False,
497 | "error": "Failed to update MDP files with new index groups"
498 | }
499 |
500 | self.index_file = "index.ndx"
501 |
502 | return {
503 | "success": True,
504 | "index_file": self.index_file,
505 | "groups": ["Protein_Ligand", "Water_Ions"]
506 | }
507 |
508 | def generate_topology(self, force_field: str, water_model: str = "spc") -> Dict[str, Any]:
509 | """
510 | Generate topology for the protein-ligand complex
511 |
512 | Args:
513 | force_field: Name of the force field to use
514 | water_model: Water model to use
515 |
516 | Returns:
517 | Dictionary with result information
518 | """
519 | if not self.protein_file:
520 | return {
521 | "success": False,
522 | "error": "No protein file has been set"
523 | }
524 |
525 | # Handle protein-ligand complex
526 | if self.has_ligand:
527 | # Prepare receptor topology
528 | receptor_result = self.prepare_receptor_topology(force_field, water_model)
529 | if not receptor_result["success"]:
530 | return receptor_result
531 |
532 | # Prepare ligand topology
533 | ligand_result = self.prepare_ligand_topology()
534 | if not ligand_result["success"]:
535 | return ligand_result
536 |
537 | # Merge protein and ligand
538 | merge_result = self.merge_protein_ligand()
539 | if not merge_result["success"]:
540 | return merge_result
541 |
542 | return {
543 | "success": True,
544 | "topology_file": self.topology_file,
545 | "complex_file": self.complex_file,
546 | "force_field": force_field,
547 | "water_model": water_model,
548 | "has_ligand": self.has_ligand
549 | }
550 | else:
551 | # Standard protein-only topology generation
552 | return super().generate_topology(force_field, water_model)
553 |
554 | def solvate_system(self) -> Dict[str, Any]:
555 | """
556 | Solvate the protein-ligand complex in water
557 |
558 | Returns:
559 | Dictionary with result information
560 | """
561 | # Use the parent class solvate_system method
562 | result = super().solvate_system()
563 |
564 | if not result["success"]:
565 | return result
566 |
567 | # If this is a protein-ligand system, create index groups
568 | if self.has_ligand:
569 | index_result = self.create_index_groups()
570 | if not index_result["success"]:
571 | return {
572 | "success": False,
573 | "error": f"Failed to create index groups: {index_result['error']}"
574 | }
575 |
576 | return {
577 | "success": True,
578 | "solvated_file": self.solvated_file,
579 | "has_ligand": self.has_ligand,
580 | "index_file": self.index_file if self.has_ligand else None
581 | }
582 |
583 | def add_ions(self, concentration: float = .15, neutral: bool = True) -> Dict[str, Any]:
584 | """
585 | Add ions to the solvated system
586 |
587 | Args:
588 | concentration: Salt concentration in M
589 | neutral: Whether to neutralize the system
590 |
591 | Returns:
592 | Dictionary with result information
593 | """
594 | # Use the parent class add_ions method
595 | result = super().add_ions(concentration, neutral)
596 |
597 | if not result["success"]:
598 | return result
599 |
600 | # If this is a protein-ligand system, update index groups
601 | if self.has_ligand:
602 | index_result = self.create_index_groups()
603 | if not index_result["success"]:
604 | return {
605 | "success": False,
606 | "error": f"Failed to update index groups: {index_result['error']}"
607 | }
608 |
609 | return {
610 | "success": True,
611 | "solvated_file": self.solvated_file,
612 | "concentration": concentration,
613 | "neutral": neutral,
614 | "has_ligand": self.has_ligand,
615 | "index_file": self.index_file if self.has_ligand else None
616 | }
617 |
618 | def run_energy_minimization(self) -> Dict[str, Any]:
619 | """
620 | Run energy minimization
621 |
622 | Returns:
623 | Dictionary with result information
624 | """
625 | if not self.solvated_file or not self.topology_file:
626 | return {
627 | "success": False,
628 | "error": "Solvated file or topology file not defined"
629 | }
630 |
631 | # Create em.mdp file
632 | em_mdp = self.create_mdp_file("em")
633 | if not em_mdp["success"]:
634 | return em_mdp
635 |
636 | # Generate tpr file for minimization, using index file if available
637 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else ""
638 | cmd = f"{self.gmx_bin} grompp -f em.mdp -c {self.solvated_file} -p {self.topology_file} -o em.tpr {index_option}"
639 | result = self.run_shell_command(cmd)
640 |
641 | if not result["success"]:
642 | return {
643 | "success": False,
644 | "error": f"Failed to prepare energy minimization: {result['stderr']}"
645 | }
646 |
647 | # Run energy minimization
648 | cmd = f"{self.gmx_bin} mdrun -v -deffnm em"
649 | result = self.run_shell_command(cmd)
650 |
651 | if not result["success"]:
652 | return {
653 | "success": False,
654 | "error": f"Energy minimization failed: {result['stderr']}"
655 | }
656 |
657 | self.minimized_file = "em.gro"
658 |
659 | return {
660 | "success": True,
661 | "minimized_file": self.minimized_file,
662 | "log_file": "em.log",
663 | "energy_file": "em.edr"
664 | }
665 |
666 | # Override run_nvt_equilibration to use index file if available
667 | def run_nvt_equilibration(self) -> Dict[str, Any]:
668 | """
669 | Run NVT equilibration
670 |
671 | Returns:
672 | Dictionary with result information
673 | """
674 | if not self.minimized_file or not self.topology_file:
675 | return {
676 | "success": False,
677 | "error": "Minimized file or topology file not defined"
678 | }
679 |
680 | # Create nvt.mdp file
681 | nvt_mdp = self.create_mdp_file("nvt")
682 | if not nvt_mdp["success"]:
683 | return nvt_mdp
684 |
685 | # Generate tpr file for NVT equilibration, using index file if available
686 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else ""
687 | cmd = f"{self.gmx_bin} grompp -f nvt.mdp -c {self.minimized_file} -r {self.minimized_file} -p {self.topology_file} -o nvt.tpr -maxwarn 2 {index_option}"
688 | result = self.run_shell_command(cmd)
689 |
690 | if not result["success"]:
691 | return {
692 | "success": False,
693 | "error": f"Failed to prepare NVT equilibration: {result['stderr']}"
694 | }
695 |
696 | # Run NVT equilibration
697 | cmd = f"{self.gmx_bin} mdrun -v -deffnm nvt"
698 | result = self.run_shell_command(cmd)
699 |
700 | if not result["success"]:
701 | return {
702 | "success": False,
703 | "error": f"NVT equilibration failed: {result['stderr']}"
704 | }
705 |
706 | return {
707 | "success": True,
708 | "nvt_file": "nvt.gro",
709 | "nvt_checkpoint": "nvt.cpt",
710 | "log_file": "nvt.log",
711 | "energy_file": "nvt.edr"
712 | }
713 |
714 | # Override run_npt_equilibration to use index file if available
715 | def run_npt_equilibration(self) -> Dict[str, Any]:
716 | """
717 | Run NPT equilibration
718 |
719 | Returns:
720 | Dictionary with result information
721 | """
722 | # Create npt.mdp file
723 | npt_mdp = self.create_mdp_file("npt")
724 | if not npt_mdp["success"]:
725 | return npt_mdp
726 |
727 | # Generate tpr file for NPT equilibration, using index file if available
728 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else ""
729 | cmd = f"{self.gmx_bin} grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p {self.topology_file} -o npt.tpr -maxwarn 2 {index_option}"
730 | result = self.run_shell_command(cmd)
731 |
732 | if not result["success"]:
733 | return {
734 | "success": False,
735 | "error": f"Failed to prepare NPT equilibration: {result['stderr']}"
736 | }
737 |
738 | # Run NPT equilibration
739 | cmd = f"{self.gmx_bin} mdrun -v -deffnm npt"
740 | result = self.run_shell_command(cmd)
741 |
742 | if not result["success"]:
743 | return {
744 | "success": False,
745 | "error": f"NPT equilibration failed: {result['stderr']}"
746 | }
747 |
748 | self.equilibrated_file = "npt.gro"
749 |
750 | return {
751 | "success": True,
752 | "equilibrated_file": self.equilibrated_file,
753 | "npt_checkpoint": "npt.cpt",
754 | "log_file": "npt.log",
755 | "energy_file": "npt.edr"
756 | }
757 |
758 | # Override run_production_md to use index file if available
759 | def run_production_md(self, length_ns: float = 10.0) -> Dict[str, Any]:
760 | """
761 | Run production MD
762 |
763 | Args:
764 | length_ns: Length of the simulation in nanoseconds
765 |
766 | Returns:
767 | Dictionary with result information
768 | """
769 | if not self.equilibrated_file or not self.topology_file:
770 | return {
771 | "success": False,
772 | "error": "Equilibrated file or topology file not defined"
773 | }
774 |
775 | # Calculate number of steps (2 fs timestep)
776 | nsteps = int(length_ns * 1000000 / 2)
777 |
778 | # Create md.mdp file with custom steps
779 | md_mdp = self.create_mdp_file("md", {"nsteps": nsteps})
780 | if not md_mdp["success"]:
781 | return md_mdp
782 |
783 | # Generate tpr file for production MD, using index file if available
784 | index_option = f"-n {self.index_file}" if self.has_ligand and self.index_file else ""
785 | cmd = f"{self.gmx_bin} grompp -f md.mdp -c {self.equilibrated_file} -t npt.cpt -p {self.topology_file} -o md.tpr -maxwarn 2 {index_option}"
786 | result = self.run_shell_command(cmd)
787 |
788 | if not result["success"]:
789 | return {
790 | "success": False,
791 | "error": f"Failed to prepare production MD: {result['stderr']}"
792 | }
793 |
794 | # Run production MD
795 | cmd = f"{self.gmx_bin} mdrun -v -deffnm md"
796 | result = self.run_shell_command(cmd)
797 |
798 | if not result["success"]:
799 | return {
800 | "success": False,
801 | "error": f"Production MD failed: {result['stderr']}"
802 | }
803 |
804 | self.production_file = "md.gro"
805 |
806 | return {
807 | "success": True,
808 | "production_file": self.production_file,
809 | "trajectory_file": "md.xtc",
810 | "log_file": "md.log",
811 | "energy_file": "md.edr",
812 | "length_ns": length_ns
813 | }
814 |
815 | # Add protein-ligand specific analysis methods
816 | def analyze_ligand_rmsd(self) -> Dict[str, Any]:
817 | """
818 | Perform RMSD analysis focused on the ligand
819 |
820 | Returns:
821 | Dictionary with result information
822 | """
823 | if not self.has_ligand:
824 | return {
825 | "success": False,
826 | "error": "No ligand has been set"
827 | }
828 |
829 | # Create analysis directory if it doesn't exist
830 | mkdir_result = self.run_shell_command("mkdir -p analysis")
831 |
832 | cmd = f"echo 'LIG LIG' | {self.gmx_bin} rms -s md.tpr -f md.xtc -o analysis/ligand_rmsd.xvg -tu ns"
833 | result = self.run_shell_command(cmd)
834 |
835 | if not result["success"]:
836 | return {
837 | "success": False,
838 | "error": f"Ligand RMSD analysis failed: {result['stderr']}"
839 | }
840 |
841 | return {
842 | "success": True,
843 | "output_file": "analysis/ligand_rmsd.xvg",
844 | "analysis_type": "Ligand RMSD"
845 | }
846 |
847 | def analyze_protein_ligand_contacts(self) -> Dict[str, Any]:
848 | """
849 | Analyze contacts between protein and ligand
850 |
851 | Returns:
852 | Dictionary with result information
853 | """
854 | if not self.has_ligand:
855 | return {
856 | "success": False,
857 | "error": "No ligand has been set"
858 | }
859 |
860 | # Create analysis directory if it doesn't exist
861 | mkdir_result = self.run_shell_command("mkdir -p analysis")
862 |
863 | cmd = f"echo -e 'Protein\\nLIG' | {self.gmx_bin} mindist -s md.tpr -f md.xtc -od analysis/protein_ligand_mindist.xvg -tu ns"
864 | result = self.run_shell_command(cmd)
865 |
866 | if not result["success"]:
867 | return {
868 | "success": False,
869 | "error": f"Protein-ligand contacts analysis failed: {result['stderr']}"
870 | }
871 |
872 | return {
873 | "success": True,
874 | "output_file": "analysis/protein_ligand_mindist.xvg",
875 | "analysis_type": "Protein-Ligand Minimum Distance"
876 | }
--------------------------------------------------------------------------------
/gromacs_copilot/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility modules for GROMACS Copilot
3 | """
4 |
5 | from gromacs_copilot.utils.terminal import Colors, print_message, prompt_user
6 | from gromacs_copilot.utils.shell import run_shell_command, check_command_exists, find_executable
7 | from gromacs_copilot.utils.logging_utils import setup_logging, TerminalLogHandler
8 |
9 | __all__ = [
10 | 'Colors',
11 | 'print_message',
12 | 'prompt_user',
13 | 'run_shell_command',
14 | 'check_command_exists',
15 | 'find_executable',
16 | 'setup_logging',
17 | 'TerminalLogHandler'
18 | ]
--------------------------------------------------------------------------------
/gromacs_copilot/utils/logging_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Logging utilities for GROMACS Copilot
3 | """
4 |
5 | import logging
6 | import sys
7 | from typing import Optional
8 |
9 | from gromacs_copilot.utils.terminal import print_message
10 | from gromacs_copilot.core.enums import MessageType
11 |
class TerminalLogHandler(logging.Handler):
    """Custom logging handler that formats log messages for terminal output"""

    def emit(self, record):
        # Map the record's severity onto the terminal message types and
        # delegate rendering to print_message.
        formatted = self.format(record)
        if record.levelno >= logging.ERROR:
            kind = MessageType.ERROR
        elif record.levelno >= logging.WARNING:
            kind = MessageType.WARNING
        else:
            kind = MessageType.INFO
        print_message(formatted, kind)
23 |
24 |
def setup_logging(log_file: Optional[str] = "md_agent.log", level: int = logging.INFO):
    """
    Set up logging for GROMACS Copilot

    Args:
        log_file: Path to log file
        level: Logging level
    """
    root = logging.getLogger()
    root.setLevel(level)

    # Start from a clean slate so repeated calls don't stack handlers.
    while root.handlers:
        root.removeHandler(root.handlers[0])

    # File output carries timestamps and levels; the terminal handler
    # does its own decoration, so it only gets the bare message.
    if log_file:
        to_file = logging.FileHandler(log_file)
        to_file.setLevel(level)
        to_file.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
        root.addHandler(to_file)

    to_terminal = TerminalLogHandler()
    to_terminal.setLevel(level)
    to_terminal.setFormatter(logging.Formatter("%(message)s"))
    root.addHandler(to_terminal)

    # Log setup completion
    logging.info(f"Logging initialized with level {logging.getLevelName(level)}")
--------------------------------------------------------------------------------
/gromacs_copilot/utils/shell.py:
--------------------------------------------------------------------------------
1 | """
2 | Shell command execution utilities for GROMACS Copilot
3 | """
4 |
5 | import subprocess
6 | import logging
7 | import shutil
8 | from typing import Dict, Any, Optional
9 |
10 | from gromacs_copilot.utils.terminal import print_message
11 | from gromacs_copilot.core.enums import MessageType
12 |
def run_shell_command(command: str, capture_output: bool = True,
                      suppress_output: bool = False) -> Dict[str, Any]:
    """
    Run a shell command with proper error handling

    Args:
        command: Shell command to run
        capture_output: Whether to capture stdout/stderr
        suppress_output: Whether to suppress terminal output

    Returns:
        Dictionary with command result information
    """
    logging.info(f"Running command: {command}")

    if not suppress_output:
        print_message(command, MessageType.COMMAND)

    try:
        if capture_output:
            proc = subprocess.run(
                command,
                shell=True,
                check=False,
                text=True,
                capture_output=True
            )
            ok = proc.returncode == 0

            if not suppress_output:
                if not ok:
                    print_message(f"Command failed with error:\n{proc.stderr}", MessageType.ERROR)
                elif len(proc.stdout) > 500:
                    # Long output is trimmed to keep the terminal readable.
                    trimmed_output = proc.stdout[:500] + "...\n[Output trimmed for brevity]"
                    print_message(f"Command succeeded with output:\n{trimmed_output}", MessageType.SUCCESS)
                elif proc.stdout.strip():
                    print_message(f"Command succeeded with output:\n{proc.stdout}", MessageType.SUCCESS)
                else:
                    print_message("Command succeeded with no output", MessageType.SUCCESS)

            return {
                "success": ok,
                "return_code": proc.returncode,
                "stdout": proc.stdout,
                "stderr": proc.stderr,
                "command": command
            }

        # Non-capturing mode: the child inherits our stdout/stderr.
        proc = subprocess.run(command, shell=True, check=False)
        ok = proc.returncode == 0

        if not suppress_output:
            if ok:
                print_message("Command succeeded", MessageType.SUCCESS)
            else:
                print_message("Command failed", MessageType.ERROR)

        return {
            "success": ok,
            "return_code": proc.returncode,
            "stdout": "Output not captured",
            "stderr": "Error output not captured",
            "command": command
        }
    except Exception as e:
        # Launch failure (not a nonzero exit): report it uniformly.
        error_msg = str(e)
        logging.error(f"Command execution failed: {error_msg}")

        if not suppress_output:
            print_message(f"Command execution failed: {error_msg}", MessageType.ERROR)

        return {
            "success": False,
            "return_code": 1,
            "stdout": "",
            "stderr": error_msg,
            "command": command,
            "error": error_msg
        }
97 |
98 |
def check_command_exists(command: str) -> bool:
    """
    Check if a command exists in the system PATH

    Args:
        command: Command to check

    Returns:
        bool: True if the command exists, False otherwise
    """
    # shutil.which resolves against PATH; it yields None on a miss.
    located = shutil.which(command)
    return located is not None
110 |
111 |
def find_executable(executable_names: list) -> Optional[str]:
    """
    Find an executable from a list of possible names

    Args:
        executable_names: List of possible executable names

    Returns:
        str: Path to the executable if found, None otherwise
    """
    # Lazily resolve each candidate; the first PATH hit wins.
    return next(
        (hit for hit in map(shutil.which, executable_names) if hit),
        None,
    )
--------------------------------------------------------------------------------
/gromacs_copilot/utils/terminal.py:
--------------------------------------------------------------------------------
1 | """
2 | Terminal output formatting utilities for GROMACS Copilot
3 | """
4 |
5 | import sys
6 | import shutil
7 | from typing import Optional
8 |
9 | from gromacs_copilot.core.enums import MessageType
10 |
class Colors:
    """ANSI color codes for terminal output"""
    RESET = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Foreground colors
    BLACK = "\033[30m"
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # Background colors
    BG_BLACK = "\033[40m"
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_YELLOW = "\033[43m"
    BG_BLUE = "\033[44m"
    BG_MAGENTA = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_WHITE = "\033[47m"

    # Bright variants
    BRIGHT_BLACK = "\033[90m"
    BRIGHT_RED = "\033[91m"
    BRIGHT_GREEN = "\033[92m"
    BRIGHT_YELLOW = "\033[93m"
    BRIGHT_BLUE = "\033[94m"
    BRIGHT_MAGENTA = "\033[95m"
    BRIGHT_CYAN = "\033[96m"
    BRIGHT_WHITE = "\033[97m"

    @classmethod
    def disable_colors(cls):
        """Disable all colors by setting them to empty strings"""
        # Blank every non-dunder, non-callable class attribute so every
        # escape sequence becomes a no-op in formatted strings.
        for name in dir(cls):
            if name.startswith('__'):
                continue
            if callable(getattr(cls, name)):
                continue
            setattr(cls, name, '')
53 |
54 |
def should_use_colors() -> bool:
    """
    Determine if colors should be used in terminal output

    Returns:
        bool: True if colors should be used, False otherwise
    """
    # ANSI escapes only make sense on an interactive terminal.
    stdout_is_tty = sys.stdout.isatty()
    return stdout_is_tty
63 |
64 |
def print_message(message: str, msg_type: MessageType = MessageType.INFO,
                  style: Optional[str] = None, width: Optional[int] = None):
    """
    Print a formatted message to the console

    Args:
        message: The message to print
        msg_type: Type of message (info, success, warning, error, etc.)
        style: Optional additional styling ("box" or "divider"); any other
            value falls through to the basic prefixed line
        width: Width of the message box (defaults to terminal width)
    """
    # Get terminal width if not specified
    if not width:
        try:
            width = shutil.get_terminal_size().columns
        except Exception:
            # FIX: was a bare "except:", which also swallowed SystemExit
            # and KeyboardInterrupt.  Fall back to a conventional 80 cols.
            width = 80

    # Configure colors and prefixes based on message type
    if msg_type == MessageType.INFO:
        color = Colors.CYAN
        prefix = "ℹ️ INFO │ "
    elif msg_type == MessageType.SUCCESS:
        color = Colors.GREEN
        prefix = "✓ SUCCESS │ "
    elif msg_type == MessageType.WARNING:
        color = Colors.YELLOW
        prefix = "⚠️ WARNING │ "
    elif msg_type == MessageType.ERROR:
        color = Colors.RED
        prefix = "✗ ERROR │ "
    elif msg_type == MessageType.TITLE:
        color = Colors.BRIGHT_BLUE + Colors.BOLD
        prefix = "🧪 "
    elif msg_type == MessageType.SYSTEM:
        color = Colors.BRIGHT_MAGENTA
        prefix = "🤖 SYSTEM │ "
    elif msg_type == MessageType.USER:
        color = Colors.BRIGHT_CYAN
        prefix = "👤 USER │ "
    elif msg_type == MessageType.COMMAND:
        color = Colors.BRIGHT_BLACK
        prefix = "$ "
    elif msg_type == MessageType.TOOL:
        color = Colors.BRIGHT_GREEN
        prefix = "🔧 TOOL │ "
    elif msg_type == MessageType.FINAL:
        color = Colors.BRIGHT_GREEN + Colors.BOLD
        prefix = "🏁 FINAL │ "
    else:
        color = ""
        prefix = ""

    # Apply styling
    if style == "box":
        # NOTE(review): the prefix is intentionally not rendered inside
        # boxes; only the color is applied.
        box_width = width - 4  # Account for side margins
        print(f"{color}┌{'─' * box_width}┐{Colors.RESET}")

        # Word-wrap the message so each line fits within the box.
        # (Collapses the message's own whitespace/newlines.)
        lines = []
        curr_line = ""

        for word in message.split():
            if len(curr_line) + len(word) + 1 <= box_width - 4:  # -4 for margins
                curr_line += word + " "
            else:
                lines.append(curr_line)
                curr_line = word + " "
        if curr_line:
            lines.append(curr_line)

        # Print each line within the box
        for line in lines:
            # NOTE(review): a single word longer than the box makes this
            # padding negative and the row overflows — pre-existing.
            padding = box_width - len(line) - 2
            print(f"{color}│ {line}{' ' * padding} │{Colors.RESET}")

        print(f"{color}└{'─' * box_width}┘{Colors.RESET}")

    elif style == "divider":
        print(f"{color}{'═' * width}{Colors.RESET}")
        print(f"{color}{prefix}{message}{Colors.RESET}")
        print(f"{color}{'═' * width}{Colors.RESET}")

    else:
        # Basic formatting with prefix
        print(f"{color}{prefix}{message}{Colors.RESET}")
151 |
152 |
def prompt_user(message: str, default: Optional[str] = None,
                choices: Optional[list] = None) -> str:
    """
    Prompt the user for input with optional default value and choices

    Args:
        message: The message to display to the user
        default: Optional default value if user hits enter
        choices: Optional list of valid choices

    Returns:
        str: The user's response
    """
    # Format message with default value if provided
    if default is not None:
        prompt = f"{Colors.BRIGHT_CYAN}{message} [{default}]: {Colors.RESET}"
    else:
        prompt = f"{Colors.BRIGHT_CYAN}{message}: {Colors.RESET}"

    # Simple prompt without choices.
    # FIX: this branch used to be an "else:" attached to the "while True:"
    # loop below (a while/else), so it could never execute; calling with
    # choices=None then crashed inside the loop on len(choices)/"in choices".
    if not choices:
        response = input(prompt)
        if not response and default is not None:
            return default
        return response

    # Print the numbered choices, then loop until a valid one is entered.
    for i, choice in enumerate(choices, 1):
        print(f"{Colors.BRIGHT_CYAN}  {i}. {choice}{Colors.RESET}")

    while True:
        response = input(prompt)

        # Use default if empty response and default provided
        if not response and default is not None:
            return default

        # Try to interpret as a choice number
        try:
            choice_idx = int(response) - 1
            if 0 <= choice_idx < len(choices):
                return choices[choice_idx]
            print(f"{Colors.YELLOW}Please enter a number between 1 and {len(choices)}{Colors.RESET}")
        except ValueError:
            # If response matches a choice directly, return it
            if response in choices:
                return response
            print(f"{Colors.YELLOW}Please enter a valid choice{Colors.RESET}")
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

# The PyPI long description is taken verbatim from the project README.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Packaging metadata for the gromacs_copilot distribution; exposes the
# "gmx_copilot" console entry point backed by gromacs_copilot.cli:main.
setup(
    name="gromacs_copilot",
    version="0.2.3",
    packages=find_packages(),
    install_requires=[
        "requests>=2.25.0",
        "mcp>=1.4.1",
    ],
    entry_points={
        "console_scripts": [
            "gmx_copilot=gromacs_copilot.cli:main",
        ],
    },
    author="ChatMol Team",
    author_email="jinyuansun@chatmol.org",
    description="A molecular dynamics simulation assistant powered by AI using GROMACS.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ChatMol/gromacs_copilot",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
)
--------------------------------------------------------------------------------