├── LICENSE ├── README.md ├── assets ├── demo.gif ├── feynman_quote.png ├── feynman_quote_3.png └── feynman_quote_cut.png ├── docs ├── API.md └── tutorials │ └── Getting_Started.md ├── environment.yaml ├── example ├── 4W52.pdb ├── 4W52.pdb.1 └── 4w52_C_EPE.sdf ├── notebooks ├── __init__.py └── example_tutorial.ipynb ├── pyproject.toml ├── src ├── __init__.py └── easy_md │ ├── main │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── quickrun.cpython-39.pyc │ │ ├── run_energy_minimization.cpython-39.pyc │ │ ├── run_equilibration.cpython-39.pyc │ │ ├── run_forcefield_parameterization.cpython-39.pyc │ │ ├── run_simulation.cpython-39.pyc │ │ └── run_solvation.cpython-39.pyc │ ├── quickrun.py │ ├── run_energy_minimization.py │ ├── run_forcefield_parameterization.py │ ├── run_ligand_preparation.py │ ├── run_simulation.py │ └── run_solvation.py │ └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ ├── config.cpython-39.pyc │ ├── dcd_image.cpython-39.pyc │ ├── fileparser.cpython-39.pyc │ ├── ligand_util.cpython-39.pyc │ ├── simulation.cpython-39.pyc │ └── simulation_util.cpython-39.pyc │ ├── config.py │ ├── dcd_image.py │ ├── fileparser.py │ ├── ligand_util.py │ ├── log.py │ ├── mmpbsa.py │ ├── openmm_structure_analyzer.py │ ├── rmsd_rmsf.py │ └── simulation_util.py └── tests ├── __pycache__ ├── conftest.cpython-39-pytest-8.4.0.pyc ├── test_energy_minimization.cpython-39-pytest-8.4.0.pyc ├── test_run_simulation.cpython-39-pytest-8.4.0.pyc └── test_setup.cpython-39-pytest-8.4.0.pyc └── test_setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ingrid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | Feynman quote 6 |

7 | 8 | # EasyMD 9 | 10 | EasyMD is a Python package that simplifies molecular dynamics simulations, making them accessible to both beginners and experts. It provides an automated, easy-to-use interface for running protein-ligand simulations using OpenMM as the backend. 11 | 12 | ![Demo Gif](assets/demo.gif) 13 | 14 | ## Who is it for? 15 | 16 | - **Computational chemists** who want to streamline their MD workflow 17 | - **Structural biologists** studying protein-ligand interactions 18 | - **Drug discovery researchers** analyzing binding dynamics 19 | - **Students and researchers** learning molecular dynamics 20 | - **Anyone** who wants to run MD simulations without dealing with complex setup 21 | 22 | ## Key Features 23 | 24 | - **Automated Setup**: From structure preparation to production runs 25 | - **Integrated Force Fields**: AMBER14 for proteins, OpenFF 2.0.0 for small molecules 26 | - **Flexible Configuration**: Easy to customize simulation parameters 27 | - **Progress Monitoring**: Real-time updates on simulation progress 28 | - **Analysis Tools**: Built-in tools for RMSD, RMSF calculations 29 | 30 | 31 | ## Tutorial 32 | 33 | Follow our tutorial to learn how to use easy-md in quickrun mode. 34 | [Run the interactive tutorial in Google Colab](https://colab.research.google.com/drive/1H7IQ7mrGBOpuUN4-5XS8Vh09pwK3PuwL?usp=sharing) 35 | 36 | ## Prerequisites 37 | 38 | First, install mamba, a fast package manager. 
You can use conda, but it's extremely slow for some packages: 39 | 40 | ```bash 41 | # If you have conda installed: 42 | conda install mamba -n base -c conda-forge 43 | 44 | # Or install mambaforge (standalone): 45 | # For macOS/Linux: 46 | curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" 47 | bash Mambaforge-$(uname)-$(uname -m).sh 48 | 49 | # For Windows: 50 | # Download Mambaforge from: 51 | # https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Windows-x86_64.exe 52 | ``` 53 | 54 | ## Installation 55 | 56 | ### Using mamba (Recommended) 57 | 58 | ```bash 59 | git clone https://github.com/ingcoder/easy-md.git 60 | cd easy-md 61 | mamba env create -f environment.yaml 62 | mamba activate easymd 63 | pip install -e . 64 | ``` 65 | 66 | ## Quick Start 67 | 68 | ```python 69 | from easy_md.main.quickrun import quickrun 70 | 71 | # Run a simple protein-ligand simulation 72 | quickrun( 73 | protein_file="path/to/protein.pdb", 74 | ligand_file="path/to/ligand.sdf", 75 | nsteps=1000 76 | ) 77 | ``` 78 | 79 | ## Step-by-Step Approach 80 | ```python 81 | from easy_md.utils.config import create_config 82 | from easy_md.main import run_solvation, run_forcefield_parameterization, run_energy_minimization, run_simulation 83 | 84 | config = create_config( 85 | protein_file="path/to/protein.pdb", 86 | ligand_file="path/to/ligand.sdf", 87 | 88 | # MD simulation settings. 
See "Simulation Parameters" section below for all options 89 | md_steps=1000, 90 | md_save_interval=10, 91 | 92 | # Platform settings 93 | platform_name="CPU", # or GPU 94 | platform_precision="mixed", # or "single" or "double" 95 | ) 96 | run_solvation.add_water(config=config) 97 | run_forcefield_parameterization.main(config) 98 | run_energy_minimization.main(config) 99 | run_simulation.main(config, starting_state_path="path/to/state.xml") 100 | # By default `run_simulation.main(config)` loads the energy-minimized state 101 | # saved in `emin.xml`. To resume a previous run instead, supply the path 102 | # to its state file (starting_state_path="path/to/state.xml") or to its checkpoint file (starting_checkpoint_path="path/to/checkpoint.chk"). 103 | ``` 104 | 105 | 106 | ## Simulation Parameters 107 | ```yaml 108 | # Energy Minimization Parameters 109 | emin_heating_interval: 1 # Interval for heating during minimization 110 | emin_heating_step: 300 # Heating step size 111 | emin_steps: 10 # Number of minimization steps 112 | emin_target_temp: 300 # Target temperature for minimization (K) 113 | emin_tolerance: 5 # Energy tolerance (kJ/mol/nm) 114 | 115 | # Force Field Parameters 116 | ff_protein: "amber14-all.xml" # Protein force field 117 | ff_protein_openff: "ff14sb_off_impropers_0.0.3.offxml" # OpenFF protein parameters 118 | ff_small_molecule_openff: "openff-2.0.0.offxml" # Small molecule force field 119 | ff_water: "amber14/tip3pfb.xml" # Water model 120 | 121 | # Integrator Settings 122 | integrator_friction: 1.0 # Langevin integrator friction coefficient (1/ps) 123 | integrator_temperature: 300.0 # Simulation temperature (K) 124 | integrator_timestep: 0.002 # Integration timestep (ps) 125 | 126 | # Molecular Dynamics Settings 127 | md_anisotropic: false # Use anisotropic barostat 128 | md_barostat_freq: 25 # Barostat update frequency 129 | md_harmonic_restraint: true # Apply harmonic restraints 130 | md_load_state: true # Load from previous state if available 131 | md_npt: 
false # Run in NPT ensemble 132 | md_pressure: 1.0 # System pressure (atm) 133 | md_restrained_residues: [] # List of residues to restrain 134 | md_save_interval: 10 # Trajectory save interval 135 | md_steps: 1000 # Number of MD steps 136 | 137 | # Monitoring Parameters 138 | monitor_energy_threshold: 100.0 # Energy monitoring threshold 139 | monitor_temp_threshold: 2.0 # Temperature monitoring threshold 140 | monitor_window: 10 # Monitoring window size 141 | 142 | # Solvation Parameters 143 | solv_box_buffer: 2.5 # Solvent box padding (Å) 144 | solv_ionic_strength: 0.15 # Ionic strength (M) 145 | solv_model: "tip3p" # Water model 146 | solv_negative_ion: "Cl-" # Negative ion type 147 | solv_pH: 7.0 # System pH 148 | solv_positive_ion: "Na+" # Positive ion type 149 | 150 | # Analysis Settings 151 | rmsd_ligand_selection: "resname UNK" # RMSD selection for ligand 152 | rmsd_selection: "protein and name CA" # RMSD selection for protein 153 | rmsf_selection: "protein and name CA" # RMSF selection 154 | 155 | # Platform Configuration 156 | platform_name: "GPU" # Computation platform (GPU/CPU) 157 | platform_precision: "mixed" # Precision model 158 | 159 | # Paths 160 | path_protein: path/to/protein.pdb # Required by user 161 | path_ligand: path/to/ligand.sdf # Required if you want to simulate protein-ligand interaction 162 | 163 | # Paths set automatically, unless provided by user 164 | 165 | path_base: path/to/project_dir # # Defaults to the parent directory containing the protein structure files 166 | 167 | path_amber_topology: path_base/output/amber_top.prmtop 168 | path_emin_state: path_base/output/emin.xml 169 | path_emin_structure: path_base/output/emin.pdb 170 | path_md_checkpoint: path_base/output/md_checkpoint_id.chk 171 | path_md_image: path_base/output/md_image_id.dcd #! Final processed trajectory file with molecules re-centered in water box. 
172 | path_md_log: path_base/output/md_id.log 173 | path_md_state: path_base/output/md_state_id.xml 174 | path_md_trajectory: path_base/output/md_trajetory_id.dcd 175 | path_openff_interchange: path_base/output/openff_interchange.pdb 176 | path_openff_topology: path_base/output/openff_topology.json 177 | path_openmm_system: path_base/output/openmm_system.xml 178 | path_openmm_topology: path_base/output/openmm_topology.pkl 179 | path_protein_solvated: path_base/output/protein_solvated.pdb 180 | path_rmsd_ligand_output: path_base/output/rmsd_ligand.pkl 181 | path_rmsd_output: path_base/output/rmsd.pkl 182 | path_rmsf_output: path_base/output/rmsf.log 183 | ``` 184 | 185 | ## Requirements 186 | 187 | Core dependencies are automatically managed through pyproject.toml: 188 | 189 | - Python >=3.9 190 | - OpenMM >=7.7.0 191 | - OpenFF-Toolkit >=0.11.0 192 | - MDAnalysis >=2.4.0 193 | 194 | Additional development dependencies are available in the conda environment.yaml file. 195 | 196 | ## Project Structure 197 | 198 | ``` 199 | my_project/ 200 | ├── config/ 201 | │ └── simulation_config.yaml # Simulation parameters 202 | ├── structures/ 203 | │ ├── protein.pdb # Input protein structure 204 | │ └── ligand.sdf # Input ligand structure 205 | └── output/ # Simulation outputs 206 | ``` 207 | 208 | ## License 209 | 210 | This project is licensed under the MIT License - see the LICENSE file for details. 211 | 212 | ## Contributing 213 | 214 | We welcome contributions! Please feel free to submit a Pull Request. 
215 | 216 | ## Citation 217 | 218 | If you use EasyMD in your research, please cite it as: 219 | 220 | ```bibtex 221 | @software{easymd2025, 222 | author = {Ingrid Barbosa-Farias, SimAtomic}, 223 | title = {EasyMD: A Python Package for Simplified Molecular Dynamics Simulations}, 224 | year = {2025}, 225 | publisher = {GitHub}, 226 | url = {https://github.com/ingcoder/easy-md} 227 | } 228 | ``` 229 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/assets/demo.gif -------------------------------------------------------------------------------- /assets/feynman_quote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/assets/feynman_quote.png -------------------------------------------------------------------------------- /assets/feynman_quote_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/assets/feynman_quote_3.png -------------------------------------------------------------------------------- /assets/feynman_quote_cut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/assets/feynman_quote_cut.png -------------------------------------------------------------------------------- /docs/API.md: -------------------------------------------------------------------------------- 1 | # EasyMD API Documentation 2 | 3 | This document provides detailed information about the key functions and classes in EasyMD. 
4 | 5 | ## Main Module Functions 6 | 7 | ### quickrun 8 | 9 | ```python 10 | def quickrun(protein_file: str, ligand_file: str = None, nsteps: int = 1000) -> None 11 | ``` 12 | 13 | Runs a complete molecular dynamics simulation workflow with minimal configuration. 14 | 15 | **Parameters:** 16 | - `protein_file` (str): Path to the input protein PDB file 17 | - `ligand_file` (str, optional): Path to the ligand SDF file 18 | - `nsteps` (int, optional): Number of simulation steps (default: 1000) 19 | 20 | **Example:** 21 | ```python 22 | from easy_md.main.quickrun import quickrun 23 | 24 | quickrun( 25 | protein_file="protein.pdb", 26 | ligand_file="ligand.sdf", 27 | nsteps=1000 28 | ) 29 | ``` 30 | 31 | ### run_solvation.add_water 32 | 33 | ```python 34 | def add_water(config: dict) -> None 35 | ``` 36 | 37 | Solvates the molecular system by adding water molecules and ions. 38 | 39 | **Parameters:** 40 | - `config` (dict): Configuration dictionary containing simulation parameters 41 | 42 | **Key Configuration Parameters:** 43 | - `solv_box_buffer`: Buffer size around the solute (in Å) 44 | - `solv_ionic_strength`: Ionic concentration (in M) 45 | - `solv_positive_ion`: Type of positive ion (Na+, K+, etc.) 46 | - `solv_negative_ion`: Type of negative ion (Cl-, etc.) 47 | 48 | ### run_forcefield_parameterization.main 49 | 50 | ```python 51 | def main(config: dict, print_detailed_info: bool = False) -> None 52 | ``` 53 | 54 | Applies force field parameters to the molecular system. 55 | 56 | **Parameters:** 57 | - `config` (dict): Configuration dictionary 58 | - `print_detailed_info` (bool): Whether to print detailed force field information 59 | 60 | ### run_energy_minimization.main 61 | 62 | ```python 63 | def main(config: dict) -> None 64 | ``` 65 | 66 | Performs energy minimization of the molecular system. 
67 | 68 | **Parameters:** 69 | - `config` (dict): Configuration dictionary containing simulation parameters 70 | 71 | ### run_simulation.main 72 | 73 | ```python 74 | def main(config: dict = None, starting_state_path: str = None, starting_checkpoint_path: str = None, equilibration_only: bool = False) -> None 75 | ``` 76 | 77 | Runs the production molecular dynamics simulation. 78 | 79 | **Parameters:** 80 | - `config` (dict): Configuration dictionary 81 | - `starting_state_path` (str, optional): Path to a starting state file 82 | - `starting_checkpoint_path` (str, optional): Path to a checkpoint file 83 | - `equilibration_only` (bool): Whether to run only the equilibration phase 84 | 85 | ## Utility Functions 86 | 87 | ### config.create_config 88 | 89 | ```python 90 | def create_config( 91 | protein_file: str = None, 92 | ligand_file: str = None, 93 | project_dir: str = None, 94 | output_dir: str = None, 95 | config_dir: str = None, 96 | save_config_as: str = "simulation_config.yaml", 97 | **params 98 | ) -> Dict[str, Any] 99 | ``` 100 | 101 | Creates a configuration dictionary with default and user-provided settings. 102 | 103 | **Parameters:** 104 | - `protein_file` (str): Path to the protein PDB file 105 | - `ligand_file` (str, optional): Path to the ligand file 106 | - `project_dir` (str, optional): Project directory path 107 | - `output_dir` (str, optional): Output directory path 108 | - `config_dir` (str, optional): Configuration directory path 109 | - `save_config_as` (str): Name of the configuration file 110 | - `**params`: Additional configuration parameters 111 | 112 | **Returns:** 113 | - Dict[str, Any]: Configuration dictionary 114 | 115 | ### simulation_util.setup_barostat 116 | 117 | ```python 118 | def setup_barostat(temperature: float, pressure: float, barostat_frequency: int, use_anisotropic: bool = False) -> Union[MonteCarloBarostat, MonteCarloAnisotropicBarostat] 119 | ``` 120 | 121 | Configures and returns a barostat for pressure control. 
122 | 123 | **Parameters:** 124 | - `temperature` (float): System temperature in Kelvin 125 | - `pressure` (float): Target pressure in atmospheres 126 | - `barostat_frequency` (int): Frequency of barostat updates 127 | - `use_anisotropic` (bool): Whether to use anisotropic pressure coupling 128 | 129 | **Returns:** 130 | - OpenMM Barostat object 131 | 132 | ## Configuration Parameters 133 | 134 | ### Paths 135 | - `path_base`: Base project directory 136 | - `path_protein`: Input protein structure file 137 | - `path_ligand`: Input ligand structure file 138 | - `path_protein_solvated`: Solvated system output 139 | - `path_openff_topology`: OpenFF topology file 140 | - `path_openmm_system`: OpenMM system file 141 | - `path_emin_structure`: Energy minimized structure 142 | - `path_md_trajectory`: Trajectory output file 143 | - `path_md_checkpoint`: Checkpoint file 144 | 145 | ### Force Fields 146 | - `ff_small_molecule_openff`: Small molecule force field 147 | - `ff_protein_openff`: Protein force field 148 | - `ff_protein`: Alternative protein force field 149 | - `ff_water`: Water model 150 | 151 | ### Integrator Settings 152 | - `integrator_temperature`: System temperature (K) 153 | - `integrator_friction`: Friction coefficient (1/ps) 154 | - `integrator_timestep`: Integration time step (ps) 155 | 156 | ### Equilibration Parameters 157 | - `md_steps`: Total number of MD steps 158 | - `md_save_interval`: Frequency of saving coordinates (in steps) 159 | - `md_pressure`: Target pressure (atm) 160 | - `md_barostat_freq`: Barostat update frequency 161 | 162 | ### Platform Settings 163 | - `platform_name`: Computation platform (CUDA, OpenCL, CPU) 164 | - `platform_precision`: Precision model ("mixed", "single", or "double") -------------------------------------------------------------------------------- /docs/tutorials/Getting_Started.md: -------------------------------------------------------------------------------- 1 | # Getting Started with EasyMD 2 | 3 | This tutorial will guide you through common use cases of 
EasyMD, from basic protein-ligand simulations to more advanced scenarios. 4 | 5 | ## Basic Protein-Ligand Simulation 6 | 7 | ### 1. Quick Start 8 | 9 | The simplest way to run a simulation is using the `quickrun` function: 10 | 11 | ```python 12 | from easy_md.main.quickrun import quickrun 13 | 14 | quickrun( 15 | protein_file="protein.pdb", 16 | ligand_file="ligand.sdf", 17 | nsteps=1000 18 | ) 19 | ``` 20 | 21 | This will automatically: 22 | 1. Solvate your system 23 | 2. Apply force field parameters 24 | 3. Perform energy minimization 25 | 4. Run a short simulation 26 | 27 | ### 2. Step-by-Step Approach 28 | 29 | For more control over the process, you can run each step individually: 30 | 31 | ```python 32 | from easy_md.main import run_solvation, run_forcefield_parameterization 33 | from easy_md.main import run_energy_minimization, run_simulation 34 | from easy_md.utils.config import create_config 35 | 36 | # Create configuration 37 | config = create_config( 38 | protein_file="protein.pdb", 39 | ligand_file="ligand.sdf", 40 | project_dir="my_simulation" 41 | ) 42 | 43 | # Step 1: Solvate the system 44 | run_solvation.add_water(config=config) 45 | 46 | # Step 2: Apply force fields 47 | run_forcefield_parameterization.main(config) 48 | 49 | # Step 3: Minimize energy 50 | run_energy_minimization.main(config) 51 | 52 | # Step 4: Run simulation 53 | run_simulation.main(config) 54 | ``` 55 | 56 | ## Advanced Usage 57 | 58 | ### 1. 
Customizing Simulation Parameters 59 | 60 | Create a custom configuration file (`config.yaml`): 61 | 62 | ```yaml 63 | paths: 64 | base_folder: "my_simulation" 65 | ligand: "ligand.sdf" 66 | solvated_protein: "protein_solvated.pdb" 67 | 68 | integrator: 69 | temperature_kelvin: 310 # Body temperature 70 | friction_coeff_ps: 1 71 | time_step_ps: 0.002 72 | 73 | equilibration: 74 | total_steps: 5000000 # 10 ns 75 | save_interval: 50000 # Save every 100 ps 76 | pressure_atm: 1.0 77 | barostat_freq: 25 78 | ``` 79 | 80 | Load and use the custom configuration: 81 | 82 | ```python 83 | import yaml 84 | from easy_md.utils.config import create_config 85 | 86 | # Load custom settings 87 | with open("config.yaml") as f: 88 | custom_settings = yaml.safe_load(f) 89 | 90 | # Create config with custom settings 91 | config = create_config( 92 | protein_file="protein.pdb", 93 | ligand_file="ligand.sdf", 94 | **custom_settings 95 | ) 96 | 97 | # Run simulation with custom settings 98 | run_simulation.main(config) 99 | ``` 100 | 101 | ### 2. Continuing from a Checkpoint 102 | 103 | ```python 104 | from easy_md.main import run_simulation 105 | 106 | # Continue from a checkpoint 107 | run_simulation.main( 108 | config=config, 109 | starting_checkpoint_path="output/md_checkpoint_0.chk" 110 | ) 111 | ``` 112 | 113 | ### 3. Running NPT Equilibration 114 | 115 | For membrane proteins or when volume equilibration is important: 116 | 117 | ```python 118 | config = create_config( 119 | protein_file="protein.pdb", 120 | ligand_file="ligand.sdf", 121 | md_npt=True, 122 | md_pressure=1.0, # atm 123 | md_barostat_freq=25 124 | ) 125 | 126 | run_simulation.main(config) 127 | ``` 128 | 129 | ### 4. 
Using Position Restraints 130 | 131 | To restrain specific residues during simulation: 132 | 133 | ```python 134 | config = create_config( 135 | protein_file="protein.pdb", 136 | ligand_file="ligand.sdf", 137 | md_harmonic_restraint=True, 138 | md_restrained_residues=[1, 2, 3, 4, 5] # Residue indices to restrain 139 | ) 140 | 141 | run_simulation.main(config) 142 | ``` 143 | 144 | ## Tips and Best Practices 145 | 146 | 1. **System Preparation** 147 | - Always check your input structures 148 | - Ensure proper protonation states 149 | - Remove any unwanted molecules/ions 150 | 151 | 2. **Simulation Length** 152 | - Start with short test runs (1000 steps) 153 | - Increase length for production runs 154 | - Monitor energy and temperature convergence 155 | 156 | 3. **File Management** 157 | - Use descriptive file names 158 | - Keep input files organized 159 | - Save checkpoints regularly 160 | 161 | 4. **Performance Optimization** 162 | - Use CUDA platform when available 163 | - Adjust save intervals based on needs 164 | - Consider periodic boundary conditions 165 | 166 | ## Troubleshooting 167 | 168 | ### Common Issues 169 | 170 | 1. **System Crashes** 171 | ```python 172 | # Reduce time step 173 | config = create_config( 174 | protein_file="protein.pdb", 175 | integrator_timestep=0.001 # Reduced from 0.002 176 | ) 177 | ``` 178 | 179 | 2. **Energy Instability** 180 | ```python 181 | # Add more minimization steps via the `emin_steps` setting 182 | run_energy_minimization.main(create_config(protein_file="protein.pdb", emin_steps=5000)) 183 | ``` 184 | 185 | 3. 
**Memory Issues** 186 | ```python 187 | # Reduce trajectory saving frequency 188 | config = create_config( 189 | protein_file="protein.pdb", 190 | md_save_interval=5000 # Save less frequently 191 | ) 192 | ``` 193 | 194 | ### Getting Help 195 | 196 | - Check the [API Documentation](API.md) 197 | - Look for similar issues in the repository 198 | - Contact the developers with: 199 | - Your configuration file 200 | - Error messages 201 | - System details -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: easymd 2 | channels: 3 | - conda-forge 4 | - defaults 5 | 6 | dependencies: 7 | # Core MD packages 8 | - python=3.9 9 | - openmm=8.0.0 10 | - pdbfixer 11 | - mdanalysis 12 | - openff-toolkit 13 | 14 | # Analysis and visualization 15 | - pandas 16 | - matplotlib 17 | 18 | # Python packages to be installed with pip 19 | - pip 20 | - pip: 21 | - notebook 22 | - rdkit -------------------------------------------------------------------------------- /example/4w52_C_EPE.sdf: -------------------------------------------------------------------------------- 1 | EPE 2 | ModelServer 0.9.12 3 | 4 | 15 15 0 0 0 0 0 0 0 0 0 5 | -29.7830 16.9570 13.1410 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | -29.9710 15.4910 13.1910 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -28.9220 14.8610 14.0940 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -27.5810 15.2340 13.6930 N 0 0 0 0 0 0 0 0 0 0 0 0 9 | -27.3780 16.6610 13.5030 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -28.4500 17.2510 12.5850 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -26.5210 14.5240 14.4230 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -26.6300 14.5300 15.9510 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -26.6080 15.8710 16.4190 O 0 0 0 0 0 0 0 0 0 0 0 0 14 | -30.8290 17.5120 12.2540 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -30.7540 19.0320 12.3550 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -32.3280 19.7980 11.8950 S 0 0 0 0 0 0 0 0 0 0 0 0 17 | -32.2980 21.2130 12.2660 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | 
-32.4970 19.6480 10.4440 O 0 0 0 0 0 0 0 0 0 0 0 0 19 | -33.3810 19.0720 12.6100 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 1 2 1 0 0 0 0 21 | 1 6 1 0 0 0 0 22 | 1 10 1 0 0 0 0 23 | 2 3 1 0 0 0 0 24 | 3 4 1 0 0 0 0 25 | 4 5 1 0 0 0 0 26 | 4 7 1 0 0 0 0 27 | 5 6 1 0 0 0 0 28 | 7 8 1 0 0 0 0 29 | 8 9 1 0 0 0 0 30 | 10 11 1 0 0 0 0 31 | 11 12 1 0 0 0 0 32 | 12 13 2 0 0 0 0 33 | 12 14 2 0 0 0 0 34 | 12 15 1 0 0 0 0 35 | M END 36 | > 37 | nGCHuIlqOPrIxoP4lpNJjA 38 | 39 | > 40 | 2025-06-03 01:48:06 41 | 42 | > 43 | 0.9.12 44 | 45 | > 46 | ligand 47 | 48 | > 49 | pdb-bcif 50 | 51 | > 52 | 4w52 53 | 54 | > 55 | atom_site 56 | 57 | > 58 | {"label_asym_id":"C","auth_seq_id":201} 59 | 60 | > 61 | 31 62 | 63 | > 64 | 8 65 | 66 | > 67 | 2 68 | 69 | > 70 | 254 71 | 72 | > 73 | 0 74 | 75 | > 76 | 15 77 | 78 | $$$$ 79 | -------------------------------------------------------------------------------- /notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/notebooks/__init__.py -------------------------------------------------------------------------------- /notebooks/example_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example Notebook easy-md\n", 8 | "\n", 9 | "Molecular dynamics (MD) simulations are essential tools in computational biology and drug discovery, but setting them up can be challenging and time-consuming. 
easy-md simplifies this process by providing a streamlined, user-friendly interface to OpenMM, one of the most powerful MD engines available.\n", 10 | "\n", 11 | "## Who is this for?\n", 12 | "- **Computational chemists** and **structural biologists** who want to quickly set up and run protein-ligand simulations\n", 13 | "- **Drug discovery scientists** who need to evaluate ligand binding stability without dealing with complex MD setup\n", 14 | "- **Academic researchers** and **students** new to molecular dynamics who want to focus on the science rather than technical details\n", 15 | "- **Method developers** who need a reliable baseline for comparing simulation protocols\n", 16 | "\n", 17 | "## Why easy-md?\n", 18 | "Traditional MD setup often requires:\n", 19 | "- Deep knowledge of multiple file formats and force fields\n", 20 | "- Manual parameter tweaking and system preparation\n", 21 | "- Complex scripting to combine different tools\n", 22 | "- Extensive debugging of setup issues\n", 23 | "\n", 24 | "easy-md solves these pain points by:\n", 25 | "- Providing a single, coherent interface for the entire workflow\n", 26 | "- Automating force field selection and parameter assignment\n", 27 | "- Handling common setup tasks (missing atoms, solvation, etc.)\n", 28 | "- Including sensible defaults based on best practices\n", 29 | "- Offering both quick-start (`quickrun`) and detailed control options\n", 30 | "\n", 31 | "## What makes it different?\n", 32 | "Unlike existing solutions, easy-md:\n", 33 | "- Requires minimal input (just protein and ligand structures)\n", 34 | "- Produces standardized, reproducible outputs\n", 35 | "- Maintains flexibility for advanced users while being accessible to beginners\n", 36 | "- Focuses specifically on protein-ligand systems common in drug discovery\n", 37 | "\n", 38 | "In this notebook, we'll demonstrate these features by simulating a protein-ligand system:\n", 39 | "- Protein: PDB ID 4W52\n", 40 | "- Ligand: EPE 
(HEPES, 4-(2-hydroxyethyl)-1-piperazineethanesulfonic acid)\n", 41 | "- Simulation length: 1000 steps\n", 42 | "- Force field: AMBER14 for protein, OpenFF 2.0.0 for ligand\n", 43 | "- Water model: TIP3P\n", 44 | "\n", 45 | "## Workflow Overview\n", 46 | "1. System setup and configuration\n", 47 | "2. Solvation (adding water and ions)\n", 48 | "3. Force field parameterization\n", 49 | "4. Energy minimization\n", 50 | "5. Production MD simulation" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Saving configuration to: /Users/ingrid/Projects/EasyMD/easy-md/example/config/simulation_config.yaml\n", 63 | "\n", 64 | "=== Simulation Configuration ===\n", 65 | "\n", 66 | "\n", 67 | "EMIN:\n", 68 | " emin_heating_interval: 1\n", 69 | " emin_heating_step: 300\n", 70 | " emin_steps: 10\n", 71 | " emin_target_temp: 300\n", 72 | " emin_tolerance: 5 * kilojoule_per_mole / nanometer\n", 73 | "\n", 74 | "FF:\n", 75 | " ff_protein: amber14-all.xml\n", 76 | " ff_protein_openff: ff14sb_off_impropers_0.0.3.offxml\n", 77 | " ff_small_molecule_openff: openff-2.0.0.offxml\n", 78 | " ff_water: amber14/tip3pfb.xml\n", 79 | "\n", 80 | "INTEGRATOR:\n", 81 | " integrator_friction: 1.0\n", 82 | " integrator_temperature: 300.0\n", 83 | " integrator_timestep: 0.002\n", 84 | "\n", 85 | "MD:\n", 86 | " md_anisotropic: False\n", 87 | " md_barostat_freq: 25\n", 88 | " md_harmonic_restraint: True\n", 89 | " md_load_state: True\n", 90 | " md_npt: False\n", 91 | " md_pressure: 1.0\n", 92 | " md_restrained_residues: []\n", 93 | " md_save_interval: 10\n", 94 | " md_steps: 1000\n", 95 | "\n", 96 | "MONITOR:\n", 97 | " monitor_energy_threshold: 100.0\n", 98 | " monitor_temp_threshold: 2.0\n", 99 | " monitor_window: 10\n", 100 | "\n", 101 | "PATH:\n", 102 | " path_amber_topology: /Users/ingrid/Projects/EasyMD/easy-md/example/output/amber_top.prmtop\n", 103 | " path_base: 
/Users/ingrid/Projects/EasyMD/easy-md/example\n", 104 | " path_emin_state: /Users/ingrid/Projects/EasyMD/easy-md/example/output/emin.xml\n", 105 | " path_emin_structure: /Users/ingrid/Projects/EasyMD/easy-md/example/output/emin.pdb\n", 106 | " path_ligand: /Users/ingrid/Projects/EasyMD/easy-md/example/4w52_C_EPE.sdf\n", 107 | " path_md_checkpoint: /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_checkpoint_id.chk\n", 108 | " path_md_image: /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_image_id.dcd\n", 109 | " path_md_log: /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_id.log\n", 110 | " path_md_state: /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_state_id.xml\n", 111 | " path_md_trajectory: /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_trajetory_id.dcd\n", 112 | " path_openff_interchange: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openff_interchange.pdb\n", 113 | " path_openff_topology: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openff_topology.json\n", 114 | " path_openmm_system: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openmm_system.xml\n", 115 | " path_openmm_topology: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openmm_topology.pkl\n", 116 | " path_protein: /Users/ingrid/Projects/EasyMD/easy-md/example/4W52.pdb\n", 117 | " path_protein_solvated: /Users/ingrid/Projects/EasyMD/easy-md/example/output/protein_solvated.pdb\n", 118 | " path_rmsd_ligand_output: /Users/ingrid/Projects/EasyMD/easy-md/example/output/rmsd_ligand.pkl\n", 119 | " path_rmsd_output: /Users/ingrid/Projects/EasyMD/easy-md/example/output/rmsd.pkl\n", 120 | " path_rmsf_output: /Users/ingrid/Projects/EasyMD/easy-md/example/output/rmsf.log\n", 121 | "\n", 122 | "PLATFORM:\n", 123 | " platform_name: CPU\n", 124 | " platform_precision: mixed\n", 125 | "\n", 126 | "RMSD:\n", 127 | " rmsd_ligand_selection: resname UNK\n", 128 | " rmsd_selection: protein and name CA\n", 129 | "\n", 130 | "RMSF:\n", 131 | " 
rmsf_selection: protein and name CA\n", 132 | "\n", 133 | "SOLV:\n", 134 | " solv_box_buffer: 2.5\n", 135 | " solv_ionic_strength: 0.15\n", 136 | " solv_model: tip3p\n", 137 | " solv_negative_ion: Cl-\n", 138 | " solv_pH: 7.0\n", 139 | " solv_positive_ion: Na+\n", 140 | "\n", 141 | "Found 18 missing heavy atoms - adding them now...\n", 142 | "Adding missing hydrogens...\n", 143 | "\n", 144 | "Final Box Dimensions (nanometers):\n", 145 | "Width (X-axis): 9.05017375946045\n", 146 | "Height (Y-axis): 6.664648711681366\n", 147 | "Depth (Z-axis): 7.645820617675781\n", 148 | "Adding solvent...\n", 149 | "Saved solvated structure to: /Users/ingrid/Projects/EasyMD/easy-md/example/output/protein_solvated.pdb\n" 150 | ] 151 | }, 152 | { 153 | "name": "stderr", 154 | "output_type": "stream", 155 | "text": [ 156 | "[08:15:48] Warning: molecule is tagged as 2D, but at least one Z coordinate is not zero. Marking the mol as 3D.\n" 157 | ] 158 | }, 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "\n", 164 | "=== Creating OpenFF Topology For Solvated Protein-Ligand Complex ===\n", 165 | "Removed [H][O][H] molecule\n", 166 | "Removed [H][O][H] molecule\n", 167 | "Removed [H][O][H] molecule\n", 168 | "Removed [H][O][H] molecule\n", 169 | "Removed [H][O][H] molecule\n", 170 | "Removed [H][O][H] molecule\n", 171 | "Found a large molecule with 2770 atoms, which might be a protein or polymer.\n", 172 | "Done! File saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/openff_topology.json. File includes protein and ligand.\n", 173 | "Function 'create_openff_topology' took 6.0384 seconds to execute\n", 174 | "\n", 175 | "=== Parameterizing OpenFF System ===\n", 176 | "Done! 
OpenFF Interchange created.\n", 177 | "Function 'parameterize_openff_system' took 83.4053 seconds to execute\n", 178 | "\n", 179 | "=== Converting to OpenMM System and Topology ===\n", 180 | "\n", 181 | "System Consistency Check:\n", 182 | "Number of particles in Interchange: 43885\n", 183 | "Number of particles in OpenMM System: 43885\n", 184 | "Number of particles in OpenMM Topology: 43885\n", 185 | "\n", 186 | "✓ Particle count is consistent across all representations\n", 187 | "Done! Files saved to:\n", 188 | "OpenFF Interchange: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openff_interchange.pdb\n", 189 | "OpenMM Topology: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openmm_topology.pkl\n", 190 | "OpenMM System: /Users/ingrid/Projects/EasyMD/easy-md/example/output/openmm_system.xml\n", 191 | "Function 'save_openmm_system_topology' took 36.1092 seconds to execute\n", 192 | "Function 'main' took 164.8703 seconds to execute\n", 193 | "Platform being used: CPU\n", 194 | "\n", 195 | "=== Initial Energy Minimization ===\n", 196 | "Initial minimized state has energy -740305.97 kJ/mol with maximum force 2543.1 kJ/(mol nm)\n", 197 | "\n", 198 | "=== Gradual Heating Process ===\n", 199 | "Heating at 0K has energy -740303.04 kJ/mol with maximum force 2544.33 kJ/(mol nm)\n", 200 | "Heating at 300K has energy -735945.03 kJ/mol with maximum force 2855.51 kJ/(mol nm)\n", 201 | "\n", 202 | "=== Final Equilibration at 300K ===\n", 203 | "\n", 204 | "=== Final Energy Minimization ===\n", 205 | "Final minimized state has energy -740964.28 kJ/mol with maximum force 2558.7 kJ/(mol nm)\n", 206 | "Files saved\n", 207 | "Function 'main' took 64.1066 seconds to execute\n", 208 | "Adding harmonic positional restraints with force constant 100 kJ/mol/nm^2...\n", 209 | "Added positional restraints to 172 heavy atoms\n", 210 | "Restraints per chain: {'A': 172}\n", 211 | "Platform being used: CPU\n", 212 | "No starting state or checkpoint provided. 
Using /Users/ingrid/Projects/EasyMD/easy-md/example/output/emin.xml\n", 213 | "Successfully loaded state from /Users/ingrid/Projects/EasyMD/easy-md/example/output/emin.xml\n", 214 | "\n", 215 | "=== Simulation ===\n", 216 | "#\"Progress (%)\"\t\"Step\"\t\"Time (ps)\"\t\"Potential Energy (kJ/mole)\"\t\"Temperature (K)\"\t\"Box Volume (nm^3)\"\t\"Speed (ns/day)\"\n", 217 | "1.0%\t10\t0.020000000000000004\t-687245.6771970796\t158.3849711859441\t461.16378929373604\t0\n", 218 | "2.0%\t20\t0.04000000000000002\t-684556.863750027\t162.06188405072132\t461.16378929373604\t1.55\n", 219 | "3.0%\t30\t0.06000000000000004\t-682512.7872637622\t161.085942793739\t461.16378929373604\t2.1\n", 220 | "4.0%\t40\t0.08000000000000006\t-678888.1389281786\t157.29060224339202\t461.16378929373604\t2.08\n", 221 | "5.0%\t50\t0.10000000000000007\t-675858.7328551698\t154.0621616028922\t461.16378929373604\t2.07\n", 222 | "6.0%\t60\t0.12000000000000009\t-673922.0259149924\t155.30875852069295\t461.16378929373604\t2.22\n", 223 | "7.0%\t70\t0.1400000000000001\t-672501.7573181267\t157.3182001380063\t461.16378929373604\t2.29\n", 224 | "8.0%\t80\t0.16000000000000011\t-671281.3800127131\t159.21608456345822\t461.16378929373604\t2.36\n", 225 | "9.0%\t90\t0.18000000000000013\t-670534.2868309194\t162.70161081076657\t461.16378929373604\t2.36\n", 226 | "10.0%\t100\t0.20000000000000015\t-670521.1444745564\t168.1735627505904\t461.16378929373604\t2.45\n", 227 | "11.0%\t110\t0.22000000000000017\t-669570.9320585289\t170.8137221365629\t461.16378929373604\t2.47\n", 228 | "12.0%\t120\t0.24000000000000019\t-668919.5598695717\t173.9342926131363\t461.16378929373604\t2.52\n", 229 | "13.0%\t130\t0.2600000000000002\t-668346.2053486005\t177.39745636423402\t461.16378929373604\t2.52\n", 230 | "14.0%\t140\t0.2800000000000002\t-667889.1411243968\t180.70571592155162\t461.16378929373604\t2.52\n", 231 | "15.0%\t150\t0.3000000000000002\t-666697.6126957997\t182.31769179536732\t461.16378929373604\t2.53\n", 232 | 
"16.0%\t160\t0.32000000000000023\t-665423.0333995527\t183.8182438385848\t461.16378929373604\t2.58\n", 233 | "17.0%\t170\t0.34000000000000025\t-665095.7429981803\t187.22647609204105\t461.16378929373604\t2.55\n", 234 | "18.0%\t180\t0.36000000000000026\t-663689.2249053663\t187.81225110221638\t461.16378929373604\t2.53\n", 235 | "19.0%\t190\t0.3800000000000003\t-663320.4535261746\t191.52764080663292\t461.16378929373604\t2.56\n", 236 | "20.0%\t200\t0.4000000000000003\t-662559.1070479071\t194.07511115436387\t461.16378929373604\t2.53\n", 237 | "21.0%\t210\t0.4200000000000003\t-661210.7546091119\t194.9321885223048\t461.16378929373604\t2.5\n", 238 | "22.0%\t220\t0.44000000000000034\t-660301.2694453005\t196.58825056675448\t461.16378929373604\t2.52\n", 239 | "23.0%\t230\t0.46000000000000035\t-659893.6275862535\t199.54579230764026\t461.16378929373604\t2.53\n", 240 | "24.0%\t240\t0.48000000000000037\t-659409.97548537\t201.65611141592174\t461.16378929373604\t2.56\n", 241 | "25.0%\t250\t0.5000000000000003\t-658424.1123671602\t202.93086212023368\t461.16378929373604\t2.56\n", 242 | "26.0%\t260\t0.5200000000000004\t-657877.5277255446\t206.0687519636778\t461.16378929373604\t2.56\n", 243 | "27.0%\t270\t0.5400000000000004\t-656864.8757256516\t206.6993817407389\t461.16378929373604\t2.58\n", 244 | "28.0%\t280\t0.5600000000000004\t-655866.8247287408\t208.2718121229176\t461.16378929373604\t2.59\n", 245 | "29.0%\t290\t0.5800000000000004\t-655396.7156868107\t210.44365284947088\t461.16378929373604\t2.58\n", 246 | "30.0%\t300\t0.6000000000000004\t-654575.8811563747\t211.2506730238044\t461.16378929373604\t2.6\n", 247 | "31.0%\t310\t0.6200000000000004\t-653705.2070242384\t212.78381552479487\t461.16378929373604\t2.6\n", 248 | "32.0%\t320\t0.6400000000000005\t-653254.3966647245\t215.31701513278975\t461.16378929373604\t2.6\n", 249 | "33.0%\t330\t0.6600000000000005\t-652080.8731968096\t215.03078474165585\t461.16378929373604\t2.62\n", 250 | 
"34.0%\t340\t0.6800000000000005\t-651973.8708222874\t218.27705036705552\t461.16378929373604\t2.6\n", 251 | "35.0%\t350\t0.7000000000000005\t-651066.0424241053\t218.54494581588705\t461.16378929373604\t2.51\n", 252 | "36.0%\t360\t0.7200000000000005\t-649888.7281007727\t218.1359232615218\t461.16378929373604\t2.5\n", 253 | "37.0%\t370\t0.7400000000000005\t-649559.5447972179\t221.05797384461158\t461.16378929373604\t2.5\n", 254 | "38.0%\t380\t0.7600000000000006\t-649142.5529243209\t222.5694796668022\t461.16378929373604\t2.5\n", 255 | "39.0%\t390\t0.7800000000000006\t-647975.4944884803\t222.5041454143707\t461.16378929373604\t2.49\n", 256 | "40.0%\t400\t0.8000000000000006\t-647638.5229987161\t225.3591836298202\t461.16378929373604\t2.51\n", 257 | "41.0%\t410\t0.8200000000000006\t-646868.7580508392\t226.99481341348044\t461.16378929373604\t2.51\n", 258 | "42.0%\t420\t0.8400000000000006\t-646364.6163487327\t228.33483002112985\t461.16378929373604\t2.53\n", 259 | "43.0%\t430\t0.8600000000000007\t-646075.1252082065\t230.23617544620177\t461.16378929373604\t2.52\n", 260 | "44.0%\t440\t0.8800000000000007\t-645440.5669153205\t231.94717568649529\t461.16378929373604\t2.53\n", 261 | "45.0%\t450\t0.9000000000000007\t-644624.313030838\t233.12744212215026\t461.16378929373604\t2.53\n", 262 | "46.0%\t460\t0.9200000000000007\t-643694.866748954\t233.44861943600057\t461.16378929373604\t2.54\n", 263 | "47.0%\t470\t0.9400000000000007\t-642841.432570169\t233.63384175827645\t461.16378929373604\t2.54\n", 264 | "48.0%\t480\t0.9600000000000007\t-642903.4694090917\t236.71625609092683\t461.16378929373604\t2.55\n", 265 | "49.0%\t490\t0.9800000000000008\t-641476.2953428248\t235.67246075483453\t461.16378929373604\t2.55\n", 266 | "50.0%\t500\t1.0000000000000007\t-641652.7103167303\t238.82355041252185\t461.16378929373604\t2.54\n", 267 | "51.0%\t510\t1.0200000000000007\t-641305.4769422774\t240.22195070964926\t461.16378929373604\t2.51\n", 268 | 
"52.0%\t520\t1.0400000000000007\t-640341.8790109688\t239.32956337025942\t461.16378929373604\t2.52\n", 269 | "53.0%\t530\t1.0600000000000007\t-640085.4947319749\t240.47406700562652\t461.16378929373604\t2.52\n", 270 | "54.0%\t540\t1.0800000000000007\t-639733.5349140759\t242.09340857790428\t461.16378929373604\t2.52\n", 271 | "55.0%\t550\t1.1000000000000008\t-639197.8369709394\t243.54728265465099\t461.16378929373604\t2.53\n", 272 | "56.0%\t560\t1.1200000000000008\t-638384.3947297352\t242.869218919828\t461.16378929373604\t2.53\n", 273 | "57.0%\t570\t1.1400000000000008\t-638947.9564094341\t245.88373912157323\t461.16378929373604\t2.53\n", 274 | "58.0%\t580\t1.1600000000000008\t-637762.7881218983\t244.0847864458416\t461.16378929373604\t2.55\n", 275 | "59.0%\t590\t1.1800000000000008\t-637580.3995361135\t245.7883812015987\t461.16378929373604\t2.54\n", 276 | "60.0%\t600\t1.2000000000000008\t-637027.0227395778\t246.2326467992223\t461.16378929373604\t2.56\n", 277 | "61.0%\t610\t1.2200000000000009\t-636235.3482408065\t246.47209496903974\t461.16378929373604\t2.55\n", 278 | "62.0%\t620\t1.2400000000000009\t-635700.4696633808\t247.22415458112525\t461.16378929373604\t2.56\n", 279 | "63.0%\t630\t1.260000000000001\t-635881.1654009096\t249.87351600981813\t461.16378929373604\t2.56\n", 280 | "64.0%\t640\t1.280000000000001\t-634849.7613213202\t249.0471381461793\t461.16378929373604\t2.56\n", 281 | "65.0%\t650\t1.300000000000001\t-634723.2363226824\t250.6601601619171\t461.16378929373604\t2.55\n", 282 | "66.0%\t660\t1.320000000000001\t-634014.6191930667\t250.2974806392958\t461.16378929373604\t2.56\n", 283 | "67.0%\t670\t1.340000000000001\t-633327.386527703\t250.37384998301494\t461.16378929373604\t2.54\n", 284 | "68.0%\t680\t1.360000000000001\t-633756.740305737\t253.07703054410214\t461.16378929373604\t2.54\n", 285 | "69.0%\t690\t1.380000000000001\t-632603.9884927486\t252.29772523504062\t461.16378929373604\t2.54\n", 286 | 
"70.0%\t700\t1.400000000000001\t-631878.9259594147\t252.6431521146993\t461.16378929373604\t2.55\n", 287 | "71.0%\t710\t1.420000000000001\t-631562.7912904532\t253.66952653024367\t461.16378929373604\t2.55\n", 288 | "72.0%\t720\t1.440000000000001\t-630414.5209297839\t252.36351514123893\t461.16378929373604\t2.56\n", 289 | "73.0%\t730\t1.460000000000001\t-630681.7332607186\t254.65533861206185\t461.16378929373604\t2.56\n", 290 | "74.0%\t740\t1.480000000000001\t-630707.4105771349\t256.19226500265074\t461.16378929373604\t2.55\n", 291 | "75.0%\t750\t1.500000000000001\t-630224.6923731533\t256.96537619014646\t461.16378929373604\t2.54\n", 292 | "76.0%\t760\t1.5200000000000011\t-629881.7752764086\t257.955876883782\t461.16378929373604\t2.55\n", 293 | "77.0%\t770\t1.5400000000000011\t-629134.1943663077\t257.3194695409244\t461.16378929373604\t2.55\n", 294 | "78.0%\t780\t1.5600000000000012\t-629035.2953172476\t257.8971806436101\t461.16378929373604\t2.55\n", 295 | "79.0%\t790\t1.5800000000000012\t-628412.873366147\t258.65347211898666\t461.16378929373604\t2.55\n", 296 | "80.0%\t800\t1.6000000000000012\t-628337.1262265679\t259.347375875849\t461.16378929373604\t2.54\n", 297 | "81.0%\t810\t1.6200000000000012\t-627437.0994978311\t259.13519251413084\t461.16378929373604\t2.53\n", 298 | "82.0%\t820\t1.6400000000000012\t-627631.6621973449\t261.1775936819364\t461.16378929373604\t2.52\n", 299 | "83.0%\t830\t1.6600000000000013\t-626987.8130238287\t260.8281433154896\t461.16378929373604\t2.5\n", 300 | "84.0%\t840\t1.6800000000000013\t-627067.4588844598\t262.69951923933155\t461.16378929373604\t2.51\n", 301 | "85.0%\t850\t1.7000000000000013\t-626494.1887315101\t262.14667928971085\t461.16378929373604\t2.51\n", 302 | "86.0%\t860\t1.7200000000000013\t-626136.977423235\t262.79030412230594\t461.16378929373604\t2.51\n", 303 | "87.0%\t870\t1.7400000000000013\t-626043.4968024947\t264.62208685671266\t461.16378929373604\t2.51\n", 304 | 
"88.0%\t880\t1.7600000000000013\t-625364.6200717108\t265.0995318488803\t461.16378929373604\t2.52\n", 305 | "89.0%\t890\t1.7800000000000014\t-625622.4900780444\t266.88885891903425\t461.16378929373604\t2.52\n", 306 | "90.0%\t900\t1.8000000000000014\t-624322.7179508441\t265.3191938086389\t461.16378929373604\t2.52\n", 307 | "91.0%\t910\t1.8200000000000014\t-623990.6284065398\t265.24133943845277\t461.16378929373604\t2.53\n", 308 | "92.0%\t920\t1.8400000000000014\t-623713.6441985337\t265.9818459051203\t461.16378929373604\t2.52\n", 309 | "93.0%\t930\t1.8600000000000014\t-623608.6367595821\t267.1016569179414\t461.16378929373604\t2.53\n", 310 | "94.0%\t940\t1.8800000000000014\t-623186.6980777871\t267.3770176447112\t461.16378929373604\t2.53\n", 311 | "95.0%\t950\t1.9000000000000015\t-623404.6058917871\t268.44890436006347\t461.16378929373604\t2.52\n", 312 | "96.0%\t960\t1.9200000000000015\t-622505.9128304343\t267.2173191539767\t461.16378929373604\t2.53\n", 313 | "97.0%\t970\t1.9400000000000015\t-622660.2113017081\t269.0655374669412\t461.16378929373604\t2.52\n", 314 | "98.0%\t980\t1.9600000000000015\t-621886.9059693508\t268.832438401663\t461.16378929373604\t2.52\n", 315 | "99.0%\t990\t1.9800000000000015\t-621763.2894402687\t270.2479560806148\t461.16378929373604\t2.52\n", 316 | "100.0%\t1000\t2.0000000000000013\t-621636.7540183021\t270.523573806602\t461.16378929373604\t2.52\n", 317 | "Done! Saving state and image\n", 318 | "load traj\n", 319 | "Start creating image\n", 320 | "Done! File saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_image_id_0.dcd\n", 321 | "Done! 
File saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_state_id_0.xml\n", 322 | "Trajectory saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_trajetory_id_1.dcd\n", 323 | "Checkpoint saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_checkpoint_id_1.chk\n", 324 | "Log saved to /Users/ingrid/Projects/EasyMD/easy-md/example/output/md_id_1.log\n", 325 | "Function 'main' took 76.1632 seconds to execute\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "from easy_md.main.quickrun import quickrun\n", 331 | "\n", 332 | "base_dir = \"/Users/ingrid/Projects/EasyMD/easy-md/example\"\n", 333 | "quickrun(protein_file = f\"{base_dir}/4W52.pdb\", ligand_file = f\"{base_dir}/4w52_C_EPE.sdf\", nsteps=1000)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## 2. Step-by-Step Approach\n", 341 | "\n", 342 | "The quickrun attempt shows us the complete workflow. Let's break down the process into individual steps for better understanding and control. We'll use the core functions directly.\n", 343 | "\n", 344 | "System Configuration\n", 345 | "1. The configuration step sets up all the parameters needed for the simulation\n", 346 | "\n", 347 | "System SolvationIn \n", 348 | "1. Add missing atoms and hydrogens to the protein structure\n", 349 | "2. Create a water box around the protein-ligand complex\n", 350 | "3. Add ions to neutralize the system and achieve the desired ionic strength\n", 351 | "\n", 352 | "Force Field Parameterization\n", 353 | "1. Creating OpenFF topology\n", 354 | "2. Parameterizing the system\n", 355 | "3. Converting to OpenMM format\n", 356 | "\n", 357 | "Energy Minimization\n", 358 | "1. Initial minimization\n", 359 | "2. Gradual heating to target temperature (300K)\n", 360 | "3. Final minimization at target temperature\n", 361 | "This ensures a stable starting point for the production simulation.\n", 362 | "\n", 363 | "MD Simulation\n", 364 | "1. 
Harmonic restraints on protein heavy atoms (optional)\n", 365 | "2. Constant temperature (NVT ensemble)\n", 366 | "3. 1000 steps of simulation\n", 367 | "4. Periodic output of coordinates and system state\n", 368 | "\n", 369 | "The simulation progress shows:\n", 370 | "- Potential energy\n", 371 | "- Temperature\n", 372 | "- Box volume\n", 373 | "- Simulation speed\n", 374 | "\n", 375 | "## Conclusion\n", 376 | "\n", 377 | "The simulation has successfully completed, generating:\n", 378 | "1. Trajectory file (DCD format)\n", 379 | "2. Checkpoint files for restart\n", 380 | "3. Log files with simulation data\n", 381 | "4. System state files\n", 382 | "\n", 383 | "These files can be used for further analysis of the protein-ligand system dynamics." 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "from easy_md.utils.config import create_config\n", 393 | "from easy_md.main import run_solvation, run_forcefield_parameterization, run_energy_minimization, run_simulation\n", 394 | "\n", 395 | "base_dir = \"/Users/ingrid/Projects/EasyMD/easy-md/example\"\n", 396 | "config = create_config(\n", 397 | " protein_file=f\"{base_dir}/4W52.pdb\",\n", 398 | " ligand_file=f\"{base_dir}/4w52_C_EPE.sdf\",\n", 399 | " \n", 400 | " # MD Simulation settings\n", 401 | " md_steps=1000,\n", 402 | " md_save_interval=10,\n", 403 | " \n", 404 | " # Platform settings\n", 405 | " platform_name=\"CPU\", # or \"CUDA\" for GPU\n", 406 | " platform_precision=\"mixed\", # or \"single\" or \"double\"\n", 407 | ")\n", 408 | "run_solvation.add_water(config=config)\n", 409 | "run_forcefield_parameterization.main(config)\n", 410 | "run_energy_minimization.main(config)\n", 411 | "run_simulation.main(config, \n", 412 | " starting_state_path=\"/Users/ingrid/Projects/EasyMD/my_simulation/output/emin.xml\")" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [] 419 | } 
420 | ], 421 | "metadata": { 422 | "kernelspec": { 423 | "display_name": "easymd", 424 | "language": "python", 425 | "name": "python3" 426 | }, 427 | "language_info": { 428 | "codemirror_mode": { 429 | "name": "ipython", 430 | "version": 3 431 | }, 432 | "file_extension": ".py", 433 | "mimetype": "text/x-python", 434 | "name": "python", 435 | "nbconvert_exporter": "python", 436 | "pygments_lexer": "ipython3", 437 | "version": "3.9.22" 438 | } 439 | }, 440 | "nbformat": 4, 441 | "nbformat_minor": 2 442 | } 443 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "easy-md" 7 | version = "0.1.0" 8 | description = "A package for molecular dynamics simulations" 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | license = {file = "LICENSE"} 12 | authors = [ 13 | {name = "EasyMD Developer", email = "developer@example.com"}, 14 | ] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | dependencies = [ 21 | "openmm>=7.7.0", 22 | "openff-toolkit>=0.11.0", 23 | "mdanalysis>=2.4.0", 24 | "mdtraj>=1.9.7", 25 | "pytraj>=2.0.6", 26 | "numpy>=1.23.5", 27 | "scipy>=1.9.3", 28 | "pandas>=1.5.3", 29 | "tqdm>=4.64.1", 30 | "pyyaml>=6.0", 31 | "h5py>=3.7.0", 32 | ] 33 | 34 | [project.optional-dependencies] 35 | dev = [ 36 | "black>=22.12", 37 | "flake8>=6.0", 38 | "pytest>=7.0", 39 | "pytest-cov>=4.0", 40 | "jupyter", 41 | "ipywidgets>=8.0", 42 | ] 43 | 44 | test = [ 45 | "pytest>=7.0", 46 | "pytest-cov>=4.0", 47 | "pytest-mock>=3.10.0", 48 | "pytest-timeout>=2.1.0", 49 | ] 50 | 51 | [project.urls] 52 | Repository = "" # Add your repository URL here 53 | 54 | [tool.hatch.build.targets.wheel] 55 | packages = ["src/easy_md"] 56 
| 57 | [project.scripts] 58 | easy-md = "easy_md.cli:main" 59 | 60 | [tool.pytest.ini_options] 61 | testpaths = ["tests"] 62 | python_files = ["test_*.py"] 63 | addopts = "-v --cov=easy_md --cov-report=term-missing" 64 | timeout = 300 -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/easy_md/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/easy_md/main/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/main/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/main/__pycache__/quickrun.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/main/__pycache__/quickrun.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/main/__pycache__/run_energy_minimization.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/main/__pycache__/run_energy_minimization.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/main/__pycache__/run_equilibration.cpython-39.pyc: -------------------------------------------------------------------------------- 
def quickrun(protein_file, ligand_file=None, nsteps=1000, **kwargs):
    """Run the complete EasyMD pipeline for one system with a single call.

    A configuration is built with ``create_config`` and then handed, in order,
    to solvation, force field parameterization, energy minimization and the
    MD simulation stage.

    Args:
        protein_file (str): Path to protein PDB file.
        ligand_file (str, optional): Path to ligand file. Defaults to None.
        nsteps (int, optional): Number of MD steps; forwarded to
            ``create_config`` as ``md_steps``. Defaults to 1000.
        **kwargs: Additional configuration parameters passed unchanged to
            ``create_config``. Commonly used ones:

            Platform Settings:
                platform_name (str): Platform for computation. Default: "CPU". Options: ['CPU', 'CUDA']
                platform_precision (str): Precision mode. Default: "mixed". Options: ['single', 'mixed', 'double']

            Integrator Settings:
                integrator_temperature (float): Temperature in Kelvin. Default: 300.0
                integrator_friction (float): Friction coefficient in ps^-1. Default: 1.0
                integrator_timestep (float): Time step in ps. Default: 0.002

            Solvation Settings:
                solv_box_buffer (float): Buffer size in angstroms. Default: 2.5
                solv_ionic_strength (float): Ionic strength in molar. Default: 0.15
                solv_positive_ion (str): Type of positive ion. Default: "Na+"
                solv_negative_ion (str): Type of negative ion. Default: "Cl-"
                solv_model (str): Water model. Default: "tip3p"
                solv_pH (float): pH of the solvent. Default: 7.0

            MD Simulation Settings:
                md_save_interval (int): Save interval for trajectory. Default: 10
                md_pressure (float): Pressure in atmospheres. Default: 1.0
                md_anisotropic (bool): Use anisotropic pressure. Default: False
                md_barostat_freq (int): Barostat frequency. Default: 25
                md_harmonic_restraint (bool): Use harmonic restraints. Default: True
                md_load_state (bool): Load previous state if available. Default: True
                md_restrained_residues (list): List of residues to restrain. Default: []
                md_npt (bool): Use NPT ensemble. Default: False

            See the create_config documentation for the full list.
    """
    # One config object is shared by every stage of the pipeline.
    config = create_config(
        protein_file=protein_file,
        ligand_file=ligand_file,
        md_steps=nsteps,
        **kwargs,
    )

    # Solvation has its own entry point; the remaining stages all expose main(config).
    run_solvation.add_water(config=config)
    for stage in (run_forcefield_parameterization, run_energy_minimization, run_simulation):
        stage.main(config)
def energy_force_post_simulation(simulation):
    """Print the potential energy and the ten largest force magnitudes.

    Args:
        simulation: Object exposing ``context.getState(...)``. The state
            requested with ``getEnergy=True`` must provide
            ``getPotentialEnergy()`` and the one requested with
            ``getForces=True`` must provide ``getForces()``, an iterable of
            vectors with ``x``, ``y`` and ``z`` attributes.
    """
    context = simulation.context
    energy = context.getState(getEnergy=True).getPotentialEnergy()

    # Euclidean norm of every force vector.
    magnitudes = []
    for vec in context.getState(getForces=True).getForces():
        magnitudes.append((vec.x**2 + vec.y**2 + vec.z**2)**0.5)

    # np.sort is ascending, so the trailing slice holds the largest values.
    largest = np.sort(magnitudes)[-10:]

    print(f"Potential Energy after Minimization: {energy}")
    print(f"Maximum Force Magnitude after Minimization: {largest}")
def describe_state(state: openmm.State, name: str = "State"):
    """Print the potential energy and the largest force magnitude of a state.

    Args:
        state: State that was requested with energies and forces, i.e. it must
            support ``getPotentialEnergy()`` and ``getForces()``.
        name: Label placed at the start of the printed line.
    """
    # Largest Euclidean norm over all per-particle force vectors.
    peak_force = max(np.sqrt(f.x**2 + f.y**2 + f.z**2) for f in state.getForces())
    # NOTE(review): ``_value`` is the raw number without its unit; the printed
    # "kJ/mol" label assumes that is the unit in use -- confirm.
    energy = round(state.getPotentialEnergy()._value, 2)
    print(
        f"{name} has energy {energy} kJ/mol "
        f"with maximum force {round(peak_force, 2)} kJ/(mol nm)"
    )

def save_min_structure(simulation, emin_pdb_output, emin_xml_output):
    """Write the current positions to a PDB file and the state to an XML file.

    Args:
        simulation: Simulation whose context positions and topology are saved.
        emin_pdb_output: Destination path of the PDB structure file.
        emin_xml_output: Destination path passed to ``simulation.saveState``.
    """
    positions = simulation.context.getState(getPositions=True).getPositions()
    # The context manager guarantees the PDB is flushed and the handle is
    # closed even if writing fails (the handle used to be leaked).
    with open(emin_pdb_output, 'w') as pdb_file:
        PDBFile.writeFile(simulation.topology, positions, pdb_file)
    simulation.saveState(emin_xml_output)
    print('Files saved')
@time_tracker
def main(config):
    """
    Performs energy minimization and equilibration of the molecular system.

    Steps: minimize, ramp the velocities from 0 K up to ``emin_target_temp``,
    run ``emin_steps`` integration steps, minimize again, then save the
    resulting structure (PDB) and state (XML).

    Args:
        config (dict): Configuration dictionary with flat structure using prefixed keys.
            Keys read here: ``path_openff_topology``, ``path_openmm_system``,
            ``path_openmm_topology``, ``platform_name``, ``platform_precision``,
            ``integrator_temperature``, ``integrator_friction``,
            ``integrator_timestep``, ``emin_target_temp``, ``emin_heating_step``,
            ``emin_heating_interval``, ``emin_steps``, ``path_emin_structure``
            and ``path_emin_state``.

    Returns:
        openmm.app.Simulation: The simulation object after minimization and equilibration
    """

    # Load files using the flat config structure. The context manager closes
    # the topology file instead of leaking the handle.
    with open(config['path_openff_topology']) as topology_file:
        off_top = Topology.from_json(topology_file.read())
    omm_system, omm_top, _ = fileparser.load_files(config['path_openmm_system'], config['path_openmm_topology'])

    # Set up simulation using flat config structure
    emin_simulation = simulation_util.setup_simulation(
        omm_system,
        omm_top,
        config['platform_name'],
        {'Precision': config['platform_precision']},
        config['integrator_temperature'],
        config['integrator_friction'],
        config['integrator_timestep']
    )

    emin_simulation.context.setPositions(off_top.get_positions().to_openmm())

    # Initial minimization before heating
    print("\n=== Initial Energy Minimization ===")
    emin_simulation.minimizeEnergy()
    initial_state = emin_simulation.context.getState(getEnergy=True, getForces=True)
    describe_state(initial_state, "Initial minimized state")

    # Gradual heating process to prevent energy spikes and to let the system naturally adapt to new temperature.
    print("\n=== Gradual Heating Process ===")
    target_temp = config['emin_target_temp']
    heating_temps = list(range(0, target_temp + 1, config['emin_heating_step']))
    # When the step does not divide the target, the ramp would stop short of
    # it; finish the ramp at the target temperature itself.
    if heating_temps and heating_temps[-1] != target_temp:
        heating_temps.append(target_temp)
    for temp in heating_temps:
        emin_simulation.context.setVelocitiesToTemperature(temp * kelvin)
        emin_simulation.step(config['emin_heating_interval'])
        current_state = emin_simulation.context.getState(getEnergy=True, getForces=True)
        describe_state(current_state, f"Heating at {temp}K")

    # Final equilibration
    print(f"\n=== Final Equilibration at {config['emin_target_temp']}K ===")
    emin_simulation.step(config['emin_steps'])

    # Final minimization. Adjusts atomic positions to reduce energy without considering time evolution.
    print("\n=== Final Energy Minimization ===")
    emin_simulation.minimizeEnergy()
    final_state = emin_simulation.context.getState(getEnergy=True, getForces=True)
    describe_state(final_state, "Final minimized state")

    save_min_structure(emin_simulation, config['path_emin_structure'], config['path_emin_state'])

    # Hand the simulation back as the docstring promises.
    return emin_simulation

#------------------------------------------------------------------------------
# Main Execution
#------------------------------------------------------------------------------
if __name__== '__main__':
    # main() needs a configuration dictionary, so the module cannot run on its
    # own; exit with guidance instead of a TypeError about a missing argument.
    raise SystemExit(
        "run_energy_minimization cannot be run standalone: main() requires a config dict. "
        "Build one with easy_md.utils.config.create_config(...) and call main(config), "
        "or use easy_md.main.quickrun.quickrun()."
    )
def check_for_large_molecules(topology, atom_count_threshold=100):
    """Report whether the topology contains at least one large molecule.

    A molecule counts as "large" when it has strictly more than
    ``atom_count_threshold`` atoms; such a molecule is taken as a hint that a
    protein or polymer is present. Scanning stops at the first match, and a
    message describing the outcome is printed either way.

    Args:
        topology: Object exposing an iterable ``molecules`` attribute whose
            items each expose a sized ``atoms`` attribute.
        atom_count_threshold (int): Atom count a molecule must exceed to be
            considered large.

    Returns:
        bool: True if a large molecule was found, False otherwise.
    """
    # Lazy generator: molecules after the first large one are never inspected.
    atom_counts = (len(mol.atoms) for mol in topology.molecules)
    first_large = next(
        (count for count in atom_counts if count > atom_count_threshold),
        None,
    )

    if first_large is None:
        print("No large molecules found that could indicate a protein or polymer.")
        return False

    print(f"Found a large molecule with {first_large} atoms, which might be a protein or polymer.")
    return True
59 | 60 | Args: 61 | interchange: OpenFF Interchange object 62 | omm_system: OpenMM System object 63 | omm_topology: OpenMM Topology object 64 | """ 65 | # Get particle counts from different representations 66 | interchange_n_particles = len(interchange.positions) 67 | system_n_particles = omm_system.getNumParticles() 68 | topology_n_particles = omm_topology.getNumAtoms() 69 | 70 | # Hide just that exact message (case-sensitive) 71 | warnings.filterwarnings( 72 | "ignore", 73 | message=r"WARNING: duplicate atom", # regex allowed 74 | module="openmm" # safer than silencing everything 75 | ) 76 | 77 | # Read the saved PDB file and get its particle count 78 | pdb = PDBFile(pdb_file) 79 | pdb_n_particles = pdb.topology.getNumAtoms() 80 | 81 | print("\nSystem Consistency Check:") 82 | print(f"Number of particles in Interchange: {interchange_n_particles}") 83 | print(f"Number of particles in OpenMM System: {system_n_particles}") 84 | print(f"Number of particles in OpenMM Topology: {topology_n_particles}") 85 | # print(f"Number of particles in saved PDB: {pdb_n_particles}") 86 | 87 | if print_detailed_info: 88 | # Additional diagnostic information 89 | print("\nDetailed System Information:") 90 | # Check for NaN or undefined coordinates in interchange 91 | if hasattr(interchange, 'positions'): 92 | nan_coords = np.isnan(interchange.positions).any(axis=1) 93 | if nan_coords.any(): 94 | print(f"Warning: Found {nan_coords.sum()} particles with NaN coordinates in interchange") 95 | 96 | # Compare residue counts 97 | pdb_n_residues = pdb.topology.getNumResidues() 98 | omm_n_residues = omm_topology.getNumResidues() 99 | print(f"\nResidue counts:") 100 | print(f"OpenMM Topology: {omm_n_residues}") 101 | print(f"PDB file: {pdb_n_residues}") 102 | 103 | # Compare chain counts 104 | pdb_n_chains = pdb.topology.getNumChains() 105 | omm_n_chains = omm_topology.getNumChains() 106 | print(f"\nChain counts:") 107 | print(f"OpenMM Topology: {omm_n_chains}") 108 | print(f"PDB file: 
{pdb_n_chains}") 109 | 110 | # Check for ligand (UNK residue) 111 | omm_unk_residues = [res for res in omm_topology.residues() if res.name == 'UNK'] 112 | pdb_unk_residues = [res for res in pdb.topology.residues() if res.name == 'UNK'] 113 | print("\nLigand (UNK) Information:") 114 | print(f"Number of UNK residues in OpenMM Topology: {len(omm_unk_residues)}") 115 | print(f"Number of UNK residues in PDB: {len(pdb_unk_residues)}") 116 | 117 | if omm_unk_residues and pdb_unk_residues: 118 | # Compare atom counts in UNK residues 119 | omm_unk_atoms = sum(len(list(res.atoms())) for res in omm_unk_residues) 120 | pdb_unk_atoms = sum(len(list(res.atoms())) for res in pdb_unk_residues) 121 | print(f"Number of atoms in UNK residues (OpenMM): {omm_unk_atoms}") 122 | print(f"Number of atoms in UNK residues (PDB): {pdb_unk_atoms}") 123 | else: 124 | if not omm_unk_residues: 125 | print("Warning: No UNK residue found in OpenMM Topology!") 126 | if not pdb_unk_residues: 127 | print("Warning: No UNK residue found in PDB file!") 128 | 129 | # Count water molecules 130 | pdb_waters = sum(1 for res in pdb.topology.residues() if res.name == 'HOH') 131 | omm_waters = sum(1 for res in omm_topology.residues() if res.name == 'HOH') 132 | print(f"\nWater molecule counts:") 133 | print(f"OpenMM Topology: {omm_waters}") 134 | print(f"PDB file: {pdb_waters}") 135 | 136 | # Check consistency across all representations 137 | if not (interchange_n_particles == system_n_particles == topology_n_particles): 138 | raise ValueError( 139 | "Inconsistent number of particles found!\n" 140 | f"Interchange: {interchange_n_particles}\n" 141 | f"OpenMM System: {system_n_particles}\n" 142 | f"OpenMM Topology: {topology_n_particles}\n" 143 | # f"Interchange PDB: {pdb_n_particles} {pdb_file}\n\n" 144 | "Please check the detailed system information above for potential causes." 
@time_tracker
def create_openff_topology(
    protein_topology: Topology,
    ligand_mol: Molecule,
    output_path: str,
    radius: Quantity = 2.5 * unit.angstrom,
    keep: Iterable[Molecule] = (),
) -> Topology:
    """Create the complex topology, save it as JSON and return it.

    Without a ligand the protein topology is saved unchanged. With a ligand,
    every molecule of ``protein_topology`` is copied into a new topology
    unless it is small (< 50 atoms), is not listed in ``keep``, and has at
    least one atom within ``radius`` of a ligand atom (distances are taken
    between the first conformers). The ligand is appended last and the box
    vectors of ``protein_topology`` are carried over.

    Args:
        protein_topology: Topology of the (solvated) protein.
        ligand_mol: Ligand molecule, or None to save the protein topology as is.
        output_path: Destination of the JSON-serialized topology.
        radius: Minimum allowed distance between a removable molecule and the
            ligand; closer molecules are dropped.
        keep: Molecules that must never be removed; anything isomorphic with
            one of them is always retained.

    Returns:
        Topology: The topology that was written to ``output_path``.

    Raises:
        ValueError: If the topology to be saved contains no large molecule
            (see ``check_for_large_molecules``), i.e. the protein is missing.
    """
    print("\n=== Creating OpenFF Topology For Solvated Protein-Ligand Complex ===")

    # No ligand: nothing can clash, so the protein topology is used directly.
    if ligand_mol is None:
        if not check_for_large_molecules(protein_topology):
            # Previously this case returned silently without writing a file,
            # leaving later steps to fail on a missing or stale topology.
            raise ValueError("No protein found in topology. Topology saving failed.")
        with open(output_path, "w") as f:
            print(protein_topology.to_json(), file=f)
        print(f"Done! File saved to {output_path}. No ligand was provided, only protein was used.")
        return protein_topology

    # Materialize once: ``keep`` is scanned for every molecule, so a one-shot
    # iterator would otherwise be exhausted after the first molecule.
    keep = tuple(keep)
    working_unit = unit.nanometer

    new_top_mols = []
    # Shape (n_ligand_atoms, 1, 3) so it broadcasts against (1, n_atoms, 3).
    ligand_coordinates = ligand_mol.conformers[0][:, None, :]

    for molecule in protein_topology.molecules:
        # Molecules with 50+ atoms and molecules on the keep list are always retained.
        if len(molecule.atoms) >= 50 or any(
            keep_mol.is_isomorphic_with(molecule) for keep_mol in keep
        ):
            new_top_mols.append(molecule)
            continue

        # Remaining small molecules (e.g. water) are removed when any of
        # their atoms is within ``radius`` of any ligand atom.
        molecule_coordinates = molecule.conformers[0][None, :, :]
        diff_matrix = molecule_coordinates - ligand_coordinates
        distance_matrix = (
            np.linalg.norm(diff_matrix.m_as(working_unit), axis=-1) * working_unit
        )

        if distance_matrix.min() > radius:
            new_top_mols.append(molecule)
        else:
            print(f"Removed {molecule.to_smiles()} molecule")

    # The ligand goes last in the topology.
    new_top_mols.append(ligand_mol)

    new_top = Topology.from_molecules(new_top_mols)
    new_top.box_vectors = protein_topology.box_vectors

    # Refuse to save a topology that no longer contains a protein-sized molecule.
    if not check_for_large_molecules(new_top):
        raise ValueError("Protein was removed from topology. Topology saving failed.")

    with open(output_path, "w") as f:
        print(new_top.to_json(), file=f)
    print(f"Done! File saved to {output_path}. File includes protein and ligand.")
    return new_top
@time_tracker
def main(config, print_detailed_info=False):
    """Create, parameterize and export the system described by ``config``.

    Steps: prepare the ligand (if a ligand path is configured), load the
    solvated protein topology, write the combined OpenFF topology,
    parameterize it into an Interchange, and save the OpenMM system and
    topology files.

    Args:
        config (dict): Flat configuration dictionary. Reads ``path_ligand``,
            ``path_protein_solvated``, ``path_openff_topology``,
            ``path_openff_interchange``, ``path_openmm_topology`` and
            ``path_openmm_system``.
        print_detailed_info (bool): Forwarded to ``save_openmm_system_topology``.

    Returns:
        None. If the protein topology cannot be loaded, the error is printed
        and the function returns early without producing any files.
    """
    # An empty ligand path means "protein only".
    ligand_path = config['path_ligand']
    ligand_mol = (
        ligand_util.prepare_ligand_from_sdf(ligand_path) if ligand_path != "" else None
    )

    try:
        protein_top_object = Topology.from_pdb(config['path_protein_solvated'])
    except Exception as e:
        # Best effort: report the problem and stop instead of propagating.
        print(f"Error loading protein topology: {e}")
        return

    topology_path = config['path_openff_topology']
    create_openff_topology(protein_top_object, ligand_mol, topology_path)

    interchange = parameterize_openff_system(topology_path)

    save_openmm_system_topology(
        interchange,
        config['path_openff_interchange'],
        config['path_openmm_topology'],
        config['path_openmm_system'],
        print_detailed_info,
    )
@time_tracker
def main(config=None, starting_state_path=None, starting_checkpoint_path=None, equilibration_only=False):
    """Run an MD simulation, or an equilibration that can stop early.

    Loads the OpenMM system and topology, optionally adds a barostat
    (``md_npt``) and harmonic restraints (``md_harmonic_restraint``), restores
    a starting state or checkpoint, attaches reporters and runs the dynamics.
    Afterwards the final state is saved and ``dcd_image.image_molecules`` is
    called on the trajectory.

    Output file names are derived from the configured paths with
    ``next_free_state`` so that earlier runs are not overwritten.

    Args:
        config (dict): Flat configuration dictionary (required).
        starting_state_path (str, optional): State XML file to start from.
        starting_checkpoint_path (str, optional): Checkpoint file to start
            from. If neither this nor ``starting_state_path`` is given,
            ``config['path_emin_state']`` is used.
        equilibration_only (bool): If True, run in chunks of
            ``md_save_interval`` steps and stop as soon as
            ``simulation_util.check_equilibration`` returns a truthy value;
            otherwise run ``md_steps`` steps in one go.

    Raises:
        TypeError: If ``config`` is None.
    """
    if config is None:
        # Same exception type as the former subscript failure, with a clear message.
        raise TypeError("main() requires a configuration dictionary; got config=None")

    # --------------------------------------------------------------------------
    # Setup simulation
    # --------------------------------------------------------------------------
    # The third value returned by load_files is not needed here.
    omm_system, omm_top, _ = fileparser.load_files(config['path_openmm_system'], config['path_openmm_topology'])
    # simulation_util.print_constraint_info(omm_system, omm_top)  # Uncomment to print constraint information

    if config['md_npt']:
        barostat = simulation_util.setup_barostat(
            config['integrator_temperature'],
            config['md_pressure'],
            config['md_barostat_freq'],
            config['md_anisotropic']
        )
        omm_system.addForce(barostat)

    if config['md_harmonic_restraint']:
        force_restraints = simulation_util.setup_force_restraints(
            reference_structure=config['path_emin_structure'],
            residue_indices=config['md_restrained_residues']
        )
        omm_system.addForce(force_restraints)

    simulation = simulation_util.setup_simulation(
        omm_system,
        omm_top,
        config['platform_name'],
        {'Precision': config['platform_precision']},
        config['integrator_temperature'],
        config['integrator_friction'],
        config['integrator_timestep']
    )

    # --------------------------------------------------------------------------
    # Load state or checkpoint and setup reporters
    # --------------------------------------------------------------------------
    if starting_state_path is None and starting_checkpoint_path is None:
        starting_state_path = config['path_emin_state']
        print(f"No starting state or checkpoint provided. Using {starting_state_path}")

    simulation = simulation_util.load_state_or_checkpoint(
        simulation,
        temp=config['integrator_temperature'],
        state_file=starting_state_path,
        checkpoint_file=starting_checkpoint_path
    )

    # Resolve every output name exactly once, before any file is written.
    # Calling next_free_state() again after the run would return the *next*
    # unused name, not the file that was actually produced.
    path_md_image = next_free_state(config['path_md_image'])
    path_md_trajectory = next_free_state(config['path_md_trajectory'])
    path_md_checkpoint = next_free_state(config['path_md_checkpoint'])
    path_md_log = next_free_state(config['path_md_log'])
    path_md_state = next_free_state(config['path_md_state'])

    simulation_util.setup_reporters(
        simulation,
        path_md_log,
        path_md_trajectory,
        path_md_checkpoint,
        config['md_save_interval'],
        config['md_steps']
    )

    # --------------------------------------------------------------------------
    # Run
    # --------------------------------------------------------------------------
    if equilibration_only:
        # Sliding windows handed to check_equilibration for convergence monitoring.
        temp_window = deque(maxlen=config['monitor_window'])
        energy_window = deque(maxlen=config['monitor_window'])

        print("\n=== Equilibration ===")
        for _ in range(0, config['md_steps'], config['md_save_interval']):
            simulation.step(config['md_save_interval'])

            # Stop once temperature and energy are within the configured thresholds.
            if simulation_util.check_equilibration(
                simulation,
                config['monitor_temp_threshold'],
                config['monitor_energy_threshold'],
                temp_window,
                energy_window,
                config['monitor_window']
            ):
                break
    else:
        print("\n=== Simulation ===")
        simulation.step(config['md_steps'])

    print("Done! Saving state and image")
    # The state XML belongs in the state file. It used to be written to the
    # image (.dcd) path, which is also handed to image_molecules as its third
    # argument (presumably its output path -- confirm against dcd_image).
    simulation.saveState(path_md_state)
    dcd_image.image_molecules(path_md_trajectory, config['path_openff_interchange'], path_md_image)

    print(f"Done! File saved to {path_md_state}")
    print(f"Trajectory saved to {path_md_trajectory}")
    print(f"Checkpoint saved to {path_md_checkpoint}")
    print(f"Log saved to {path_md_log}")
def add_water(config):
    """Repair the input protein, solvate it in a rectangular box and save it.

    The structure at ``config['path_protein']`` is loaded with PDBFixer,
    missing heavy atoms (if any) and hydrogens are added, and the box size is
    set to the coordinate extent of the structure plus ``solv_box_buffer``
    along each axis. Solvent and ions are then added with
    ``Modeller.addSolvent`` and the result is written to
    ``config['path_protein_solvated']``.

    Args:
        config: Mapping supporting ``.get`` with the keys ``path_protein``,
            ``path_protein_solvated``, ``solv_pH``, ``solv_box_buffer``,
            ``solv_ionic_strength``, ``solv_positive_ion``,
            ``solv_negative_ion``, ``solv_model`` (optional, defaults to
            'tip3p'), ``ff_protein`` and ``ff_water``.

    Raises:
        Exception: Any error raised by ``addSolvent`` is printed and re-raised.
    """
    fixer = PDBFixer(filename=config.get('path_protein'))

    # Locate missing residues first, then missing atoms.
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    n_missing_heavy = sum(len(atoms) for atoms in fixer.missingAtoms.values())

    if n_missing_heavy > 0:
        print(f"Found {n_missing_heavy} missing heavy atoms - adding them now...")
        fixer.addMissingAtoms()
    else:
        print("No missing heavy atoms found")

    # Hydrogens are added in both cases.
    print("Adding missing hydrogens...")
    fixer.addMissingHydrogens(pH=config.get('solv_pH'))

    modeller = Modeller(fixer.topology, fixer.positions)

    # Coordinates as an (n_atoms, 3) array in nanometers.
    coords = np.array(
        [[pos.x, pos.y, pos.z] for pos in modeller.positions.value_in_unit(nm)]
    )

    # Extent of the structure along each axis.
    box_dimensions = np.max(coords, axis=0) - np.min(coords, axis=0)

    forcefield = ForceField(config.get('ff_protein'), config.get('ff_water'))

    # The buffer is added once per axis to the extent, in the same unit (nm).
    buffer = config.get('solv_box_buffer')
    x_dimension = box_dimensions[0] + buffer
    y_dimension = box_dimensions[1] + buffer
    z_dimension = box_dimensions[2] + buffer

    print("\nFinal Box Dimensions (nanometers):")
    print("Width (X-axis):", x_dimension)
    print("Height (Y-axis):", y_dimension)
    print("Depth (Z-axis):", z_dimension)

    # Orthorhombic box: one vector along each Cartesian axis.
    box_vecs = (
        Vec3(x_dimension, 0, 0) * nm,
        Vec3(0, y_dimension, 0) * nm,
        Vec3(0, 0, z_dimension) * nm,
    )

    print('Adding solvent...')
    try:
        modeller.addSolvent(
            forcefield,
            boxVectors=box_vecs,
            ionicStrength=config.get('solv_ionic_strength') * molar,
            positiveIon=config.get('solv_positive_ion'),
            negativeIon=config.get('solv_negative_ion'),
            model=config.get('solv_model', 'tip3p'),
        )
    except Exception as e:
        print(f"Error adding solvent: {e}")
        raise

    solvated_path = config.get('path_protein_solvated')
    with open(solvated_path, 'w') as file:
        PDBFile.writeFile(modeller.topology, modeller.positions, file)
    print(f"Saved solvated structure to: {solvated_path}")
-------------------------------------------------------------------------------- /src/easy_md/utils/__pycache__/dcd_image.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/utils/__pycache__/dcd_image.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/utils/__pycache__/fileparser.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/utils/__pycache__/fileparser.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/utils/__pycache__/ligand_util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/utils/__pycache__/ligand_util.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/utils/__pycache__/simulation.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/utils/__pycache__/simulation.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/utils/__pycache__/simulation_util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ingcoder/easy-md/127873156b8175852af2fb30a954db255ccaa4e2/src/easy_md/utils/__pycache__/simulation_util.cpython-39.pyc -------------------------------------------------------------------------------- /src/easy_md/utils/config.py: 
# Default configuration values - using prefixes for grouping.
# Every "path_*" entry is a placeholder that create_config fills in.
DEFAULT_CONFIG = {
    # Paths
    "path_base": "",  # Will be set in create_config
    "path_protein": "",  # Will be set in create_config
    "path_ligand": "",  # Will be set in create_config
    "path_protein_solvated": "",  # Will be set in create_config
    "path_openff_topology": "",  # Will be set in create_config
    "path_openff_interchange": "",  # Will be set in create_config
    "path_openmm_topology": "",  # Will be set in create_config
    "path_openmm_system": "",  # Will be set in create_config
    "path_emin_structure": "",  # Will be set in create_config
    "path_emin_state": "",  # Will be set in create_config
    "path_md_log": "",  # Will be set in create_config
    "path_md_trajectory": "",  # Will be set in create_config
    "path_md_checkpoint": "",  # Will be set in create_config
    "path_md_state": "",  # Will be set in create_config
    "path_md_image": "",  # Will be set in create_config (read by run_simulation)
    "path_rmsd_output": "",  # Will be set in create_config
    "path_rmsd_ligand_output": "",  # Will be set in create_config
    "path_rmsf_output": "",  # Will be set in create_config
    "path_amber_topology": "",  # Will be set in create_config

    # Forcefields
    "ff_small_molecule_openff": "openff-2.0.0.offxml",
    "ff_protein_openff": "ff14sb_off_impropers_0.0.3.offxml",
    "ff_protein": "amber14-all.xml",
    "ff_water": "amber14/tip3pfb.xml",

    # Integrator
    "integrator_temperature": 300.0,
    "integrator_friction": 1.0,
    "integrator_timestep": 0.002,

    # Solvation
    "solv_box_buffer": 2.5,  # nanometers (run_solvation adds it to coordinates expressed in nm)
    "solv_ionic_strength": 0.15,  # molar
    "solv_positive_ion": "Na+",  # the type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
    "solv_negative_ion": "Cl-",  # the type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware that not all force fields support all ion types.
    "solv_model": "tip3p",  # Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
    "solv_pH": 7.0,  # pH of the solvent

    # MD Simulation
    "md_steps": 1000,
    "md_save_interval": 10,
    "md_pressure": 1.0,
    "md_anisotropic": False,
    "md_barostat_freq": 25,
    "md_harmonic_restraint": True,
    "md_load_state": True,
    "md_restrained_residues": [],
    "md_npt": False,

    # Monitor
    "monitor_window": 10,
    "monitor_temp_threshold": 2.0,
    "monitor_energy_threshold": 100.0,

    # Platform
    "platform_name": "CPU",
    "platform_precision": "mixed",

    # Analysis - RMSD
    "rmsd_selection": "protein and name CA",
    "rmsd_ligand_selection": "resname UNK",

    # Analysis - RMSF
    "rmsf_selection": "protein and name CA",

    # Energy minimization
    "emin_tolerance": "5 * kilojoule_per_mole / nanometer",
    "emin_heating_step": 300,
    "emin_target_temp": 300,
    "emin_heating_interval": 1,
    "emin_steps": 10,
}
If None, saves in project_dir 100 | save_config_as (str, optional): Name of config file. Defaults to "simulation_config.yaml" 101 | **params: Override any default settings using the following parameters: 102 | 103 | Forcefields: 104 | ff_small_molecule_openff (str): Forcefield for small molecules. Default: "openff-2.0.0.offxml" 105 | ff_protein_openff (str): Forcefield for protein. Default: "ff14sb_off_impropers_0.0.3.offxml" 106 | ff_protein (str): Forcefield for protein. Default: "amber14-all.xml" 107 | ff_water (str): Forcefield for water. Default: "amber14/tip3pfb.xml" 108 | 109 | Integrator Settings: 110 | integrator_temperature (float): Temperature in Kelvin. Default: 300.0 111 | integrator_friction (float): Friction coefficient in ps^-1. Default: 1.0 112 | integrator_timestep (float): Time step in ps. Default: 0.002 113 | 114 | Solvation Settings: 115 | solv_box_buffer (float): Buffer size in angstroms. Default: 2.5 116 | solv_ionic_strength (float): Ionic strength in molar. Default: 0.15 117 | solv_positive_ion (str): Type of positive ion. Default: "Na+" 118 | Allowed: ['Cs+', 'K+', 'Li+', 'Na+', 'Rb+'] 119 | solv_negative_ion (str): Type of negative ion. Default: "Cl-" 120 | Allowed: ['Cl-', 'Br-', 'F-', 'I-'] 121 | solv_model (str): Water model. Default: "tip3p" 122 | Allowed: ['tip3p', 'spce', 'tip4pew', 'tip5p'] 123 | solv_pH (float): pH of the solvent. Default: 7.0 124 | 125 | MD Simulation Settings: 126 | md_steps (int): Total simulation steps. Default: 1000 127 | md_save_interval (int): Save interval for trajectory. Default: 10 128 | md_pressure (float): Pressure in atmospheres. Default: 1.0 129 | md_anisotropic (bool): Use anisotropic pressure. Default: False 130 | md_barostat_freq (int): Barostat frequency. Default: 25 131 | md_harmonic_restraint (bool): Use harmonic restraints. Default: True 132 | md_load_state (bool): Load previous state if available. Default: True 133 | md_restrained_residues (list): List of residues to restrain. 
Default: [] 134 | md_npt (bool): Use NPT ensemble. Default: False 135 | 136 | Monitoring Settings: 137 | monitor_window (int): Window size for monitoring. Default: 10 138 | monitor_temp_threshold (float): Temperature threshold. Default: 2.0 139 | monitor_energy_threshold (float): Energy threshold. Default: 100.0 140 | 141 | Platform Settings: 142 | platform_name (str): Platform for computation. Default: "CPU" 143 | Allowed: ['CPU', 'CUDA'] 144 | platform_precision (str): Precision mode. Default: "mixed" 145 | Allowed: ['single', 'mixed', 'double'] 146 | 147 | Analysis Settings: 148 | rmsd_selection (str): Atom selection for RMSD. Default: "protein and name CA" 149 | rmsd_ligand_selection (str): Atom selection for ligand RMSD. Default: "resname UNK" 150 | rmsf_selection (str): Atom selection for RMSF. Default: "protein and name CA" 151 | 152 | Energy Minimization Settings: 153 | emin_tolerance (str): Energy minimization tolerance. 154 | Default: "5 * kilojoule_per_mole / nanometer" 155 | emin_heating_step (int): Heating step size. Default: 300 156 | emin_target_temp (float): Target temperature. Default: 300 157 | emin_heating_interval (int): Steps per heating interval. Default: 1 158 | emin_steps (int): Total minimization steps. Default: 10 159 | 160 | Returns: 161 | Dict[str, Any]: Complete simulation configuration 162 | 163 | Example: 164 | >>> config = create_config( 165 | ... protein_file="protein.pdb", 166 | ... ligand_file="ligand.mol2", 167 | ... platform_name="CUDA", 168 | ... integrator_temperature=310.0 169 | ... 
) 170 | """ 171 | # Convert paths to absolute paths 172 | 173 | protein_file = Path(protein_file).absolute() 174 | project_dir = Path(project_dir).absolute() if project_dir else Path(protein_file).parent.absolute() 175 | 176 | if ligand_file: 177 | ligand_file = Path(ligand_file).absolute() 178 | 179 | # Set up directories 180 | output_dir = Path(output_dir).absolute() if output_dir else project_dir / "output" 181 | config_dir = Path(config_dir).absolute() if config_dir else project_dir / "config" 182 | 183 | # Validate paths 184 | if not protein_file.exists(): 185 | print(f"Protein file not found: {protein_file}. Upload protein file to folder") 186 | if ligand_file and not ligand_file.exists(): 187 | print(f"Ligand file not found: {ligand_file}. Upload ligand file to folder") 188 | 189 | # Create directories 190 | os.makedirs(output_dir, exist_ok=True) 191 | os.makedirs(config_dir, exist_ok=True) 192 | 193 | # Start with default configuration 194 | config = DEFAULT_CONFIG.copy() 195 | 196 | # Update with user parameters 197 | for param, value in params.items(): 198 | if param in config: 199 | config[param] = value 200 | else: 201 | raise ValueError(f"Unknown parameter: {param}") 202 | 203 | # Set up paths 204 | config.update({ 205 | "path_base": str(project_dir), 206 | "path_protein": str(protein_file), 207 | "path_ligand": str(ligand_file) if ligand_file else "", 208 | "path_protein_solvated": str(output_dir / "protein_solvated.pdb"), 209 | "path_openff_topology": str(output_dir / "openff_topology.json"), 210 | "path_openff_interchange": str(output_dir / "openff_interchange.pdb"), 211 | "path_openmm_topology": str(output_dir / "openmm_topology.pkl"), 212 | "path_openmm_system": str(output_dir / "openmm_system.xml"), 213 | "path_emin_structure": str(output_dir / "emin.pdb"), 214 | "path_emin_state": str(output_dir / "emin.xml"), 215 | "path_md_log": str(output_dir / "md_id.log"), 216 | "path_md_trajectory": str(output_dir / "md_trajetory_id.dcd"), 217 | 
def print_config(config: Dict[str, Any]):
    """Write every configuration entry to stdout, grouped by key prefix.

    Keys are visited in sorted order. Whenever the text before the first
    underscore of a key differs from that of the previous key, an upper-cased
    section header is printed. List and tuple values are expanded one item
    per line; an empty sequence is shown as ``[]``.

    Args:
        config (dict): Flat configuration dictionary whose keys carry a
            section prefix (for example ``md_steps``).
    """
    print("\n=== Simulation Configuration ===\n")

    last_section = None
    for name in sorted(config):
        # The section is everything up to the first underscore.
        section = name.partition('_')[0]
        if section != last_section:
            print(f"\n{section.upper()}:")
            last_section = section

        entry = config[name]

        # Scalars (anything that is not a list/tuple) go on a single line.
        if not isinstance(entry, (list, tuple)):
            print(f" {name}: {entry}")
            continue

        # Empty sequences get a compact marker instead of a bare header.
        if not entry:
            print(f" {name}: []")
            continue

        print(f" {name}:")
        for element in entry:
            print(f" - {element}")

    print()
def time_tracker(func):
    """Decorator that prints how long each call to *func* took.

    The wrapped function's return value is passed through unchanged and its
    metadata (name, docstring) is preserved via ``functools.wraps``. If
    *func* raises, the exception propagates and no timing line is printed.

    Args:
        func: Callable to time.

    Returns:
        A wrapper with the same call signature as *func*.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot go negative or jump when the system clock changes.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Function '{func.__name__}' took {execution_time:.4f} seconds to execute")
        return result
    return wrapper
def pdbqt_to_sdf(input_file, output_file, config):
    """Convert a PDBQT file to SDF by invoking the ``obabel`` executable.

    The command passes ``-p <pH>`` using ``config['solv_pH']``, together with
    the ``-h`` and ``-m`` switches. A non-zero exit status from ``obabel`` is
    reported on stdout rather than raised.

    Args:
        input_file: Path of the PDBQT file to read.
        output_file: Path handed to ``obabel -O``.
        config (dict): Configuration dictionary; only ``solv_pH`` is read.
    """
    # subprocess only accepts str/bytes/path-like arguments, while solv_pH is
    # stored as a number (default 7.0), so it has to be converted explicitly.
    ph_argument = str(config['solv_pH'])
    command = ["obabel", input_file, "-O", output_file, "-p", ph_argument, "-h", "-m"]
    try:
        subprocess.run(command, check=True)
        print(f"Conversion successful! Output saved as {output_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error during conversion: {e}")
def interachange_top_to_amber_prmtop(config):
    """Build an Interchange from the stored OpenFF topology and write an Amber prmtop.

    Args:
        config (dict): Configuration dictionary. Keys read:
            ``path_openff_topology`` (OpenFF topology JSON to load),
            ``ff_small_molecule_openff`` and ``ff_protein_openff`` (force
            field files combined into one ``ForceField``), and
            ``path_amber_topology`` (output prmtop path).
    """
    # Read the JSON inside a context manager so the handle is closed promptly.
    with open(config['path_openff_topology']) as json_file:
        top = Topology.from_json(json_file.read())

    sage_ff14sb = ForceField(config['ff_small_molecule_openff'], config['ff_protein_openff'])
    interchange = sage_ff14sb.create_interchange(top)
    interchange.to_prmtop(config['path_amber_topology'], writer='internal')
    print(f"Done! File saved to {config['path_amber_topology']}")
def save_openmm_system_to_pdb(config):
    """Write the initial coordinates of a serialized OpenMM system to a PDB file.

    Legacy helper: it was used by ``run_forcefield_parameterization.py`` and
    has since been replaced there by ``interchange.to_pdb()``. It can still be
    used to turn already-serialized system/topology files into a PDB.

    Args:
        config (dict): Configuration dictionary. Keys read:
            ``path_openmm_system`` (OpenMM system XML),
            ``path_openmm_topology`` (pickled OpenMM topology),
            ``path_openff_topology`` (OpenFF topology JSON, source of the
            coordinates) and ``path_openmm_structure`` (output PDB path).

    NOTE(review): ``path_openmm_structure`` is not among the ``path_*`` keys
    that ``create_config`` is seen writing in this repository; confirm the
    caller supplies it, otherwise this raises ``KeyError``.
    """

    OMM_SYS_XML_INPUT = config['path_openmm_system']
    OMM_TOP_PKL_INPUT = config['path_openmm_topology']
    OFF_TOP_JSON_INPUT = config['path_openff_topology']
    PDB_OUTPUT = config['path_openmm_structure']

    omm_system = load_openmm_system_from_xml(OMM_SYS_XML_INPUT)
    omm_top = load_openmm_topology_from_pickle(OMM_TOP_PKL_INPUT)
    off_top = load_openff_topology_from_json(OFF_TOP_JSON_INPUT)

    # Integrator is not used for dynamics, but a Simulation cannot be built without one.
    integrator = openmm.LangevinIntegrator(
        300 * kelvin,
        1 / picosecond,
        0.002 * picoseconds,
    )

    # The coordinates live on the OpenFF topology; the OpenMM topology only
    # describes atoms and bonds, so the same positions are reused below.
    positions = off_top.get_positions().to_openmm()

    # Combine the topology, system, integrator and initial positions into a simulation
    simulation = Simulation(omm_top, omm_system, integrator)
    simulation.context.setPositions(positions)

    # Write the positions to a PDB file
    with open(PDB_OUTPUT, 'w') as file:
        PDBFile.writeFile(simulation.topology, positions, file)
def parse_log_file(log_file_path):
    """Read step, energy and temperature columns from a tab-separated log.

    The first line is treated as a header and skipped. Every following line
    is split on tabs; columns 2, 3 and 4 (zero-based) are parsed as floats.
    A row is stored only when all three values parse, so the returned lists
    always have equal length. Blank lines are ignored, and rows that are too
    short or non-numeric are reported on stdout and skipped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        tuple: ``(steps, energies, temperatures)`` as three lists of floats.
    """
    print("Parsing log file")
    steps = []
    energies = []
    temperatures = []
    with open(log_file_path, 'r') as file:
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(file, None)
        for line in file:
            stripped = line.strip()
            if not stripped:
                continue
            cols = stripped.split('\t')
            try:
                # Parse the whole row before storing anything, so a bad value
                # in a later column cannot leave the lists out of step.
                step = float(cols[2])
                energy = float(cols[3])
                temperature = float(cols[4])
            except (ValueError, IndexError) as e:
                print(e)
                continue
            steps.append(step)
            energies.append(energy)
            temperatures.append(temperature)
    return steps, energies, temperatures
def calculate_autocorrelation(data):
    """Compute the normalized autocorrelation of a series and its 1/e decay index.

    The series is mean-centred, correlated with itself, truncated to
    non-negative lags and divided by its lag-0 value.

    Args:
        data: Sequence of numbers.

    Returns:
        tuple: ``(autocorr, decorr_time)`` where ``autocorr`` is the
        normalized autocorrelation from lag 0 onwards and ``decorr_time`` is
        the first lag index at which it drops below ``exp(-1)``, or ``None``
        when it never does.
    """
    centered = np.array(data)
    centered = centered - np.mean(centered)

    # mode='full' covers negative and positive lags; the second half starts at lag 0.
    full_corr = np.correlate(centered, centered, mode='full')
    acf = full_corr[len(full_corr)//2:]
    acf = acf / acf[0]

    # First lag whose correlation has fallen below 1/e, if any.
    below_threshold = np.where(acf < np.exp(-1))[0]
    decorr_time = below_threshold[0] if len(below_threshold) > 0 else None

    return acf, decorr_time
def check_residues_in_amber_topology(amber_topology_path):
    """Print the residue names and water/ion counts found in an Amber topology.

    Args:
        amber_topology_path: Path of the Amber topology file to inspect; it is
            loaded with ``parmed.load_file``.
    """
    # NOTE: when the topology was produced with interchange.to_prmtop(...),
    # the ligand was observed to be missing in the final prmtop. This report
    # is meant to make such omissions visible.
    amber_structure = parmed.load_file(amber_topology_path)

    # Print all unique residue names in the structure
    residue_names = {residue.name for residue in amber_structure.residues}
    print("Residue names:", residue_names)

    # Check for water and ions
    water_residues = [res for res in amber_structure.residues if res.name in ['WAT', 'HOH']]
    sodium_ions = [res for res in amber_structure.residues if res.name == 'NA']
    chloride_ions = [res for res in amber_structure.residues if res.name == 'CL']

    print(f"Number of water molecules: {len(water_residues)}")
    print(f"Number of sodium ions: {len(sodium_ions)}")
    print(f"Number of chloride ions: {len(chloride_ions)}")
def get_sigma_epsilon(nonbonded_force, particle_index):
    """Return a particle's scaled sigma and its epsilon from a NonbondedForce.

    The particle's parameters are fetched with ``getParticleParameters``; the
    charge is discarded. Sigma is multiplied by 2**(1/6) before being
    returned, while epsilon is returned untouched. For a 12-6 Lennard-Jones
    potential, 2**(1/6) * sigma is the separation at the energy minimum.

    NOTE(review): callers use the scaled value as an atom-size parameter when
    looking for clashes; confirm that the minimum-energy distance rather than
    sigma itself is what is intended.

    Args:
        nonbonded_force: Object exposing ``getParticleParameters(index)``
            that returns ``(charge, sigma, epsilon)``.
        particle_index: Index of the particle to query.

    Returns:
        tuple: ``(sigma_corrected, epsilon)``.
    """
    lj_scale = 2 ** (1/6)
    _charge, raw_sigma, well_depth = nonbonded_force.getParticleParameters(particle_index)
    return raw_sigma * lj_scale, well_depth
59 | This corresponds to the van der Waals radius, which describes how close atoms can approach each other 60 | before strong repulsion occurs. 61 | 62 | 63 | Purpose: 64 | Finds places where atoms are too close to each other, which isn't physically realistic. 65 | 66 | Why useful: 67 | In real molecules, atoms can't overlap significantly due to electron repulsion. 68 | Finding these "clashes" helps identify problems in the molecular model that need 69 | to be fixed before simulation. Think of it like finding places where two billiard 70 | balls are trying to occupy the same space - that can't happen in reality. 71 | 72 | Args: 73 | protein_atoms: List of protein atoms 74 | ligand_coords: Coordinates of ligand atoms 75 | nonbonded_force: OpenMM NonbondedForce object 76 | 77 | Returns: 78 | clash_pairs: List of details about detected clashes 79 | """ 80 | # Ensure coordinates are in OpenMM units (nanometers typically) 81 | protein_coords = np.array([atom.coord for atom in protein_atoms]) * unit.angstroms # if coords are in angstroms 82 | clash_pairs = [] 83 | 84 | # Iterate over all ligand atoms and check if clashes with protein atoms, based on sigma 85 | for lig_idx, lig_coord in enumerate(ligand_coords): 86 | lig_coord = np.array(lig_coord) * unit.angstroms # Convert ligand coords to proper OpenMM units 87 | lig_sigma, lig_epsilon = get_sigma_epsilon(nonbonded_force, lig_idx + 188832) # Adjust index appropriately 88 | # lig_sigma *= unit.nanometers # Ensure sigma is in nanometers if needed 89 | 90 | for prot_idx, prot_coord in enumerate(protein_coords): 91 | prot_sigma, prot_epsilon = get_sigma_epsilon(nonbonded_force, prot_idx) 92 | # prot_sigma *= unit.nanometers # Ensure sigma is in nanometers if needed 93 | if lig_idx == 0: 94 | if prot_idx == 0: 95 | radius_sum = lig_sigma + prot_sigma 96 | distance = np.linalg.norm(prot_coord - lig_coord) * unit.nanometers 97 | distance /=10 98 | radius_sum = (lig_sigma + prot_sigma) / 2 99 | distance = 
np.linalg.norm(prot_coord - lig_coord) * unit.nanometers # No unit conversion needed if both are already in nanometers 100 | distance /=10 101 | 102 | if distance <= radius_sum: 103 | overlap = (radius_sum - distance) 104 | if overlap > 0.1 * unit.nanometers: 105 | print(f"lig_sigma {lig_sigma}") 106 | print(f"prot_sigma {prot_sigma}") 107 | clash_pairs.append( 108 | (f"Protein Atom Index: {prot_idx}", 109 | f"Ligand Atom Index: {lig_idx}", 110 | f"Distance: {distance * unit.nanometers} Å", 111 | f"Radius Sum: {radius_sum * unit.nanometers} Å", 112 | f"Overlap: {(radius_sum - distance) * unit.nanometers} Å") 113 | ) 114 | print(f"Number of clashes: {len(clash_pairs)}") 115 | return clash_pairs 116 | 117 | def extract_protein_and_ligand_atoms(pdb_file, protein_chain_ids, ligand_resname): 118 | """ 119 | Extract protein and ligand atoms from a PDB file. 120 | 121 | Purpose: 122 | Separates the protein parts from the drug/ligand parts in a molecular structure file. 123 | 124 | Why useful: 125 | It's like sorting the pieces of a complex puzzle into groups. This separation 126 | allows us to analyze how the drug interacts with the protein, making it easier 127 | to focus on the important interactions without getting lost in all the details 128 | of the entire molecular system. 
def check_periodic_boundaries(system):
    """Report whether an OpenMM system uses periodic boundary conditions.

    Periodicity is decided by ``system.usesPeriodicBoundaryConditions()``.
    The default box vectors alone cannot answer this, because a system
    reports default box vectors even when nothing in it is periodic. When the
    system is periodic, its default box vectors are printed one per line.

    Args:
        system: OpenMM ``System`` object (needs
            ``usesPeriodicBoundaryConditions`` and
            ``getDefaultPeriodicBoxVectors``).
    """
    if not system.usesPeriodicBoundaryConditions():
        print("No periodic boundary conditions are set.")
        return

    print("Periodic boundary conditions are set.")
    print("Box Vectors:")
    for vec in system.getDefaultPeriodicBoxVectors():
        print(vec)
def concat_traj(traj_files_input, top_file_input, concat_file_output):
    """Concatenate multiple trajectory files into a single DCD file.

    Args:
        traj_files_input: Iterable of trajectory file paths (``str`` or
            ``os.PathLike``), in the order they should be concatenated.
        top_file_input: Path to the topology file used to build the Universe.
        concat_file_output: Path of the combined DCD file to write.

    Raises:
        FileNotFoundError: If any of the input trajectory files is missing.

    Note:
        Errors raised while writing are reported on stdout and not re-raised
        (best-effort behaviour kept from the original implementation).
    """
    import os  # local import: this module does not import os at file level

    # Normalise to plain strings so both str and Path inputs work; the previous
    # `file.exists()` call only worked for pathlib.Path objects.
    traj_paths = [os.fspath(traj) for traj in traj_files_input]

    # Check that all trajectory files exist before loading anything
    for traj_path in traj_paths:
        if not os.path.exists(traj_path):
            raise FileNotFoundError(f"Trajectory file {traj_path} not found.")

    # Load universe from trajectory and topology
    u = mda.Universe(top_file_input, traj_paths)

    try:
        with DCDWriter(concat_file_output, n_atoms=u.atoms.n_atoms) as writer:
            # Iterating the trajectory advances the Universe frame by frame;
            # u.atoms then holds the coordinates of the current frame.
            for _ in u.trajectory:
                writer.write(u.atoms)
        print(f"Combined trajectory saved to {concat_file_output}")
    except Exception as e:
        print(f"An error occurred: {e}")
def calculate_rmsd(aligned_universe, atoms_to_select, ref):
    """Calculate the RMSD of each trajectory frame against a reference.

    Args:
        aligned_universe: MDAnalysis Universe holding the trajectory.
        atoms_to_select: MDAnalysis selection string of the atoms to compare.
        ref: Reference Universe/AtomGroup handed to ``rms.RMSD``.

    Returns:
        pandas.DataFrame with the columns 'Frame', 'Time (ps)' and 'RMSD',
        one row per analysed frame.
    """
    rmsd_calc = rms.RMSD(aligned_universe, ref, select=atoms_to_select)
    rmsd_calc.run()

    # Read from the `results` namespace, as calculate_rmsf already does with
    # `results.rmsf`; the bare `.rmsd` attribute is deprecated in MDAnalysis 2.x.
    return pd.DataFrame(rmsd_calc.results.rmsd,
                        columns=['Frame', 'Time (ps)', 'RMSD'])
def calculate_rmsf(aligned_universe, protein_segids):
    """Compute per-segment RMSF values for the protein CA atoms.

    Args:
        aligned_universe: Universe whose ``select_atoms`` is queried once per
            segment id.
        protein_segids: Iterable of segment ids to analyse.

    Returns:
        dict mapping each segment id that had CA atoms to a dict with the keys
        'residues' (residue ids) and 'rmsf' (RMSF values).

    Raises:
        ValueError: If no segment produced any RMSF values.
    """
    per_segment = {}

    for segid in protein_segids:
        ca_atoms = aligned_universe.select_atoms(f'segid {segid} and protein and name CA')

        # Segments without CA atoms are reported and skipped
        if not len(ca_atoms):
            print(f"Warning: No CA atoms found for segment {segid}")
            continue

        analysis = rms.RMSF(ca_atoms).run()
        per_segment[segid] = {
            'residues': ca_atoms.residues.resids,
            'rmsf': analysis.results.rmsf,
        }

    if per_segment:
        return per_segment

    raise ValueError("No RMSF values could be calculated for any segment")
for each protein segment.""" 152 | fig, ax = plt.subplots(figsize=(10, 5)) 153 | 154 | for key, value in rmsf_dict.items(): 155 | resids = rmsf_dict[key]['residues'] 156 | rmsf_vals = rmsf_dict[key]['rmsf'] 157 | ax.plot(resids, rmsf_vals, label=f'Chain {key}') 158 | 159 | ax.set_xlabel('Unique Residue Number') 160 | ax.set_ylabel('RMSF') 161 | ax.set_title('RMSF for Each Chain') 162 | ax.legend() 163 | ax.grid(True) 164 | plt.show() 165 | 166 | def run_rmsf_analysis(top_file, traj_file, atoms_to_select): 167 | """ 168 | Run complete RMSF analysis workflow. 169 | 170 | Parameters 171 | ---------- 172 | top_file : str 173 | Path to topology file 174 | traj_file : str 175 | Path to trajectory file 176 | atoms_to_select : str 177 | MDAnalysis selection string for atoms to align 178 | 179 | Returns 180 | ------- 181 | dict 182 | Standardized results dictionary containing: 183 | - analysis_type: 'RMSF' 184 | - data: Dictionary of RMSF results per segment 185 | - selection: Atom selection string used 186 | - timestamp: When analysis was performed 187 | - metadata: Additional analysis information 188 | """ 189 | # Load trajectory and get reference frame 190 | universe, reference_frame = load_universe_and_reference(top_file, traj_file) 191 | 192 | # Get protein segment IDs 193 | protein_segids = get_seg_ids(universe) 194 | 195 | # Align trajectory to average structure (required for RMSF) 196 | aligned_universe = align_universe_to_reference( 197 | universe=universe, 198 | reference_frame=reference_frame, 199 | atoms_to_select=atoms_to_select, 200 | align_to_avg=True # Always True for RMSF 201 | ) 202 | 203 | # Calculate RMSF. 
class CheckpointReporter:
    """Reporter for saving simulation checkpoints.

    Every ``reportInterval`` steps the simulation is asked to write a
    checkpoint to ``file`` (overwriting the previous one).

    Args:
        file: Path of the checkpoint file (``str`` or ``os.PathLike``).
        reportInterval: Number of steps between two checkpoints.
    """
    def __init__(self, file, reportInterval):
        self._reportInterval = reportInterval
        # Keep only the path. The previous implementation kept a handle opened
        # with 'wb' for the reporter's whole lifetime, which truncated any
        # existing checkpoint at construction time and was never closed.
        self._path = os.fspath(file)
        # Still fail fast on an unwritable location and still create the file,
        # but in append mode so an existing checkpoint is left intact.
        with open(self._path, 'ab'):
            pass

    def describeNextReport(self, simulation):
        """Return the steps until the next report plus four False flags.

        The flags follow OpenMM's reporter protocol (positions, velocities,
        forces, energies); none of them is needed to write a checkpoint.
        """
        steps = self._reportInterval - simulation.currentStep % self._reportInterval
        return (steps, False, False, False, False)

    def report(self, simulation, state):
        """Write a checkpoint of ``simulation``; ``state`` is unused."""
        simulation.saveCheckpoint(self._path)
def setup_force_restraints(reference_structure, residue_indices, force_constant=100):
    """Build a harmonic positional restraint force for CA atoms.

    The restraint anchors are the coordinates found in ``reference_structure``.
    The returned force is not attached to any system here; the caller has to
    add it.

    Args:
        reference_structure: Path to the PDB file providing the reference
            coordinates and topology.
        residue_indices: ``None`` or an empty sequence to restrain the CA atoms
            of every residue, otherwise a ``(first, last)`` pair giving an
            inclusive range of residue ids.
        force_constant: Force constant in kJ/mol/nm^2 (default 100).

    Returns:
        CustomExternalForce with one restrained particle per selected CA atom.
    """
    ref_pdb = PDBFile(reference_structure)
    ref_positions = ref_pdb.getPositions()

    print(f"Adding harmonic positional restraints with force constant {force_constant} kJ/mol/nm^2...")
    force_k = force_constant * kilojoule_per_mole/nanometer**2
    restraint_force = CustomExternalForce("0.5 * k * ((x-x0)^2 + (y-y0)^2 + (z-z0)^2)")
    restraint_force.addGlobalParameter("k", force_k)

    # Per-particle reference coordinates
    for coordinate_name in ("x0", "y0", "z0"):
        restraint_force.addPerParticleParameter(coordinate_name)

    # None is treated like an empty sequence: no residue filter at all
    restrict_to_range = residue_indices is not None and len(residue_indices) > 0
    if restrict_to_range:
        first_resid, last_resid = residue_indices[0], residue_indices[1]

    num_restrained = 0
    chain_restraints = {}

    # NOTE(review): selection is by atom name only, so any atom named "CA"
    # is restrained - confirm that is intended for structures containing ions.
    for idx, atom in enumerate(ref_pdb.topology.atoms()):
        if atom.name != "CA":
            continue
        if restrict_to_range and not (first_resid <= int(atom.residue.id) <= last_resid):
            continue

        x, y, z = ref_positions[idx].value_in_unit(nanometer)
        restraint_force.addParticle(idx, (x, y, z))
        num_restrained += 1

        chain_id = atom.residue.chain.id
        chain_restraints[chain_id] = chain_restraints.get(chain_id, 0) + 1

    # Only CA atoms are restrained, so report them as such (was "heavy atoms")
    print(f"Added positional restraints to {num_restrained} CA atoms")
    print(f"Restraints per chain: {chain_restraints}")

    return restraint_force
def load_state_or_checkpoint(simulation, temp, state_file=None, checkpoint_file=None):
    """Load a state or checkpoint file into the simulation.

    An existing ``state_file`` takes precedence. After loading it, the step
    counter and simulation time are reset to 0 and velocities are re-drawn for
    ``temp``. If no usable state file is given, ``checkpoint_file`` is loaded
    as-is (no reset).

    Args:
        simulation: Simulation object providing ``loadState``,
            ``loadCheckpoint`` and ``context``.
        temp: Temperature passed to ``setVelocitiesToTemperature`` after a
            state file has been loaded.
        state_file: Optional path (``str`` or ``os.PathLike``) to a state file.
        checkpoint_file: Optional path (``str`` or ``os.PathLike``) to a
            checkpoint file.

    Returns:
        The same ``simulation`` object.

    Raises:
        TypeError: If a given path is neither a string nor path-like.
        ValueError: If neither an existing state file nor a checkpoint file
            was provided.
        RuntimeError: If loading fails; the original error is chained as
            ``__cause__``.
    """
    def _as_path(value, label):
        # Falsy values (None, "") mean "not provided", as before
        if not value:
            return None
        if isinstance(value, str):
            return value
        if isinstance(value, os.PathLike):
            return os.fspath(value)
        raise TypeError(f"{label} must be a string path")

    try:
        state_path = _as_path(state_file, "state_file")

        if state_path and os.path.exists(state_path):
            try:
                simulation.loadState(state_path)
                simulation.currentStep = 0
                simulation.context.setTime(0)
                simulation.context.setVelocitiesToTemperature(temp)
            except Exception as e:
                raise RuntimeError(f"Failed to load state file {state_path}: {str(e)}") from e
            print(f"Successfully loaded state from {state_path}")

        elif checkpoint_file:
            checkpoint_path = _as_path(checkpoint_file, "checkpoint_file")
            if state_path:
                # Make the silent fallback visible to the user
                print(f"State file {state_path} not found; falling back to checkpoint {checkpoint_path}")
            try:
                simulation.loadCheckpoint(checkpoint_path)
            except Exception as e:
                raise RuntimeError(f"Failed to load checkpoint file {checkpoint_path}: {str(e)}") from e
            print(f"Successfully loaded checkpoint from {checkpoint_path}")

        elif state_path:
            # A state file was named but is missing and there is nothing to fall back to
            raise ValueError(f"state_file {state_path} does not exist and no checkpoint_file was provided")
        else:
            raise ValueError("Either an existing state_file or a checkpoint_file must be provided")

    except FileNotFoundError as e:
        print(f"Error: File not found - {str(e)}")
        raise
    except (ValueError, TypeError) as e:
        print(f"Error: Invalid input - {str(e)}")
        raise
    except RuntimeError as e:
        print(f"Error: {str(e)}")
        raise
    except Exception as e:
        print(f"Unexpected error occurred: {str(e)}")
        raise

    return simulation
def print_system_charge(pdb_filepath, config):
    """Print the summed particle charge of a system built from a PDB file.

    Args:
        pdb_filepath: Path to the PDB file whose topology is parameterized.
        config: Mapping providing the force field files under the keys
            'forcefield_protein' and 'forcefield_solvent'.
    """
    structure = PDBFile(pdb_filepath)
    force_field = ForceField(config['forcefield_protein'], config['forcefield_solvent'])
    omm_system = force_field.createSystem(
        structure.topology,
        nonbondedMethod=NoCutoff,
        constraints=HBonds,
    )

    # Charges are stored on the NonbondedForce objects of the system
    nonbonded_forces = [
        candidate for candidate in omm_system.getForces()
        if isinstance(candidate, openmm.NonbondedForce)
    ]

    net_charge = 0.0
    for nb_force in nonbonded_forces:
        for particle_index in range(nb_force.getNumParticles()):
            # First entry of the particle parameters is the charge
            particle_charge = nb_force.getParticleParameters(particle_index)[0]
            net_charge += particle_charge.value_in_unit(unit.elementary_charge)

    print(f"Total system charge: {net_charge} e")
def transfer_state_with_precision(temp_omm_system, temp_omm_top, checkpoint_file, config):
    """
    Extract a State from a checkpoint whose precision is not known.

    A checkpoint does not record the precision it was written with, so this
    function uses trial and error: for each of 'double', 'mixed' and 'single'
    (in that order) it builds a temporary Simulation and tries to load the
    checkpoint into it. The State of the first attempt that succeeds is
    returned and can then be applied to a new simulation object.

    Parameters:
    -----------
    temp_omm_system : object
        OpenMM system object used for the temporary simulations
    temp_omm_top : object
        OpenMM topology object used for the temporary simulations
    checkpoint_file : str
        Path to the checkpoint file
    config : mapping
        Read via ``config.get`` for 'integrator_temperature',
        'integrator_friction', 'integrator_timestep' and 'random_seed'

    Returns:
    --------
    State
        State with positions, velocities and energy taken from the checkpoint

    Raises:
    -------
    ValueError
        If the checkpoint could not be loaded with any of the precisions

    Example:
        # Create new simulation with target precision
        platform_properties = {'Precision': target_precision}
        simulation = Simulation(omm_top, system, integrator, platform, platform_properties)

        # Transfer state to new simulation
        state = transfer_state_with_precision(....)
        simulation.context.setState(state)
    """
    # NOTE(review): the temporary platform is hard-coded to 'CPU' while a
    # 'Precision' property is passed - confirm the CPU platform accepts it;
    # if it does not, every attempt fails and ValueError is raised.
    for candidate in ('double', 'mixed', 'single'):
        try:
            # The temporary simulation must match the settings the checkpoint
            # was written with, so a fresh integrator is built per attempt.
            probe_integrator = LangevinMiddleIntegrator(
                config.get('integrator_temperature'),
                config.get('integrator_friction'),
                config.get('integrator_timestep'),
            )
            probe_integrator.setRandomNumberSeed(config.get('random_seed'))
            probe_simulation = Simulation(
                temp_omm_top,
                temp_omm_system,
                probe_integrator,
                Platform.getPlatformByName('CPU'),
                {'Precision': candidate},
            )

            probe_simulation.loadCheckpoint(checkpoint_file)
            print(f"Successfully loaded checkpoint with {candidate} precision")

            # The State carries the numerical data (positions, velocities,
            # energy) that the caller applies to the new simulation.
            return probe_simulation.context.getState(
                getPositions=True, getVelocities=True, getEnergy=True
            )
        except Exception as err:
            print(f"Failed to load checkpoint with {candidate} precision: {str(err)}")

    # Reached only when every precision failed
    raise ValueError("Could not determine the precision of the checkpoint file")
def test_config_creation(tmp_path):
    """Check that create_config builds and saves a basic configuration."""
    from easy_md.utils.config import create_config

    # Mock protein file used as input
    pdb_path = tmp_path / "test.pdb"
    pdb_path.write_text("MOCK PDB CONTENT")
    output_dir = tmp_path / "output"

    config = create_config(
        protein_file=str(pdb_path),
        project_dir=str(tmp_path),
        output_dir=str(output_dir),
    )

    # Keys every configuration has to provide
    for key in (
        'path_protein',
        'path_openmm_system',
        'path_openmm_topology',
        'platform_name',
        'platform_precision',
    ):
        assert key in config, f"Missing essential key in config: {key}"

    # The output directory has to exist on disk
    assert os.path.exists(output_dir), "Output directory was not created"

    # The configuration has to be saved as YAML
    saved_path = tmp_path / "config" / "simulation_config.yaml"
    assert saved_path.exists(), "Config file was not saved"

    # ... and the saved file has to parse into a dictionary
    with open(saved_path) as handle:
        loaded = yaml.safe_load(handle)
    assert isinstance(loaded, dict), "Saved config is not a valid YAML dictionary"
def test_file_paths(tmp_path):
    """Check that create_config fills in the expected file paths."""
    from easy_md.utils.config import create_config

    # Mock protein file used as input
    pdb_path = tmp_path / "test.pdb"
    pdb_path.write_text("MOCK PDB CONTENT")
    pdb_str = str(pdb_path)
    base_str = str(tmp_path)

    config = create_config(protein_file=pdb_str, project_dir=base_str)

    # Input and base paths are stored unchanged; generated paths live below the base
    assert config['path_protein'] == pdb_str
    assert config['path_base'] == base_str
    assert config['path_openmm_system'].startswith(base_str)
    assert config['path_openmm_topology'].startswith(base_str)

    # Every non-empty string entry under a 'path_' key must use sane separators
    for key, path in config.items():
        if not key.startswith('path_'):
            continue
        if not isinstance(path, str) or not path:
            continue
        assert '\\\\' not in path, f"Invalid path separator in {key}: {path}"
        assert os.path.sep in path or path == pdb_str, f"Missing path separator in {key}: {path}"