204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | formats: all
3 | conda:
4 | environment: docs/environment.yml
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | cache: pip
3 | python:
4 | - "3.6"
5 | - "3.7"
6 | - "3.8"
7 | before_install:
8 | - pip install -U pip
9 | - pip install codecov
10 | install:
11 | - pip install -r requirements.txt
12 | script:
13 | - python -m unittest discover -s tests
14 | after_success:
15 | - bash <(curl -s https://codecov.io/bash)
16 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Wanling Song
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 |
2 | ==========================================================
3 | PyLipID - A Python Package For Lipid Interactions Analysis
4 | ==========================================================
5 |
6 | .. image:: https://travis-ci.com/wlsong/PyLipID.svg?branch=master
7 | :target: https://travis-ci.com/github/wlsong/PyLipID
8 | .. image:: https://img.shields.io/pypi/v/PyLipID
9 | :target: https://pypi.org/project/pylipid/
10 |
11 | .. image:: docs/static/pylipid_logo_smallsize.png
12 | :align: center
13 |
14 |
15 | PyLipID is a Python package for analyzing lipid interactions with membrane proteins from
16 | molecular dynamics simulations. PyLipID has the following main features; please check out
17 | the tutorials for examples and the documentation for details of the API:
18 |
19 | * Detection of binding sites via calculating community structures in the interactions networks.
20 | * Calculation of lipid koff and residence time for interaction with binding sites and residues.
21 | * Analysis of lipid interactions with binding sites and residues using a couple of metrics.
22 | * Generation of representative bound poses for binding sites.
23 | * Analysis of bound poses for binding sites via automated clustering scheme.
24 | * Adoption of a dual-cutoff scheme to overcome the 'rattling in cage' effect of coarse-grained simulations.
25 | * Generation of manuscript-ready figures for analysis.
26 |
27 | PyLipID can be used from Jupyter (formerly IPython, recommended) or by writing Python scripts.
28 | The documentation and tutorials can be found at `pylipid.readthedocs.io <https://pylipid.readthedocs.io>`_.
29 |
30 | Installation
31 | ============
32 |
33 | We recommend installing PyLipID using the package installer `pip`:
34 |
35 | ``pip install pylipid``
36 |
37 | Alternatively, PyLipID can be installed from the source code. The package is available for
38 | download on Github via:
39 |
40 | ``git clone https://github.com/wlsong/PyLipID``
41 |
42 | Once the source code is downloaded, enter the source code directory and install the package as follows:
43 |
44 | ``python setup.py install``
45 |
46 |
47 | Citation |DOI for Citing PyLipID|
48 | =================================
49 |
50 | If you use PyLipID in scientific research, please cite the following paper: ::
51 |
52 | @article{song_pylipid_2022,
53 | author = {Song, Wanling. and Corey, Robin A. and Ansell, T. Bertie. and
54 | Cassidy, C. Keith. and Horrell, Michael R. and Duncan, Anna L.
55 | and Stansfeld, Phillip J. and Sansom, Mark S.P.},
56 | title = {PyLipID: A Python package for analysis of protein-lipid interactions from MD simulations},
57 | journal = {J. Chem. Theory Comput},
58 | year = {2022},
59 | url = {https://doi.org/10.1021/acs.jctc.1c00708},
60 | doi = {10.1021/acs.jctc.1c00708},
61 | urldate = {2022-02-18},
62 | }
63 |
64 | .. |DOI for Citing PyLipID| image:: https://img.shields.io/badge/DOI-10.1021/acs.jctc.1c00708-blue
65 | :target: https://doi.org/10.1021/acs.jctc.1c00708
66 |
--------------------------------------------------------------------------------
/docs/INSTALL.rst:
--------------------------------------------------------------------------------
1 |
2 | ============
3 | Installation
4 | ============
5 |
6 | To install the PyLipID Python package, you need a few Python package dependencies. These dependencies
7 | include:
8 |
9 | - mdtraj
10 | - numpy
11 | - pandas
12 | - matplotlib
13 | - seaborn
14 | - networkx
15 | - scipy
16 | - python-louvain
17 | - logomaker
18 | - statsmodels
19 | - scikit-learn
20 | - tqdm
21 | - kneebow
22 |
23 | If these dependencies are not available in their required versions, the installation will fail. We thus
24 | recommend using the package installer pip:
25 |
26 | .. code-block:: bash
27 |
28 | $ pip install pylipid
29 |
30 | If you know what you are doing and want to build from the source, the package is also available for
31 | download on Github via:
32 |
33 | .. code-block:: bash
34 |
35 | $ git clone https://github.com/wlsong/PyLipID.git
36 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 |
2 | =============
3 | Documentation
4 | =============
5 |
6 | PyLipID is a Python library and can be used through python or ipython. See Tutorials for
7 | examples and getting started. The package contains 4 modules:
8 |
9 | The ``api`` module provides the outer layer functions and classes for calculation:
10 |
11 | .. toctree::
12 | :maxdepth: 1
13 |
14 | index_LipidInteraction
15 |
16 | The ``func`` module contains functions that do the heavy-lifting:
17 |
18 | .. toctree::
19 | :maxdepth: 1
20 |
21 | index_func
22 |
23 | The ``plot`` module contains plotting functions for aiding in the scientific analysis:
24 |
25 | .. toctree::
26 | :maxdepth: 1
27 |
28 | index_plot
29 |
30 | The ``util`` module contains other assisting functions:
31 |
32 | .. toctree::
33 | :maxdepth: 1
34 |
35 | index_util
36 |
37 |
--------------------------------------------------------------------------------
/docs/api/index_LipidInteraction.rst:
--------------------------------------------------------------------------------
1 |
2 | pylipid.api
3 | ===========
4 |
5 | .. currentmodule:: pylipid.api
6 |
7 | .. autoclass:: LipidInteraction
8 |
9 | .. rubric:: Methods
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | ~LipidInteraction.collect_residue_contacts
15 | ~LipidInteraction.compute_residue_duration
16 | ~LipidInteraction.compute_residue_occupancy
17 | ~LipidInteraction.compute_residue_lipidcount
18 | ~LipidInteraction.compute_residue_koff
19 | ~LipidInteraction.compute_binding_nodes
20 | ~LipidInteraction.compute_site_duration
21 | ~LipidInteraction.compute_site_occupancy
22 | ~LipidInteraction.compute_site_lipidcount
23 | ~LipidInteraction.compute_site_koff
24 | ~LipidInteraction.analyze_bound_poses
25 | ~LipidInteraction.compute_surface_area
26 | ~LipidInteraction.write_site_info
27 | ~LipidInteraction.show_stats_per_traj
28 | ~LipidInteraction.save_data
29 | ~LipidInteraction.save_coordinate
30 | ~LipidInteraction.save_pymol_script
31 | ~LipidInteraction.plot
32 | ~LipidInteraction.plot_logo
33 |
34 |
35 | .. rubric:: Attributes
36 |
37 | .. autosummary::
38 | :toctree: generated
39 |
40 | ~LipidInteraction.dataset
41 | ~LipidInteraction.residue_list
42 | ~LipidInteraction.node_list
43 | ~LipidInteraction.lipid
44 | ~LipidInteraction.lipid_atoms
45 | ~LipidInteraction.cutoffs
46 | ~LipidInteraction.nprot
47 | ~LipidInteraction.stride
48 | ~LipidInteraction.dt_traj
49 | ~LipidInteraction.trajfile_list
50 | ~LipidInteraction.topfile_list
51 | ~LipidInteraction.resi_offset
52 | ~LipidInteraction.save_dir
53 | ~LipidInteraction.timeunit
54 | ~LipidInteraction.koff
55 | ~LipidInteraction.res_time
56 | ~LipidInteraction.koff_bs
57 | ~LipidInteraction.res_time_bs
58 | ~LipidInteraction.residue
59 | ~LipidInteraction.binding_site
60 |
61 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/docs/api/index_func.rst:
--------------------------------------------------------------------------------
1 |
2 | pylipid.func
3 | ==============
4 |
5 | The ``func`` package provides tools to collect lipid interactions, calculate koff/residence time,
6 | and calculate binding sites in PyLipID.
7 |
8 |
9 | .. currentmodule:: pylipid.func
10 |
11 | .. rubric:: binding_site
12 |
13 | .. autosummary::
14 | :toctree: generated/
15 |
16 | ~get_node_list
17 | ~collect_bound_poses
18 | ~vectorize_poses
19 | ~calculate_scores
20 | ~write_bound_poses
21 | ~analyze_pose_wrapper
22 | ~calculate_surface_area_wrapper
23 |
24 |
25 | .. rubric:: clusterer
26 |
27 | .. autosummary::
28 | :toctree: generated/
29 |
30 | ~cluster_DBSCAN
31 | ~cluster_KMeans
32 |
33 |
34 | .. rubric:: interactions
35 |
36 | .. autosummary::
37 | :toctree: generated/
38 |
39 | ~cal_contact_residues
40 | ~cal_occupancy
41 | ~cal_lipidcount
42 | ~Duration
43 | .. autoclass:: ~Duration
44 |
45 |
46 | .. rubric:: kinetics
47 |
48 | .. autosummary::
49 | :toctree: generated/
50 |
51 | ~cal_koff
52 | ~cal_survival_func
53 | ~calculate_koff_wrapper
54 |
--------------------------------------------------------------------------------
/docs/api/index_plot.rst:
--------------------------------------------------------------------------------
1 |
2 | pylipid.plot
3 | ==============
4 |
5 | The ``plot`` package provides tools to plot the generated data in PyLipID.
6 |
7 |
8 | .. currentmodule:: pylipid.plot
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | ~plot_koff
14 | ~plot_residue_data
15 | ~plot_residue_data_logo
16 | ~plot_binding_site_data
17 | ~plot_surface_area
18 | ~plot_corrcoef
19 |
20 |
--------------------------------------------------------------------------------
/docs/api/index_util.rst:
--------------------------------------------------------------------------------
1 |
2 | pylipid.util
3 | ==============
4 |
5 | The ``util`` package provides assisting tools to deal with e.g. the writing of pdb files,
6 | path checking and writing pymol script etc.
7 |
8 |
9 | .. currentmodule:: pylipid.util
10 |
11 | .. autosummary::
12 | :toctree: generated/
13 |
14 | ~check_dir
15 | ~write_PDB
16 | ~write_pymol_script
17 | ~sparse_corrcoef
18 | ~rmsd
19 | ~get_traj_info
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('..'))
16 | sys.path.insert(0, os.path.abspath('../pylipid'))
17 |
18 | # import sphinx_rtd_theme
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = 'PyLipID'
23 | copyright = '2020, Wanling Song'
24 | author = 'Wanling Song'
25 |
26 |
27 | # -- General configuration ---------------------------------------------------
28 |
29 | autoclass_content = "both" # include both class docstring and __init__
30 | autodoc_default_flags = [
31 | # Make sure that any autodoc declarations show the right members
32 | "members",
33 | "inherited-members",
34 | "private-members",
35 | "show-inheritance",
36 | ]
37 | autoclass_content = "init"
38 |
39 | autodoc_default_options = {
40 | 'member-order': 'bysource',
41 | 'special-members': '__init__',
42 | 'undoc-members': True,
43 | 'exclude-members': '__init__'
44 | }
45 |
46 |
47 | # autosummary
48 | autosummary_generate = True
49 | autodoc_default_flags = ['members', 'inherited-members']
50 |
51 | # Napoleon settings
52 | napoleon_google_docstring = False
53 | napoleon_numpy_docstring = True
54 | napoleon_include_private_with_doc = False
55 | napoleon_include_special_with_doc = False
56 | napoleon_use_admonition_for_examples = False
57 | napoleon_use_admonition_for_notes = False
58 | napoleon_use_admonition_for_references = False
59 | napoleon_use_ivar = True
60 | napoleon_use_param = True
61 | napoleon_use_rtype = True
62 |
63 | extensions = [
64 | 'sphinx_rtd_theme',
65 | 'sphinx.ext.intersphinx',
66 | 'sphinx.ext.autodoc',
67 | 'sphinx.ext.autosummary',
68 | 'sphinx.ext.napoleon',
69 | 'sphinx.ext.coverage',
70 | 'sphinx.ext.mathjax',
71 | 'sphinx.ext.viewcode',
72 | 'nbsphinx'
73 | ]
74 |
75 | # Add any paths that contain templates here, relative to this directory.
76 | templates_path = ['_templates']
77 |
78 | # List of patterns, relative to source directory, that match files and
79 | # directories to ignore when looking for source files.
80 | # This pattern also affects html_static_path and html_extra_path.
81 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
82 |
83 | # -- Options for HTML output -------------------------------------------------
84 |
85 | # The theme to use for HTML and HTML Help pages. See the documentation for
86 | # a list of builtin themes.
87 | #
88 | html_theme = 'furo'
89 | html_static_path = ['static']
90 | html_logo = 'static/pylipid_logo.png'
91 | html_theme_options = {
92 | "sidebar_hide_name": True,
93 | "navigation_with_keys": True,
94 | }
95 |
96 | exclude_patterns = ['_build', '*_test*', '**/.ipynb_checkpoints/*']
97 |
98 | # -----------------------------------------------------------------------------
99 | # Autosummary
100 | # -----------------------------------------------------------------------------
101 |
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/docs/demo.rst:
--------------------------------------------------------------------------------
1 |
2 | ====
3 | Demo
4 | ====
5 |
6 | Here we provide a no-brainer demo script for lipid interaction analysis using PyLipID. This script works
7 | for versions later than 1.4. Please update PyLipID to the latest version ::
8 |
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | from pylipid.api import LipidInteraction
12 | from pylipid.util import check_dir
13 |
14 | ##################################################################
15 | ##### This part needs changes according to your setting ##########
16 | ##################################################################
17 | trajfile_list = ["run1/md.xtc", "run2/md.xtc"]
18 | topfile_list = ["run1/md.gro", "run2/md.gro"] # topology file is needed when trajectory format does not
19 | # provide topology information. See mdtraj.load() for more
20 | # information.
21 | dt_traj = None # the timestep of trajectories. Need to use this param when trajectories are in a format
22 | # with no timestep information. Not necessary for trajectory formats of e.g. xtc, trr.
23 | stride = 1 # tell pylipid to analyze every stride-th frame. Can be used to save computation memory
24 | # and speed up the calculation.
25 |
26 | lipid = "CHOL" # residue name in the topology.
27 | lipid_atoms = None # all lipid atoms will be considered for interaction calculation.
28 | cutoffs = [0.5, 0.8] # dual-cutoff scheme for coarse-grained simulations. Single-cutoff scheme can be
29 | # achieved by using the same value for two cutoffs.
30 |
31 | nprot = 1 # if the simulation system has N copies of receptors, "nprot=N" will report interactions
32 | # averaged from the N copies, but "nprot=1" will ask PyLipID to report interaction for
33 | # each copy.
34 |
35 | binding_site_size = 4 # binding site should contain at least four residues.
36 |
37 | n_top_poses = 3 # write out num. of representative bound poses for each binding site.
38 | n_clusters = "auto" # cluster the bound poses for a binding site into num. of clusters. PyLipID
39 | # will write out a pose conformation for each of the cluster. By default, i.e.
40 | # "auto", PyLipID will use a density based clusterer to find possible clusters.
41 |
42 | save_dir = None # save at current working directory if it is None.
43 | save_pose_format = "gro" # format that poses are written in
44 | save_pose_traj = True # save all the bound poses in a trajectory for each binding site. The generated
45 | # trajectories can take some disk space (up to a couple GB depending on your system).
46 | save_pose_traj_format = "xtc" # The format for the saved pose trajectories. Can take any format that is supported
47 | # by mdtraj.
48 |
49 | timeunit = "us" # micro-sec. "ns" is nanosecond. Time unit used for reporting the results.
50 | resi_offset = 0 # shift the residue index, useful for MARTINI models.
51 |
52 | radii = None # Radii of protein atoms/beads. In the format of python dictionary {atom_name: radius}
53 | # Used for calculation of binding site surface area. The van der waals radii of common atoms were
54 | # defined by mdtraj (https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/sasa.py#L56).
55 | # The radii of MARTINI 2.2 beads were included in PyLipID.
56 |
57 | pdb_file_to_map = None # if a pdb coordinate of the receptor is provided, a python script
58 | # "show_binding_site_info.py" will be generated which maps the binding
59 | # site information to the structure in PyMol. As PyMol cannot recognize
60 | # coarse-grained structures, an atomistic structure of the receptor is needed.
61 |
62 | fig_format = "pdf" # format for all pylipid produced figures. Allow for formats that are supported by
63 | # matplotlib.pyplot.savefig().
64 |
65 | num_cpus = None # the number of cpu to use when functions are using multiprocessing. By default,
66 | # i.e. None, the functions will use up all the cpus available. This can use up all the memory in
67 | # some cases.
68 |
69 | #####################################
70 | ###### no changes needed below ######
71 | #####################################
72 |
73 | #### calculate lipid interactions
74 | li = LipidInteraction(trajfile_list, topfile_list=topfile_list, cutoffs=cutoffs, lipid=lipid,
75 | lipid_atoms=lipid_atoms, nprot=1, resi_offset=resi_offset,
76 | timeunit=timeunit, save_dir=save_dir, stride=stride, dt_traj=dt_traj)
77 | li.collect_residue_contacts()
78 | li.compute_residue_duration(residue_id=None)
79 | li.compute_residue_occupancy(residue_id=None)
80 | li.compute_residue_lipidcount(residue_id=None)
81 | li.show_stats_per_traj(write_log=True, print_log=True)
82 | li.compute_residue_koff(residue_id=None, plot_data=True, fig_close=True,
83 | fig_format=fig_format, num_cpus=num_cpus)
84 | li.compute_binding_nodes(threshold=binding_site_size, print_data=False)
85 | if len(li.node_list) == 0:
86 | print("*"*50)
87 | print("No binding site detected! Skip analysis for binding sites.")
88 | print("*"*50)
89 | else:
90 | li.compute_site_duration(binding_site_id=None)
91 | li.compute_site_occupancy(binding_site_id=None)
92 | li.compute_site_lipidcount(binding_site_id=None)
93 | li.compute_site_koff(binding_site_id=None, plot_data=True, fig_close=True,
94 | fig_format=fig_format, num_cpus=num_cpus)
95 | pose_traj, pose_rmsd_data = li.analyze_bound_poses(binding_site_id=None, pose_format=save_pose_format,
96 | n_top_poses=n_top_poses, n_clusters=n_clusters,
97 | fig_format=fig_format, num_cpus=num_cpus)
98 | # save pose trajectories
99 | if save_pose_traj:
100 | for bs_id in pose_traj.keys():
101 | pose_traj[bs_id].save("{}/Bound_Poses_{}/Pose_traj_BSid{}.{}".format(li.save_dir, li.lipid, bs_id,
102 | save_pose_traj_format))
103 | del pose_traj # save memory space
104 | surface_area_data = li.compute_surface_area(binding_site_id=None, radii=radii, fig_format=fig_format)
105 | data_dir = check_dir(li.save_dir, "Dataset_{}".format(li.lipid))
106 | pose_rmsd_data.to_csv("{}/Pose_RMSD_data.csv".format(data_dir), index=False, header=True)
107 | surface_area_data.to_csv("{}/Surface_Area_data.csv".format(data_dir), index=True, header=True)
108 | li.write_site_info(sort_residue="Residence Time")
109 |
110 | if pdb_file_to_map is not None:
111 | li.save_pymol_script(pdb_file_to_map)
112 |
113 | #### write and save data
114 | for item in ["Dataset", "Duration", "Occupancy", "Lipid Count", "CorrCoef"]:
115 | li.save_data(item=item)
116 | for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
117 | li.save_coordinate(item=item)
118 | for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
119 | li.plot(item=item, fig_close=True, fig_format=fig_format)
120 | li.plot_logo(item=item, fig_close=True, fig_format=fig_format)
121 |
122 | #### plot binding site comparison.
123 | if len(li.node_list) > 0:
124 | for item in ["Duration BS", "Occupancy BS"]:
125 | li.save_data(item=item)
126 |
127 | ylabel_timeunit = 'ns' if li.timeunit == "ns" else r"$\mu$s"
128 | ylabel_dict = {"Residence Time": "Residence Time ({})".format(ylabel_timeunit),
129 | "Duration": "Duration ({})".format(ylabel_timeunit),
130 | "Occupancy": "Occupancy (100%)",
131 | "Lipid Count": "Lipid Count (num.)"}
132 |
133 | # plot No. 1
134 | binding_site_IDs = np.sort(
135 | [int(bs_id) for bs_id in li.dataset["Binding Site ID"].unique() if bs_id != -1])
136 | for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
137 | item_values = np.array(
138 | [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site {}".format(item)].unique()[0]
139 | for bs_id in binding_site_IDs])
140 | fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
141 | ax.scatter(np.arange(len(item_values)), np.sort(item_values)[::-1], s=50, color="red")
142 | ax.set_xticks(np.arange(len(item_values)))
143 | sorted_index = np.argsort(item_values)[::-1]
144 | ax.set_xticklabels(binding_site_IDs[sorted_index])
145 | ax.set_xlabel("Binding Site ID", fontsize=12)
146 | ax.set_ylabel(ylabel_dict[item], fontsize=12)
147 | for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
148 | plt.setp(label, fontsize=12, weight="normal")
149 | plt.tight_layout()
150 | plt.savefig("{}/{}_{}_v_binding_site.{}".format(li.save_dir, li.lipid, "_".join(item.split()), fig_format),
151 | dpi=200)
152 | plt.close()
153 |
154 | # plot No. 2
155 | binding_site_IDs_RMSD = np.sort([int(bs_id) for bs_id in binding_site_IDs
156 | if f"Binding Site {bs_id}" in pose_rmsd_data.columns])
157 | RMSD_averages = np.array(
158 | [pose_rmsd_data[f"Binding Site {bs_id}"].dropna(inplace=False).mean()
159 | for bs_id in binding_site_IDs_RMSD])
160 | fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
161 | ax.scatter(np.arange(len(RMSD_averages)), np.sort(RMSD_averages)[::-1], s=50, color="red")
162 | ax.set_xticks(np.arange(len(RMSD_averages)))
163 | sorted_index = np.argsort(RMSD_averages)[::-1]
164 | ax.set_xticklabels(binding_site_IDs_RMSD[sorted_index])
165 | ax.set_xlabel("Binding Site ID", fontsize=12)
166 | ax.set_ylabel("RMSD (nm)", fontsize=12)
167 | for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
168 | plt.setp(label, fontsize=12, weight="normal")
169 | plt.tight_layout()
170 | plt.savefig("{}/{}_RMSD_v_binding_site.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
171 | plt.close()
172 |
173 | # plot No. 3
174 | surface_area_averages = np.array(
175 | [surface_area_data["Binding Site {}".format(bs_id)].dropna(inplace=False).mean()
176 | for bs_id in binding_site_IDs])
177 | fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
178 | ax.scatter(np.arange(len(surface_area_averages)), np.sort(surface_area_averages)[::-1], s=50, color="red")
179 | ax.set_xticks(np.arange(len(surface_area_averages)))
180 | sorted_index = np.argsort(surface_area_averages)[::-1]
181 | ax.set_xticklabels(binding_site_IDs[sorted_index])
182 | ax.set_xlabel("Binding Site ID", fontsize=12)
183 | ax.set_ylabel(r"Surface Area (nm$^2$)", fontsize=12)
184 | for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
185 | plt.setp(label, fontsize=12, weight="normal")
186 | plt.tight_layout()
187 | plt.savefig("{}/{}_surface_area_v_binding_site.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
188 | plt.close()
189 |
190 | # plot No. 4
191 | res_time_BS = np.array(
192 | [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site Residence Time"].unique()[0]
193 | for bs_id in binding_site_IDs_RMSD])
194 | fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
195 | ax.scatter(res_time_BS, RMSD_averages, s=50, color="red")
196 | ax.set_xlabel(ylabel_dict["Residence Time"], fontsize=12)
197 | ax.set_ylabel("RMSD (nm)", fontsize=12)
198 | for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
199 | plt.setp(label, fontsize=12, weight="normal")
200 | plt.tight_layout()
201 | plt.savefig("{}/{}_Residence_Time_v_RMSD.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
202 | plt.close()
203 |
204 | # plot No. 5
205 | res_time_BS = np.array(
206 | [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site Residence Time"].unique()[0]
207 | for bs_id in binding_site_IDs])
208 | fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
209 | ax.scatter(res_time_BS, surface_area_averages, s=50, color="red")
210 | ax.set_xlabel(ylabel_dict["Residence Time"], fontsize=12)
211 | ax.set_ylabel(r"Surface Area (nm$^2$)", fontsize=12)
212 | for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
213 | plt.setp(label, fontsize=12, weight="normal")
214 | plt.tight_layout()
215 | plt.savefig("{}/{}_Residence_Time_v_surface_area.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
216 | plt.close()
217 |
218 |
219 |
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python>=3
5 | - pandoc
6 | - ipykernel
7 | - pip
8 | - sphinx=4.0.2
9 | - pip:
10 | - nbsphinx
11 | - pylipid
12 | - furo
13 |
--------------------------------------------------------------------------------
/docs/gallery.rst:
--------------------------------------------------------------------------------
1 |
2 |
3 | =======
4 | Gallery
5 | =======
6 |
7 |
8 | **Most representative bound poses**
9 |
10 | .. image:: static/top_ranked_poses.png
11 | :align: center
12 |
13 |
14 | **Interaction hotspots**
15 |
16 | .. image:: tutorials/statics/surface_hotspots.png
17 | :align: center
18 |
19 |
20 | **Binding Site analysis**
21 |
22 | .. image:: tutorials/statics/binding_site_comparisons_cutoffs.png
23 | :align: center
24 |
25 |
26 | **Koff plot**
27 |
28 | .. image:: tutorials/statics/koff_figure.png
29 | :align: center
30 |
31 |
32 | **Interaction plot**
33 |
34 | .. image:: tutorials/statics/Residence_Time.png
35 | :align: center
36 |
37 |
38 | **Logo plot**
39 |
40 | .. image:: tutorials/statics/Residence_Time_logo.png
41 | :align: center
42 |
43 |
44 | **Correlation coefficient**
45 |
46 | .. image:: static/CorrCoef.png
47 | :align: center
48 |
49 |
50 | **Bound pose RMSD**
51 |
52 | .. image:: tutorials/statics/Pose_RMSD_violinplot.png
53 | :align: center
54 |
55 |
56 | **Binding site surface**
57 |
58 | .. image:: tutorials/statics/Surface_Area_violinplot.png
59 | :align: center
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 |
2 | ==========================================================
3 | PyLipID - A Python Package For Lipid Interactions Analysis
4 | ==========================================================
5 |
6 | PyLipID is a Python package for analyzing lipid interactions with membrane proteins from
7 | molecular dynamics simulations. PyLipID has the following main features; please check out
8 | the tutorials for examples and the documentation for details of the API:
9 |
10 | * Detection of binding sites via calculating community structures in the interactions networks.
11 | * Calculation of lipid koff and residence time for interaction with binding sites and residues.
12 | * Analysis of lipid interactions with binding sites and residues using a couple of metrics.
13 | * Generation of representative bound poses for binding sites.
14 | * Analysis of bound poses for binding sites via automated clustering scheme.
15 | * Adoption of a dual-cutoff scheme to overcome the 'rattling in cage' effect of coarse-grained simulations.
16 | * Generation of manuscript-ready figures for analysis.
17 |
18 |
19 | Citation |DOI for Citing PyLipID|
20 | =================================
21 |
22 | If you use PyLipID in scientific research, please cite the following paper: ::
23 |
24 | @article{song_pylipid_2021,
25 | author = {Song, Wanling. and Corey, Robin A. and Ansell, T. Bertie. and
26 | Cassidy, C. Keith. and Horrell, Michael R. and Duncan, Anna L.
27 | and Stansfeld, Phillip J. and Sansom, Mark S.P.},
28 | title = {PyLipID: A Python package for analysis of protein-lipid interactions from MD simulations},
29 | journal = {BioRxiv},
30 | year = {2021},
31 | url = {https://doi.org/10.1101/2021.07.14.452312},
32 | doi = {10.1101/2021.07.14.452312},
33 | urldate = {2021-07-14},
34 | month = jul,
35 | }
36 |
37 | .. |DOI for Citing PyLipID| image:: https://img.shields.io/badge/DOI-10.1101/2021.07.14.452312-blue
38 | :target: https://doi.org/10.1101/2021.07.14.452312
39 |
40 |
41 |
42 | Installation
43 | ============
44 |
45 | PyLipID can be installed with `pip <https://pypi.org/project/pylipid/>`_:
46 |
47 | .. code-block:: bash
48 |
49 | $ pip install pylipid
50 |
51 | Alternatively, you can grab the latest source code from `GitHub <https://github.com/wlsong/PyLipID>`_:
52 |
53 | .. code-block:: bash
54 |
55 | $ git clone https://github.com/wlsong/PyLipID.git
56 | $ cd PyLipID && python setup.py install
57 |
58 |
59 | Usage
60 | =====
61 |
62 | The :doc:`tutorial` is the place to go to learn how to use PyLipID. The :doc:`api/index`
63 | provides API-level documentation.
64 |
65 | A no-brainer demo script is available at :doc:`demo` to run PyLipID with all the analysis.
66 |
67 |
68 | License
69 | =======
70 |
71 | PyLipID is made available under the MIT License. For more details,
72 | see `LICENSE.txt <https://github.com/wlsong/PyLipID/blob/master/LICENSE.txt>`_.
73 |
74 |
75 | Table of Contents
76 | =================
77 |
78 | .. toctree::
79 | :maxdepth: 2
80 |
81 | INSTALL
82 | api/index
83 | tutorial
84 | gallery
85 | demo
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/static/CorrCoef.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/static/CorrCoef.png
--------------------------------------------------------------------------------
/docs/static/pylipid_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/static/pylipid_logo.png
--------------------------------------------------------------------------------
/docs/static/pylipid_logo_smallsize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/static/pylipid_logo_smallsize.png
--------------------------------------------------------------------------------
/docs/static/top_ranked_poses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/static/top_ranked_poses.png
--------------------------------------------------------------------------------
/docs/tutorial.rst:
--------------------------------------------------------------------------------
1 |
2 | =========
3 | Tutorials
4 | =========
5 |
6 | We provide a collection of Jupyter notebook tutorials that offer either a complete walk-through of the application
7 | or explanations of some important functions.
8 |
9 | For those who are impatient, we have provided a no-brainer demo script at :doc:`demo`, which runs PyLipID with all
10 | the analysis.
11 |
12 | If you find mistakes or have suggestions for the tutorials, please file issues or pull requests on GitHub to help us
13 | improve.
14 |
15 | .. toctree::
16 | :maxdepth: 1
17 |
18 | tutorials/0-application-walk-through
19 | tutorials/1-Distance_cutoff_determination.ipynb
20 |
--------------------------------------------------------------------------------
/docs/tutorials/statics/A2aR_system_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/A2aR_system_overview.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/BSid3_top1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/BSid3_top1.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Duration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Duration.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Lipid_Count.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Lipid_Count.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Occupancy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Occupancy.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Pose_RMSD_violinplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Pose_RMSD_violinplot.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Residence_Time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Residence_Time.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Residence_Time_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Residence_Time_logo.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Surface_Area_timeseries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Surface_Area_timeseries.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/Surface_Area_violinplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/Surface_Area_violinplot.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/binding_site_comparisons_cutoffs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/binding_site_comparisons_cutoffs.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/directory_overview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/directory_overview.gif
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_10VAL_CHOL6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_10VAL_CHOL6.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_120LYS_POP211.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_120LYS_POP211.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_197ARG_POP218.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_197ARG_POP218.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_203ARG_POP229.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_203ARG_POP229.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_27TRP_CHOL166.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_27TRP_CHOL166.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/dist_9THR_CHOL224.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/dist_9THR_CHOL224.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/koff_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/koff_figure.png
--------------------------------------------------------------------------------
/docs/tutorials/statics/surface_hotspots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/statics/surface_hotspots.png
--------------------------------------------------------------------------------
/docs/tutorials/traj_data/A2a/run1/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/traj_data/A2a/run1/protein_lipids.xtc
--------------------------------------------------------------------------------
/docs/tutorials/traj_data/A2a/run2/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/traj_data/A2a/run2/protein_lipids.xtc
--------------------------------------------------------------------------------
/docs/tutorials/traj_data/GABAA/run1/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/traj_data/GABAA/run1/protein_lipids.xtc
--------------------------------------------------------------------------------
/docs/tutorials/traj_data/GABAA/run2/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/docs/tutorials/traj_data/GABAA/run2/protein_lipids.xtc
--------------------------------------------------------------------------------
/legacy/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Introduction
3 | **pylipid.py** is a toolkit to calculate lipid interactions with membrane proteins.
4 |
5 | It calculates:
6 | - lipid interactions with the proteins in terms of their duration, residence time, occupancy, num. of lipids surrounding given residues and koff;
7 | - lipid binding sites via interaction networks.
8 | - various binding kinetics, e.g. lipid residence time, koff, etc, for each binding site.
9 | - lipid binding site surface area via Shrake-Rupley algorithm (Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351–71)
10 | - probability density functions of bound lipids and generates representative binding poses for each binding site based on the calculated PDF.
11 |
12 | It plots:
13 | - lipid interactions (in terms of duration, residence time, occupancy, and num. of surrounding lipids) with the protein as a function of protein residue indices.
14 | - the calculated lipid koff for each protein residue.
15 | - the calculated lipid koff for each binding site.
16 | - surface area for each binding site.
17 |
18 | It generates:
19 | - protein coordinates in pdb format in which such data as residence time, koff, duration and occupancy are recorded in the b factor column.
20 | - representative binding poses for each binding site based on scoring functions that use probability density functions of the bound lipids.
21 |
22 | It can also map in a PyMol session the calculated binding sites to a pdb structure users provide through -pdb. When the flag -pdb is provided, pylipid.py will write out a python script 'show_binding_site_info.py' that allows users to open up a PyMol session, in which residues that belong to the same binding site are shown in spheres with sizes corresponding to their calculated residence time.
23 |
24 | For definition of residence time, please refer to:
25 | - García, Angel E.; Stiller, Lewis. Computation of the mean residence time of water in the hydration shells of biomolecules. 1993. Journal of Computational Chemistry;
26 | - Duncan AL, Corey RA, Sansom MSP. Defining how multiple lipid species interact with inward rectifier potassium (Kir2) channels. 2020. Proc Natl Acad Sci U S A.
27 |
28 | To alleviate the 'cage-rattling' phenomenon of bead dynamics in coarse-grained simulations, pylipid uses a dual-cutoff scheme. This scheme defines the start of a continuous interaction of a lipid molecule with a given object when any atom/bead of the lipid molecule moves within the smaller cutoff; and the end of such a continuous interaction when all of the atoms/beads of the lipid molecule move out of the larger cutoff. Such a dual-cutoff scheme can also be applied to atomistic simulations. The recommended dual-cutoff for coarse-grained simulations is **0.55 1.0** nm, and that for atomistic simulations is **0.35 0.55** nm. But it's recommended for users to do some tests on their systems. Users can use the same value for both cutoffs to achieve a single cutoff scheme.
29 |
30 |
31 | ## Installation:
32 | pylipid.py requires the following packages:
33 | - python
34 | - mdtraj
35 | - numpy
36 | - pandas
37 | - matplotlib
38 | - seaborn
39 | - networkx
40 | - scipy
41 | - python-louvain
42 | - logomaker
43 | - statsmodels
44 | - scikit-learn
45 | - tqdm
46 |
47 | To create a compatible python environment without interfering with your global python settings, we recommend building an independent env called PyLipID using [conda](https://www.anaconda.com/distribution/).
48 | To create this PyLipID environment using the provided env.yml, assuming you have installed [conda](https://www.anaconda.com/distribution/) in your system:
49 | ```
50 | conda env create -f env_{OS_PLATFORM}.yml
51 | ```
52 | Now your python env PyLipID is all set. Whenever you want to use the script, activate PyLipID first by
53 | ```
54 | conda activate PyLipID
55 | ```
56 | To get back to your default global python env:
57 | ```
58 | conda deactivate
59 | ```
60 | Remove this env from your system by:
61 | ```
62 | conda env remove --name PyLipID
63 | ```
64 |
65 |
66 | ## Examples:
67 | Information regarding **pylipid.py** flags can be checked via 'python pylipid.py -h'.
68 |
69 | A standard check on lipid interactions using **pylipid.py**, which suits most of the cases:
70 | ```
71 | conda activate PyLipID
72 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
73 | -cutoffs 0.55 1.0 -lipids POPC CHOL POP2 -nprot 1 -save_dataset
74 | ```
75 |
76 | Due to the smoothened energy potentials, coarse-grained force fields often render the tails of phospholipids too flexible, which could lead to poor characterisation of binding sites. When behaviours of the tails are not the main focus, it's better to focus on the binding of headgroups. Users can use the flag -lipid_atoms to specify lipid atoms/beads for calculation. An example of calculating the binding of PIP2 in MARTINI 2 (named as POP2 in this force field) using only the headgroup beads:
77 | ```
78 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
79 | -cutoffs 0.55 1.0 -lipids POP2 -lipid_atoms C1 C2 C3 C4 PO4 P1 P2 -nprot 1 -save_dataset
80 | ```
81 |
82 | **pylipid.py** uses graph theory and community analysis to calculate lipid binding sites. The Binding Site information is wrapped up in the BindingSites_info_{LIPID}.txt in the "Binding_Sites_{LIPID}" directory. For each binding site, pylipid.py can write out top-rated binding poses sampled in the simulations. pylipid.py rates the bound lipid poses of each binding site via a scoring function that is based on the probability density of bound lipids at that binding site. The written coordinates of each lipid pose include those of the bound lipid and the receptor that the pose is bound to. By default, pylipid.py writes out the top 5 ranking lipid poses for each binding site in the 'gro' format (the Gromacs coordinate format). Users can use -save_pose_format to change the coordinate format to any that is supported by [mdtraj](http://mdtraj.org). For phospholipids, it's recommended to give higher weights to lipid headgroups in the scoring functions. Users can use -score_weights to change the weights. The flag -n_binding_poses specifies how many lipid poses are generated for each binding site. The following example shows how to generate 10 top ranking poses for each binding site, to save the binding poses in the 'gro' format and to give higher weight to the headgroup beads of PIP2 in the MARTINI force field:
83 | ```
84 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
85 | -cutoffs 0.55 1.0 -lipids POP2 -nprot 1 -save_pose_format gro -score_weights PO4:10 P1:10 P2:10 C1:10 C2:10 C3:10 -n_binding_poses 10
86 | ```
87 | The calculation of lipid probability density uses the function of [KDEMultivariate](https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.kernel_density.KDEMultivariate.html) in statsmodels. This calculation can take some time (up to one hour) for atomistic simulations or long coarse-grained simulations where the collected binding data are large (either due to higher granularity or a larger number of binding events). To speed up the calculation, users can decrease the volume of data by using the flag -stride to stride through trajectories, i.e. analyse only every X-th trajectory frame. In addition, if getting the bound lipid coordinates is not the focus, users can use -n_binding_poses 0 to switch off the binding pose generation process.
88 |
89 | The script also allows users to view the calculated binding sites in PyMol via generating a python script, a process that is invoked by providing a protein atomistic structure (preferably in pdb format) to the flag -pdb. For the coarse-grained simulations, either provide the atomistic protein structure before coarse-graining or use an atomistic structure that is converted back from coarse-grained models. Users need to make sure that the provided protein coordinates are consistent with the configuration in the simulations in terms of the residue indices and ordering of the protein. An example of using the flag -pdb:
90 | ```
91 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
92 | -cutoffs 0.55 1.0 -lipids POPC CHOL POP2 -nprot 1 -save_dataset -pdb XXXX.pdb
93 | ```
94 | Replace 'XXXX.pdb' with the pdb file of your choice. Running the generated python script with the command 'python show_binding_site_info.py' will open a PyMol session displaying binding site information.
95 |
96 | **pylipid.py** allows users to specify a couple of regions for calculation via the flag -resi_list. Supported syntax includes: 1/ use "-" to indicate a range of the protein residue index (both ends included); or 2/ specify individual residue indices separated by spaces:
97 | ```
98 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
99 | -cutoffs 0.55 1.0 -lipids POPC CHOL POP2 -nprot 1 -resi_list 5 7 8 10-30 50-70 100-130 -save_dataset
100 | ```
101 |
102 | **pylipid.py** calculates the surface area of each binding site. By default, the script uses atom radii defined by mdtraj (https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/sasa.py#L56) for calculation. The script also defines the radii of MARTINI coarse-grained beads BB as 0.26 nm and SC1/2/3 as 0.23 nm. To change or define radii of atoms/beads, use -radii and specify radius in unit of nm. For example, to change the radius of MARTINI coarse-grained beads BB to 0.28 nm and SC1 to 0.22 nm:
103 | ```
104 | python pylipid.py -f ./run_1/md.xtc ./run_2/md.xtc -c ./run_1/protein_lipids.gro ./run_2/protein_lipids.gro
105 | -cutoffs 0.55 1.0 -lipids POPC CHOL POP2 -nprot 1 -radii BB:0.28 SC1:0.22
106 | ```
107 |
108 |
109 | ## Developers:
110 | - Wanling Song
111 | - Anna Duncan
112 | - Robin Corey
113 | - Bertie Ansell
114 |
115 |
116 | ## Thanks for reading to the end, much respect!
117 | Writing scripts is about fixing one bug after another.
118 |
119 | Be brave, be real and keep going, homie!
120 | 
121 |
122 |
--------------------------------------------------------------------------------
/legacy/env_linux.yml:
--------------------------------------------------------------------------------
1 | name: PyLipID
2 | channels:
3 | - conda-forge
4 | - defaults
5 | - tpeulen
6 | - bioconda
7 | - rangsiman
8 | - anaconda
9 | dependencies:
10 | - python
11 | - mdtraj
12 | - matplotlib
13 | - seaborn
14 | - networkx
15 | - python-louvain
16 | - pymol-open-source
17 | - pillow
18 | - logomaker
19 | - statsmodels
20 | - scikit-learn
21 | - tqdm
22 |
--------------------------------------------------------------------------------
/legacy/env_macos.yml:
--------------------------------------------------------------------------------
1 | name: PyLipID
2 | channels:
3 | - conda-forge
4 | - defaults
5 | - schrodinger
6 | - bioconda
7 | - rangsiman
8 | - anaconda
9 | dependencies:
10 | - python
11 | - mdtraj
12 | - matplotlib
13 | - seaborn
14 | - networkx
15 | - python-louvain
16 | - pymol
17 | - pillow
18 | - logomaker
19 | - statsmodels
20 | - scikit-learn
21 | - tqdm
22 |
--------------------------------------------------------------------------------
/pylipid/__init__.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | from ._version import __version__
18 |
--------------------------------------------------------------------------------
/pylipid/_version.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | __version__ = '1.5.14'
--------------------------------------------------------------------------------
/pylipid/api/__init__.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | r"""
18 | PyLipID class
19 | =============
20 | **api** provides the outer layer functions to calculate lipid interactions
21 | and binding sites.
22 |
23 | .. currentmodule:: pylipid.api
24 |
25 | .. autosummary::
26 | :toctree: generated/
27 |
28 | LipidInteraction
29 |
30 | """
31 |
32 | from .api import LipidInteraction
--------------------------------------------------------------------------------
/pylipid/func/__init__.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | r"""
18 | func module
19 | ==============
20 | The ``func`` module provides the functions that do the heavy lifting:
21 |
22 | .. currentmodule:: pylipid.func
23 |
24 | .. autosummary::
25 | :toctree: generated/
26 |
27 | cal_koff
28 | cal_survival_func
29 | calculate_koff_wrapper
30 | Duration
31 | cal_contact_residues
32 | cal_occupancy
33 | cal_lipidcount
34 | get_node_list
35 | collect_bound_poses
36 | vectorize_poses
37 | calculate_scores
38 | write_bound_poses
39 | calculate_surface_area_wrapper
40 | analyze_pose_wrapper
41 | cluster_DBSCAN
42 | cluster_KMeans
43 |
44 | """
45 |
46 | from .kinetics import cal_koff
47 | from .kinetics import cal_survival_func
48 | from .interactions import Duration
49 | from .interactions import cal_contact_residues, cal_occupancy, cal_lipidcount
50 | from .binding_site import get_node_list
51 | from .binding_site import collect_bound_poses, vectorize_poses, calculate_scores, write_bound_poses
52 | from .clusterer import cluster_DBSCAN, cluster_KMeans
53 | from .binding_site import analyze_pose_wrapper
54 | from .binding_site import calculate_surface_area_wrapper
55 | from .kinetics import calculate_koff_wrapper
56 |
57 |
--------------------------------------------------------------------------------
/pylipid/func/clusterer.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for clustering the bound poses. """
18 |
19 | import numpy as np
20 | from sklearn.neighbors import NearestNeighbors
21 | from sklearn.cluster import DBSCAN
22 | from sklearn.cluster import KMeans
23 | from kneebow.rotor import Rotor
24 |
25 |
26 | __all__ = ["cluster_DBSCAN", "cluster_KMeans"]
27 |
28 |
def cluster_DBSCAN(data, eps=None, min_samples=None, metric="euclidean"):
    r"""Cluster data using DBSCAN.

    This function clusters the samples using the density-based clusterer
    `DBSCAN <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html>`_
    provided by scikit-learn. DBSCAN finds clusters of core samples of high density. A sample point is a
    core sample if at least `min_samples` points are within distance :math:`\varepsilon` of it. A cluster is
    defined as a set of sample points that are mutually density-connected and density-reachable, i.e. for any
    two points in the cluster there is a path :math:`\left\langle p_{1}, p_{2}, \ldots, p_{n}\right\rangle`
    in which each :math:`p_{i+1}` is within distance :math:`\varepsilon` of :math:`p_{i}`. The values of
    `min_samples` and :math:`\varepsilon` determine the performance of this clusterer.

    If `min_samples` is None it is chosen from the data size:
    ``min(n_dims, 4)`` when ``2 * n_dims > n_samples``, ``min(2 * n_dims, n_samples)`` when
    ``n_samples < 1000`` and ``min(5 * n_dims, n_samples)`` otherwise. If :math:`\varepsilon` is None, it is
    set to the value at the knee of the sorted nearest-neighbour distance curve.

    Parameters
    ----------
    data : numpy.ndarray, shape=(n_samples, n_dims)
        Sample data to find clusters.

    eps : None or scalar, default=None
        The maximum distance between two samples for one to be considered as in the neighborhood of the
        other. This is not a maximum bound on the distances of points within a cluster. If None, it is
        estimated from the knee of the sorted nearest-neighbour distances.

    min_samples : None or scalar, default=None
        The number of samples (or total weight) in a neighborhood for a point to be considered as a core
        point. This includes the point itself. If None, it is chosen from the data size as described above.

    metric : string or callable, default='euclidean'
        The metric passed to ``sklearn.cluster.DBSCAN``. It must be one of the options allowed by
        `sklearn.metrics.pairwise_distances` for its metric parameter.

    Returns
    -------
    labels : array_like, shape=(n_samples,)
        Cluster labels for each data point. Noise is labelled -1.

    core_sample_indices : list of lists
        Indices of core samples, one list per cluster. When ``n_samples <= n_dims`` no clustering is
        attempted: all samples get label 0 and a ``(1, n_samples)`` index array is returned instead.

    """
    n_samples = len(data)
    n_dims = len(data[0])
    if n_samples <= n_dims:
        # Too few samples for a density estimate: treat everything as a single cluster.
        return np.zeros(n_samples, dtype=int), np.arange(n_samples)[np.newaxis, :]

    # Only fall back to the size-based heuristic when the caller gave no value.
    if min_samples is None:
        if 2 * n_dims > n_samples:
            min_samples = min(n_dims, 4)
        elif n_samples < 1000:
            min_samples = min(2 * n_dims, n_samples)
        else:
            min_samples = min(5 * n_dims, n_samples)
    min_samples = int(min_samples)

    if eps is None:
        # Column 1 (the nearest neighbour other than the point itself) is read below,
        # so at least two neighbours are needed and no more than n_samples are available.
        n_neighbors = min(max(min_samples, 2), n_samples)
        nearest_neighbors = NearestNeighbors(n_neighbors=n_neighbors)
        nearest_neighbors.fit(data)
        distances, _ = nearest_neighbors.kneighbors(data)
        distances = np.sort(distances, axis=0)[:, 1]
        curve = np.vstack([np.arange(len(distances)), distances]).T
        rotor = Rotor()
        rotor.fit_rotate(curve)
        eps = distances[rotor.get_elbow_index()]

    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric=metric)
    dbscan.fit(data)
    # One bucket per real cluster; the noise label (-1) gets none.
    core_sample_indices = [[] for label in np.unique(dbscan.labels_) if label != -1]
    for core_sample_index in dbscan.core_sample_indices_:
        core_sample_indices[dbscan.labels_[core_sample_index]].append(core_sample_index)
    return dbscan.labels_, core_sample_indices
97 |
98 |
def cluster_KMeans(data, n_clusters):
    r"""Cluster data using KMeans.

    This function clusters the samples using
    `KMeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html>`_
    provided by scikit-learn. The KMeans clusterer separates the samples into `n` clusters of equal
    variances, via minimizing the `inertia`, which is defined as:

    .. math::
        \sum_{i=0}^{n} \min _{u_{i} \in C}\left(\left\|x_{i}-u_{i}\right\|^{2}\right)

    where :math:`u_{i}` is the `centroid` of cluster i. KMeans scales well with large dataset but performs
    poorly with clusters of varying sizes and density.

    When there are fewer samples than requested clusters, KMeans cannot be run and the samples are
    clustered with :func:`cluster_DBSCAN` (default settings) instead.

    Parameters
    ----------
    data : numpy.ndarray, shape=(n_samples, n_dims)
        Sample data to find clusters.

    n_clusters : int
        The number of clusters to form as well as the number of centroids to generate.

    Returns
    -------
    labels : array_like, shape=(n_samples,)
        Cluster labels for each data point.

    """
    if len(data) < n_clusters:
        # cluster_DBSCAN returns (labels, core_sample_indices); only the labels
        # belong to this function's documented return value.
        labels, _ = cluster_DBSCAN(data, eps=None, min_samples=None, metric="euclidean")
        return labels
    model = KMeans(n_clusters=n_clusters).fit(data)
    return model.predict(data)
--------------------------------------------------------------------------------
/pylipid/func/interactions.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for calculating lipid contacts and the class Duration, which implements the dual-cutoff scheme.
18 | """
19 | import numpy as np
20 |
21 | __all__ = ["cal_contact_residues", "Duration", "cal_occupancy", "cal_lipidcount"]
22 |
23 |
def cal_contact_residues(dist_matrix, cutoff):
    """Obtain contact residues as a function of time.

    Given the distances of a set of molecules to a target at every trajectory frame, report which
    molecules are within ``cutoff`` at each frame, together with the flat (frame, molecule) index
    pairs of all contacts.

    Parameters
    ----------
    dist_matrix : list or numpy.ndarray, shape=(n_residues, n_frames)
        The measured distance for molecules at each trajectory frame.

    cutoff : scalar
        The distance cutoff to define a contact. A distance to the target
        equal to or lower than ``cutoff`` is considered as in contact.

    Returns
    -------
    contact_list : list
        One list per frame holding the indices of the molecules within the cutoff in that frame.

    frame_id_set : numpy.ndarray
        Frame index of every contact, ordered by molecule index and then by frame.

    residue_id_set : numpy.ndarray
        Molecule index of every contact, aligned with ``frame_id_set``.

    Examples
    --------
    >>> dr0 = [0.9, 0.95, 1.2, 1.1, 1.0, 0.9] # the distances of R0 to the target as a function of time
    >>> dr1 = [0.95, 0.9, 0.95, 1.1, 1.2, 1.1] # the distances of R1
    >>> dr2 = [0.90, 0.90, 0.85, 0.95, 1.0, 1.1] # the distances of R2
    >>> dist_matrix = [dr0, dr1, dr2]
    >>> contact_list, frame_id_set, residue_id_set = cal_contact_residues(dist_matrix, 1.0)
    >>> print([[int(r) for r in frame] for frame in contact_list])
    [[0, 1, 2], [0, 1, 2], [1, 2], [2], [0, 2], [0]]
    >>> print(frame_id_set.tolist())
    [0, 1, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4]
    >>> print(residue_id_set.tolist())
    [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2]

    """
    within_cutoff = np.array(dist_matrix) <= cutoff
    # Rows are molecules and columns are frames, so np.where yields (molecule, frame) pairs.
    residue_id_set, frame_id_set = np.where(within_cutoff)
    n_frames = len(dist_matrix[0])
    contact_list = [[] for _ in np.arange(n_frames)]
    for pair_idx in range(len(frame_id_set)):
        contact_list[frame_id_set[pair_idx]].append(residue_id_set[pair_idx])
    return contact_list, frame_id_set, residue_id_set
71 |
72 |
class Duration:
    """Calculate contact durations using a dual-cutoff scheme."""

    def __init__(self, contact_low, contact_high, dt):
        """Dual cutoff scheme for calculating the interaction durations.

        In the dual cutoff scheme, a continuous contact starts when a molecule moves closer than the lower
        distance cutoff and ends when the molecule moves out of the upper cutoff. The duration between these
        two time points is the duration of the contact.

        Here, ``contact_low`` holds the lipid indices within the lower cutoff and ``contact_high`` the lipid
        indices within the upper cutoff, per frame. For calculation of contact durations, a lipid molecule
        that appears in ``contact_low`` is searched in the subsequent frames of ``contact_high`` and the
        search stops when this molecule disappears from ``contact_high``. Every ``contact_high`` entry
        visited this way is labeled as 'checked', so that the same contact is not counted again from a
        later frame. This iterates until all lipid molecules in ``contact_low`` have been processed.

        Parameters
        ----------
        contact_low : list
            A list that records the indices of lipid molecule within the lower distance cutoff at each
            trajectory frame. Every index in a frame must also be present in the same frame of
            ``contact_high``.

        contact_high : list
            A list that records the indices of lipid molecule within the upper distance cutoff at each
            trajectory frame. Frames may be lists or numpy arrays.

        dt : scalar
            The timestep between two adjacent trajectory frames.

        """
        self.contact_low = contact_low
        self.contact_high = contact_high
        self.dt = dt
        # pointer[i][j] == 1 marks entry j of contact_high[i] as already counted.
        # The builtin int is used because the np.int alias was removed from NumPy.
        self.pointer = [np.zeros_like(frame, dtype=int) for frame in self.contact_high]

    def _locate(self, frame_idx, lipid):
        """Return the positions of ``lipid`` in ``contact_high[frame_idx]`` (empty array if absent)."""
        # np.asarray makes the element-wise comparison work for plain Python lists too.
        return np.where(np.asarray(self.contact_high[frame_idx]) == lipid)[0]

    def cal_durations(self):
        """Calculate interaction durations using the dual-cutoff scheme.

        Calculate the durations of the appearances that start from the point when a molecule appears
        in ``contact_low`` and ends when it disappears from ``contact_high``.

        Returns
        -------
        durations : list
            A sorted list of durations of the contacts defined by ``contact_low`` and ``contact_high``.
            ``[0]`` is returned when no contact is found.

        Raises
        ------
        IndexError
            If a molecule in ``contact_low`` is missing from the same frame of ``contact_high``.

        """
        durations = []
        for i, low_frame in enumerate(self.contact_low):
            for lipid in low_frame:
                pos = self._locate(i, lipid)[0]
                if self.pointer[i][pos] == 0:
                    durations.append(self._get_duration(i, pos))
        if not durations:
            return [0]
        durations.sort()
        return durations

    def _get_duration(self, i, j):
        """Follow entry ``j`` of frame ``i`` through ``contact_high`` and return the contact duration."""
        count = 1
        self.pointer[i][j] = 1
        lipid_to_search = self.contact_high[i][j]
        for k in range(i + 1, len(self.contact_high)):
            locations = self._locate(k, lipid_to_search)
            if len(locations) == 0:
                # The lipid left the upper cutoff at frame k.
                return count * self.dt
            self.pointer[k][locations[0]] = 1
            count += 1
        # Still bound at the last frame: only the elapsed time up to that frame is counted.
        return (count - 1) * self.dt
145 |
146 |
def cal_occupancy(contact_list):
    """Calculate the percentage of frames in which a contact is formed.

    ``contact_list`` records, for each trajectory frame, the residue indices of the contacting lipid
    molecules. A frame counts as occupied when its entry is non-empty.

    Parameters
    ----------
    contact_list : list
        A list of residue indices of contact lipid molecules at each trajectory frame.

    Returns
    -------
    occupancy : scalar
        The percentage of frames in which a contact is formed. 0 for an empty ``contact_list``.

    Examples
    --------
    >>> contact_list = [[], [130], [130, 145], [145], [], [], [145], [145]] # contacts are formed in 5 out of the 8 frames
    >>> occupancy = cal_occupancy(contact_list)
    >>> print(occupancy) # percentage
    62.5

    See also
    --------
    pylipid.api.LipidInteraction.compute_residue_occupancy
        Calculate the percentage of frames in which the specified residue formed lipid contacts for residues.
    pylipid.api.LipidInteraction.compute_site_occupancy
        Calculate the percentage of frames in which the specified lipid contacts are formed for binding sites.

    """
    n_frames = len(contact_list)
    if n_frames == 0:
        return 0
    occupied = np.array([len(frame) for frame in contact_list]) > 0
    return 100 * np.sum(occupied) / n_frames
184 |
185 |
def cal_lipidcount(contact_list):
    """Calculate the average number of contacting molecules.

    The average is taken only over the frames in which at least one contact is formed; frames without
    any contact are ignored.

    Parameters
    ----------
    contact_list : list
        A list of residue indices of contact lipid molecules at each trajectory frame.

    Returns
    -------
    lipidcount : scalar
        The average number of contacts in frames in which any contact is formed. 0 when
        ``contact_list`` is empty or no frame has a contact.

    Examples
    --------
    >>> contact_list = [[], [130], [130, 145], [145], [], [], [145], [145]]
    >>> lipidcount = cal_lipidcount(contact_list)
    >>> print(lipidcount) # (1+2+1+1+1)/5
    1.2

    See also
    --------
    pylipid.api.LipidInteraction.compute_residue_lipidcount
        Calculate the average number of contacting lipids for residues.
    pylipid.api.LipidInteraction.compute_site_lipidcount
        Calculate the average number of contacting lipids for binding sites.

    """
    if len(contact_list) == 0:
        return 0
    per_frame_counts = np.array([len(frame) for frame in contact_list])
    occupied_counts = per_frame_counts[per_frame_counts > 0]
    if occupied_counts.size == 0:
        return 0
    return np.nan_to_num(occupied_counts.mean())
226 |
227 |
--------------------------------------------------------------------------------
/pylipid/func/kinetics.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for calculating interaction residence time and koff.
18 | """
19 | import warnings
20 | import numpy as np
21 | from scipy.optimize import curve_fit
22 | from ..plot import plot_koff
23 |
24 |
25 | __all__ = ["cal_koff", "cal_survival_func", "calculate_koff_wrapper"]
26 |
27 |
def cal_koff(durations, t_total, timestep, nbootstrap=10, initial_guess=(1., 1., 1., 1.), cap=True):
    r"""Calculate residence time and koff.

    This function calculates the normalized survival time correlation function of the given list of
    durations, and fits the survival function to a bi-exponential curve [1]_.

    The survival time correlation function σ(t) is calculated as follows

    .. math::

        \sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)

    where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts
    and :math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the
    contact of lipid j lasts from time v to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are
    calculated for every value of t from 0 to T, for each time step of the trajectories, and normalized by
    dividing by :math:`\sigma(0)`, so that the survival time-correlation function has value 1 at t = 0.

    The normalized survival function is then fitted to a biexponential to model the long and short decays
    of lipid relaxation:

    .. math::
        \sigma(t) \sim A e^{-k_{1} t}+B e^{-k_{2} t}\left(k_{1} \leq k_{2}\right)

    This function then takes :math:`k_{1}` as the dissociation :math:`k_{off}`, and calculates the
    residence time as :math:`\tau=1 / k_{off}`.

    This function also measures the :math:`r^{2}` of the biexponential fitting to the survival function to
    show the quality of the :math:`k_{off}` estimation. In addition, it bootstraps the contact durations and
    measures the :math:`k_{off}` of the bootstrapped data, to report how well lipid contacts are sampled
    from simulations. The :math:`r^{2}`, :math:`\sigma(t)` and bootstrapped data are stored in the returned
    ``properties``.

    Parameters
    ----------
    durations : array_like
        A list of interaction durations

    t_total : scalar
        The duration, or the longest if using multiple simulations of different durations, of the
        simulation trajectories. Should be in the same time unit as durations.

    timestep : scalar
        :math:`\Delta t` of the survival function :math:`\sigma`. Often takes the time step of the
        simulation trajectories or multiples of the trajectory time step. Should be in the same time unit
        as durations.

    nbootstrap : int, optional, default=10
        Number of bootstrap resamplings. With ``nbootstrap <= 0`` the bootstrap entries of ``properties``
        are the placeholder ``[0]``.

    initial_guess : array_like, optional, default=(1., 1., 1., 1.)
        The initial guess ``(k1, k2, A, B)`` for fitting of a bi-exponential curve to the survival
        function. Used by
        `scipy.optimize.curve_fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html>`_.

    cap : bool, optional, default=True
        Cap the returned residence time to ``t_total``. This is useful for cases of poor samplings where the
        curve fitting may be bad and the calculated residence times may be unrealistically large.

    Returns
    -------
    koff : scalar
        The calculated koff. In the unit of :math:`{timeunit^{-1}}` in which :math:`{timeunit}` is the same
        as what is used in ``durations``.

    res_time : scalar
        The calculated residence time. In the same time unit as used by ``durations``.

    properties : dict
        A dictionary of all the computed values, including the original and bootstrapped koffs, residence
        times, ks of the bi-expo curve :math:`y=A*e^{(-k_1*x)}+B*e^{(-k_2*x)}` and :math:`R^2`.

    See also
    --------
    pylipid.plot.plot_koff
        Plotting function for interaction durations and the calculated survival function.
    pylipid.api.LipidInteraction.compute_residue_koff
        Calculate interaction koff and residence time for residues.
    pylipid.api.LipidInteraction.compute_site_koff
        Calculate interaction koff and residence time for binding sites.

    References
    ----------
    .. [1] García, Angel E.; Stiller, Lewis. Computation of the mean residence time of water in the
            hydration shells of biomolecules. 1993. Journal of Computational Chemistry.

    """
    delta_t_list = np.arange(0, t_total, timestep)

    def _fit(sample):
        """Fit one set of durations; return (res_time, sorted ks, r_squared, survival rates, fitted curve)."""
        survival_func = cal_survival_func(sample, np.max(t_total), delta_t_list)
        rates = np.array([survival_func[delta_t] for delta_t in delta_t_list])
        fitted_res_time, _, _, params = _curve_fitting(survival_func, delta_t_list, initial_guess)
        fitted = _bi_expo(np.array(delta_t_list), *params)
        r2 = 1 - np.sum((np.nan_to_num(fitted) - np.nan_to_num(rates)) ** 2) / np.sum(
            (rates - np.mean(rates)) ** 2)
        # the smaller k is considered as koff
        return fitted_res_time, sorted(abs(k) for k in params[:2]), r2, rates, fitted

    # calculate original residence time
    res_time, ks, r_squared, survival_rates, n_fitted = _fit(durations)
    if cap and res_time > t_total:
        res_time = t_total

    # calculate bootstrapped residence time
    if nbootstrap > 0:
        duration_boot_set = [np.random.choice(durations, size=len(durations)) for _ in range(nbootstrap)]
        ks_boot_set = []
        r_squared_boot_set = []
        survival_rates_boot_set = []
        n_fitted_boot_set = []
        for duration_boot in duration_boot_set:
            _, ks_boot, r_squared_boot, survival_rates_boot, n_fitted_boot = _fit(duration_boot)
            ks_boot_set.append(ks_boot)
            r_squared_boot_set.append(r_squared_boot)
            survival_rates_boot_set.append(survival_rates_boot)
            n_fitted_boot_set.append(n_fitted_boot)
    else:
        ks_boot_set = [0]
        r_squared_boot_set = [0]
        survival_rates_boot_set = [0]
        n_fitted_boot_set = [0]

    properties = {"ks": ks, "res_time": res_time, "delta_t_list": delta_t_list,
                  "survival_rates": survival_rates, "survival_rates_boot_set": survival_rates_boot_set,
                  "n_fitted": n_fitted, "n_fitted_boot_set": n_fitted_boot_set,
                  "ks_boot_set": ks_boot_set,
                  "r_squared": r_squared, "r_squared_boot_set": r_squared_boot_set}

    return ks[0], res_time, properties
161 |
162 |
def cal_survival_func(durations, t_total, delta_t_list):
    r"""Compute the normalised survival function.

    Calculate the normalized survival time correlation function of the given list of durations.
    The survival time correlation function σ(t) is calculated as follows

    .. math::

        \sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)

    where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts
    and :math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the
    contact of lipid j lasts from time v to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are
    normalized by dividing by :math:`\sigma(0)`, so that the survival time-correlation function has value 1
    at t = 0.

    Parameters
    ----------
    durations : array_like
        A list of contact durations.

    t_total : scalar
        The duration or length, or the longest if using multiple simulations of different
        durations/lengths, of the simulation trajectories. Should be in the same time unit as durations.

    delta_t_list : array_like
        The list of :math:`\Delta t` for the survival function :math:`\sigma` to check the interaction
        survival rate. It does not need to contain or start with 0.

    Returns
    -------
    survival_func : dict
        The survival function :math:`\sigma` stored in a dictionary {delta_t: survival rate}. The rate is
        1 at ``delta_t == 0`` and 0 wherever the normalisation is undefined (no durations, all-zero
        durations, or ``delta_t == t_total``).

    See also
    --------
    pylipid.func.cal_koff
        Calculate residence time and koff.

    """
    num_of_contacts = len(durations)

    def _surviving_time(delta_t):
        """Total time by which the contacts lasting at least ``delta_t`` exceed it."""
        return float(sum(res_time - delta_t for res_time in durations if res_time >= delta_t))

    # sigma(0) is computed up front so the result does not depend on 0 being the first
    # (or any) entry of delta_t_list.
    denominator0 = t_total * num_of_contacts
    survival_func0 = _surviving_time(0) / denominator0 if denominator0 != 0 else 0

    survival_func = {}
    for delta_t in delta_t_list:
        if delta_t == 0:
            survival_func[delta_t] = 1
            continue
        denominator = (t_total - delta_t) * num_of_contacts * survival_func0
        # Explicit check instead of catching ZeroDivisionError: NumPy scalars do not raise
        # on division by zero, they silently produce inf/nan.
        if denominator == 0:
            survival_func[delta_t] = 0
        else:
            survival_func[delta_t] = _surviving_time(delta_t) / denominator
    return survival_func
216 |
217 |
def _curve_fitting(survival_func, delta_t_list, initial_guess):
    r"""Fit the exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`.

    Parameters
    ----------
    survival_func : dict
        Survival rates keyed by delta_t. NaN/inf entries are replaced via ``np.nan_to_num`` before fitting.

    delta_t_list : array_like
        The delta_t values (keys of ``survival_func``) used as x data.

    initial_guess : array_like
        Initial ``(k1, k2, A, B)`` passed to ``scipy.optimize.curve_fit`` as ``p0``.

    Returns
    -------
    res_time : scalar
        ``1 / koff``; 0 if the fit failed.

    koff : scalar
        The smaller of ``|k1|`` and ``|k2|``; 0 if the fit failed.

    r_squared : scalar
        Coefficient of determination of the fit; 0 if the fit failed.

    popt : array_like
        Fitted ``(k1, k2, A, B)``; ``[0, 0, 0, 0]`` if the fit failed.

    """
    survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list])  # y
    try:
        popt, _ = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates),
                            p0=initial_guess, maxfev=100000)
    except RuntimeError:
        # curve_fit did not converge within maxfev evaluations
        return 0, 0, 0, [0, 0, 0, 0]
    # np.longdouble is the portable spelling of the platform's extended-precision float;
    # the np.float128 alias is not defined on every platform.
    n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.longdouble), *popt)
    r_squared = 1 - np.sum((np.nan_to_num(n_fitted) - np.nan_to_num(survival_rates)) ** 2) / np.sum(
        (survival_rates - np.mean(survival_rates)) ** 2)
    koff = np.min([abs(k) for k in popt[:2]])
    res_time = 1 / koff
    return res_time, koff, r_squared, popt
235 |
236 |
237 | def _bi_expo(x, k1, k2, A, B):
238 | """The exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
239 | return A*np.exp(-k1*x) + B*np.exp(-k2*x)
240 |
241 |
def calculate_koff_wrapper(durations, title, fn, t_total=None, timestep=1, nbootstrap=10,
                           initial_guess=(1., 1., 1., 1.), plot_data=True, timeunit="us", fig_close=True):
    """Wrapper function that calculates koff and plots the koff figure.

    Parameters
    ----------
    durations : array_like
        Contact durations. If they sum to 0, nothing is fitted or plotted and all results are 0.

    title : str
        Title of the koff figure.

    fn : str
        File name of the koff figure, passed to ``plot_koff`` as ``fig_fn``.

    t_total : scalar, optional, default=None
        Length of the simulation trajectories. It is passed to ``cal_koff``, which uses it as the upper
        bound of ``np.arange``, so a value is needed whenever the durations do not sum to 0.

    timestep : scalar, optional, default=1
        Spacing of the delta_t values of the survival function.

    nbootstrap : int, optional, default=10
        Number of bootstrap resamplings. With ``nbootstrap <= 0`` the bootstrap results are 0.

    initial_guess : array_like, optional, default=(1., 1., 1., 1.)
        Initial guess for the bi-exponential fit.

    plot_data : bool, optional, default=True
        Whether to plot the koff figure.

    timeunit : {"ns", "us"}, optional, default="us"
        Time unit used for the figure labels and text.

    fig_close : bool, optional, default=True
        Passed on to ``plot_koff``.

    Returns
    -------
    koff, res_time, r_squared, koff_boot, r_squared_boot : scalars
        koff, residence time and r squared of the fit, followed by the bootstrap averages of koff and
        r squared.

    """
    if np.sum(durations) == 0:
        # No contact was formed: nothing to fit or plot.
        return 0, 0, 0, 0, 0

    with warnings.catch_warnings():
        # Fitting poorly sampled data emits many numerical warnings; they are silenced here.
        warnings.simplefilter("ignore")
        koff, res_time, properties = cal_koff(durations, t_total, timestep, nbootstrap, initial_guess)
        r_squared = properties["r_squared"]
        if nbootstrap > 0:
            koff_boot = np.mean(properties["ks_boot_set"], axis=0)[0]
            r_squared_boot = np.mean(properties["r_squared_boot_set"])
        else:
            # Without bootstrapping cal_koff stores the placeholder [0], which cannot be
            # indexed after averaging.
            koff_boot = 0
            r_squared_boot = 0
    if plot_data:
        text = _format_koff_text(properties, timeunit)
        plot_koff(durations, properties["delta_t_list"], properties["survival_rates"],
                  properties["n_fitted"], survival_rates_bootstraps=properties["survival_rates_boot_set"],
                  fig_fn=fn, title=title, timeunit=timeunit, t_total=t_total, text=text, fig_close=fig_close)
    return koff, res_time, r_squared, koff_boot, r_squared_boot
264 |
265 |
266 | def _format_koff_text(properties, timeunit):
267 | """Format text for koff plot. """
268 | tu = "ns" if timeunit == "ns" else r"$\mu$s"
269 | text = "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off1}}$", properties["ks"][0], tu)
270 | text += "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off2}}$", properties["ks"][1], tu)
271 | text += "{:14s} = {:.4f}\n".format("$R^2$", properties["r_squared"])
272 | ks_boot_avg = np.mean(properties["ks_boot_set"], axis=0)
273 | cv_avg = 100 * np.std(properties["ks_boot_set"], axis=0) / np.mean(properties["ks_boot_set"], axis=0)
274 | text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off1, boot}}$", ks_boot_avg[0],
275 | tu, cv_avg[0])
276 | text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off2, boot}}$", ks_boot_avg[1],
277 | tu, cv_avg[1])
278 | text += "{:14s} = {:.4f}\n".format("$R^2$$_{{boot}}$", np.mean(properties["r_squared_boot_set"]))
279 | text += "{:18s} = {:.3f} {:2s}".format("$Res. Time$", properties["res_time"], tu)
280 | return text
--------------------------------------------------------------------------------
/pylipid/plot/__init__.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | r"""
18 | plot module
19 | ============
20 | The ``plot`` module provides functions for aiding in the scientific analysis:
21 |
22 | .. currentmodule:: pylipid.plot
23 |
24 | .. autosummary::
25 | :toctree: generated/
26 |
27 | plot_koff
28 | plot_residue_data
29 | plot_residue_data_logo
30 | plot_surface_area
31 | plot_binding_site_data
32 | plot_corrcoef
33 |
34 | """
35 |
36 |
37 | from .koff import plot_koff
38 | from .plot1d import plot_residue_data
39 | from .plot1d import plot_residue_data_logo
40 | from .plot1d import plot_surface_area
41 | from .plot1d import plot_binding_site_data
42 | from .plot2d import plot_corrcoef
43 |
44 |
--------------------------------------------------------------------------------
/pylipid/plot/koff.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for plotting koff.
18 | """
19 |
20 | import os
21 | import numpy as np
22 | import matplotlib.pyplot as plt
23 |
24 | __all__ = ["plot_koff"]
25 |
26 |
def plot_koff(durations, delta_t_list, survival_rates, n_fitted,
              survival_rates_bootstraps=None, fig_fn=None, title=None,
              timeunit=None, text=None, t_total=None, fig_close=True):
    r"""Plot the koff figure.

    The koff figure contains two axes. The left axis plots the sorted
    interaction durations, and the right one plots the normalised survival
    rates, the fitted bi-exponential curve and the bootstrapped survival rates.

    Parameters
    ----------
    durations : array_like
        A list of contact durations.

    delta_t_list : array_like
        A list of :math:`\Delta t` at which the survival rates are calculated.

    survival_rates : array_like
        Survival rates calculated at delta_t_list.

    n_fitted : array_like
        The values of fitted bi-exponential at delta_t_list.

    survival_rates_bootstraps : list of array, optional, default=None
        A list of bootstrapped survival rates. Each entry is drawn as a gray
        line against delta_t_list.

    fig_fn : str, optional, default=None
        Name of the koff figure. By default the figure is saved as "koff.pdf"
        in the current working directory.

    title : str, optional, default=None
        Figure title. Default is None.

    timeunit : str or None, optional, default=None
        Time unit of the given durations, shown in the x label of the left
        axis. "us" is rendered as a micro sign; any other string (e.g. "ns")
        is printed verbatim. With None the label reads "Duration (timeunit)".

    text : str, optional, default=None
        Text printed next to the koff figure. When given, a wider figure is
        used to make room for it. The default is None.

    t_total : scalar, optional, default=None
        Duration of simulation trajectories. The xlim of both axes will be set
        to t_total if a value is given; otherwise the left axis spans 1.1 times
        the longest duration and the right axis is left to matplotlib.

    fig_close : bool, optional, default=True
        Close the figure after saving it.

    """
    # x label of the duration panel
    if timeunit is None:
        xlabel = "Duration (timeunit)"
    elif timeunit == "us":
        xlabel = r"Duration ($\mu s$)"
    else:
        # "ns" and any other unit are printed as given. An unrecognised unit
        # used to leave ``xlabel`` undefined and fail with a NameError.
        xlabel = "Duration ({})".format(timeunit)

    # A wider canvas with narrower panels is used when text has to fit on the right.
    if text is None:
        figsize, left, width, spacing = (5.5, 3.5), 0.13, 0.33, 0.05
    else:
        figsize, left, width, spacing = (8.2, 3.5), 0.0975, 0.23, 0.0375
    bottom, height = 0.17, 0.68
    # Figure number 1 is requested on every call, as before.
    fig = plt.figure(1, figsize=figsize)
    axScatter = fig.add_axes([left, bottom, width, height])
    axHisty = fig.add_axes([left + width + spacing, bottom, width, height])

    # plot original data: longest duration gets index 1
    x = np.sort(durations)
    y = np.arange(len(x)) + 1
    axScatter.scatter(x[::-1], y, label="Contacts", s=10, c="#176BA0")
    if len(x) > 0:
        # guard against empty input, for which x[-1] does not exist
        axScatter.set_xlim(0, x[-1] * 1.1)
    axScatter.legend(loc="upper right", prop={"size": 10, "weight": "bold"}, frameon=False, handletextpad=0.1)
    axScatter.set_ylabel("Sorted Index", fontsize=10, weight="bold")
    axScatter.set_xlabel(xlabel, fontsize=10, weight="bold")

    # plot survival function
    axHisty.scatter(delta_t_list, survival_rates, zorder=8, s=10, label="Survival func.", c="#7a5195")
    axHisty.yaxis.set_label_position("right")
    axHisty.yaxis.tick_right()
    axHisty.set_xlabel(r"$\Delta$t", fontsize=10, weight="bold")
    axHisty.set_ylabel("Probability", fontsize=10, weight="bold")
    axHisty.set_yticks([0, 0.25, 0.5, 0.75, 1.0])
    axHisty.set_ylim(-0.1, 1.1)

    # plot the fitted curve
    axHisty.plot(delta_t_list, n_fitted, 'r--', linewidth=3, zorder=10, label="Fitted biexpo.")

    # plot bootstrapped survival functions; only the first one carries a legend entry
    if survival_rates_bootstraps is not None:
        for boot_idx, survival_rates_boot in enumerate(np.atleast_2d(survival_rates_bootstraps)):
            boot_kwargs = {"label": "Bootstrapping"} if boot_idx == 0 else {}
            axHisty.plot(delta_t_list, survival_rates_boot, color="gray", alpha=0.5,
                         linewidth=3, **boot_kwargs)

    axHisty.legend(loc="upper right", prop={"size": 8, "weight": "bold"}, frameon=False)

    # set xlim
    if t_total is not None:
        axScatter.set_xlim(0, t_total)
        axHisty.set_xlim(0, t_total)

    if title is not None:
        fig.text(0.13, 0.89, title, fontdict={"size": 12, "weight": "bold"})

    # set ticklabel fonts
    for ax in [axHisty, axScatter]:
        for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=10, weight="bold")

    # print text on the right, positioned in axes coordinates of the right panel
    if text is not None:
        axHisty.text(1.4, 1.0, text, verticalalignment='top', horizontalalignment='left',
                     transform=axHisty.transAxes,
                     fontdict={"size": 8, "weight": "normal"}, linespacing=2)

    if fig_fn is None:
        fig_fn = os.path.join(os.getcwd(), "koff.pdf")
    fig.savefig(fig_fn, dpi=300)
    if fig_close:
        plt.close(fig)

    return
150 |
151 |
--------------------------------------------------------------------------------
/pylipid/plot/plot1d.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions that plot interactions as a function of residue index.
18 | """
19 |
20 | import os
21 | from collections import defaultdict
22 | import matplotlib.pyplot as plt
23 | from matplotlib.ticker import MultipleLocator
24 | import numpy as np
25 | import pandas as pd
26 | import logomaker
27 |
28 | __all__ = ["plot_residue_data", "plot_residue_data_logo",
29 | "plot_binding_site_data", "plot_surface_area", "AxisIndex"]
30 |
31 |
def plot_residue_data(residue_index, interactions, gap=200, ylabel=None,
                      fn=None, title=None, fig_close=False):
    """Plot interactions as a function of residue index.

    Parameters
    ----------
    residue_index : array_like
        Residue indices in an ascending order. If a residue index is smaller than its preceding one,
        the plotting function will consider it as the start of a new chain and will plot the following data
        in a new figure. A gap in residue index that is not larger than ``gap`` will be marked as a gray area in
        the figure, but a gap that is larger than ``gap`` will start a new figure.
    interactions : array_like
        Values to plot. In the same order as ``residue_index``.
    gap : int, optional, default=200
        The number of missing residues in ``residue_index`` that initiates a new figure. A difference between two
        adjacent indices that is larger than 1 but not larger than ``gap`` is treated as missing residues and
        marked as a gray area, whereas a larger difference starts a new figure. The default gap is 200.
    ylabel : str, optional, default=None
        y axis label. Default is "Interactions".
    fn : str, optional, default=None
        Figure name. By default the figure is saved as "Figure_interactions.pdf" in the current
        working directory. When more than one figure is produced, the chain counter is inserted
        before the file extension ("name_0.pdf", "name_1.pdf", ...).
    title : str, optional, default=None
        Figure title.
    fig_close : bool, optional, default=False
        Close each figure after saving it. Can be used to save memory if many figures are opened.

    """
    bar_color = "#176BA0"
    if ylabel is None:
        ylabel = "Interactions"

    if fn is None:
        fn = os.path.join(os.getcwd(), "Figure_interactions.pdf")

    # Plain lists are accepted: the code below relies on array slicing and .min()/.max().
    residue_index = np.asarray(residue_index)
    interactions = np.asarray(interactions)
    if len(residue_index) == 0:
        # nothing to plot; no figure layout is defined for an empty chain
        return

    # check for chain breaks
    gray_areas = defaultdict(list)  # missing-residue spans, keyed by the start position of their chain
    chain_starts = [0]  # a new figure starts where the index decreases or jumps by more than ``gap``
    for idx in range(1, len(residue_index)):
        step = residue_index[idx] - residue_index[idx - 1]
        if step < 0 or step > gap:
            chain_starts.append(idx)
        elif step > 1:
            gray_areas[chain_starts[-1]].append([residue_index[idx - 1] + 1, residue_index[idx] - 1])
    chain_starts.append(len(residue_index))

    # (largest number of residues, figsize, major tick spacing, minor tick spacing)
    layouts = [
        (20, (2.8, 1.5), 5, 1),
        (50, (3.2, 1.5), 10, 1),
        (300, (3.8, 1.8), 50, 10),
        (1000, (4.5, 1.8), 100, 10),
        (2000, (6.0, 1.8), 200, 50),
        (float("inf"), (7.5, 1.8), 500, 100),
    ]
    single_figure = len(chain_starts) == 2

    # plot
    for chain_idx, (start, stop) in enumerate(zip(chain_starts[:-1], chain_starts[1:])):
        df = interactions[start:stop]
        resi_selected = residue_index[start:stop]
        # pick the first layout that is large enough for this chain
        figsize, major, minor = next(
            (size, maj, mnr) for limit, size, maj, mnr in layouts if len(df) <= limit)
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax.xaxis.set_major_locator(MultipleLocator(major))
        ax.xaxis.set_minor_locator(MultipleLocator(minor))
        ax.bar(resi_selected, df, 1.0, linewidth=0, color=bar_color)
        # plot missing residue area
        for gray_area in gray_areas.get(start, []):
            ax.axvspan(gray_area[0], gray_area[1], facecolor="#c0c0c0", alpha=0.3)
        # axis setting
        ax.set_ylim(0, df.max() * 1.05)
        ax.set_xlim(resi_selected.min() - 1, resi_selected.max() + 1)
        ax.set_ylabel(ylabel, fontsize=8, weight="bold")
        ax.set_xlabel("Residue Index", fontsize=8, weight="bold")
        for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=8, weight="bold")
        if title is not None:
            ax.set_title(title, fontsize=8, weight="bold")
        plt.tight_layout()
        if single_figure:
            fig.savefig(fn, dpi=300)
        else:
            name, ext = os.path.splitext(fn)
            fig.savefig("{}_{}{}".format(name, chain_idx, ext), dpi=300)
        if fig_close:
            plt.close(fig)

    return
134 |
135 |
def plot_residue_data_logo(residue_index, logos, interactions, gap=1000, letter_map=None,
                           color_scheme="chemistry", ylabel=None, title=None, fn=None, fig_close=False):
    """Plot interactions using ``logomaker.Logo``.

    Parameters
    -----------
    residue_index : list
        Residue indices in an ascending order. If a residue index is smaller than its preceding one,
        the plotting function will consider it as the start of a new chain and will plot the following data
        in a new figure. A gap in residue index that is not larger than ``gap`` will be marked as gray areas in
        the figure, but a gap that is larger than ``gap`` will start a new figure.
    logos : list of str
        Logos in the corresponding order as ``residue_index``. The height of logos in the figure
        is determined by the values given to ``interactions``. Single-character entries are used as they are.
        Three-letter names of the 20 common amino acids are converted to their single-letter names.
        Other mappings can be defined via ``letter_map``.
    interactions : list
        Plotting values in the corresponding order as ``residue_index``.
    gap : int, optional, default=1000
        The number of missing residues in ``residue_index`` that starts a new figure. A gap between two adjacent
        indices that is not larger than the provided value will be considered as missing residues
        and will be marked as gray areas in the figure, whereas a gap that is larger than the provided value
        will start a new figure and plot the following data in that new figure. The default is 1000.
        NOTE(review): earlier documentation stated that the gap needs to be greater than 1000; this function
        does not enforce it.
    letter_map : dict, optional, default=None
        A dictionary that maps provided names to single-letter logos in the form of
        {"provided name": "single_letter logo"}. Entries override the built-in amino acid mapping.
    color_scheme : str, optional, default="chemistry"
        The color scheme passed to logomaker.Logo(). Default is "chemistry".
    ylabel : str, optional, default=None
        y axis label. Default is "Interactions".
    title : str, optional, default=None
        Figure title, placed above the first row of each figure.
    fn : str, optional, default=None
        Figure name. By default the figure is saved as "Figure_interactions_logo.pdf" in the current
        working directory. When more than one figure is produced, the page counter is inserted
        before the file extension.
    fig_close : bool, optional, default=False
        Close each figure after saving it. Can be used to save memory if many figures are opened.

    Raises
    ------
    KeyError
        If an entry of ``logos`` is longer than one character and has no single-letter mapping.

    """
    # single-letter dictionary
    single_letter = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
                     'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
                     'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
                     'ALA': 'A', 'VAL': 'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}
    if letter_map is not None:
        single_letter.update(letter_map)

    logos_checked = []
    for name in logos:
        if len(name) == 1:
            logos_checked.append(name)
        elif name in single_letter:
            logos_checked.append(single_letter[name])
        else:
            # same exception type as a plain dictionary lookup, but tells the caller how to fix it
            raise KeyError(
                "No single-letter logo is defined for '{}'; provide one via letter_map.".format(name))
    if ylabel is None:
        ylabel = "Interactions"
    if fn is None:
        fn = os.path.join(os.getcwd(), "Figure_interactions_logo.pdf")

    length = 100  # number of residue positions per row handed to AxisIndex
    # check for chain breaks, gray_areas and axis breaks
    axis_obj = AxisIndex(residue_index, logos_checked, interactions, length, gap)
    axis_obj.sort()
    single_page = len(axis_obj.breaks) == 1

    # plot: one figure per page, one axes per row of that page
    for page_idx, rows in axis_obj.breaks.items():
        n_rows = len(rows)
        fig, axes = plt.subplots(n_rows, 1, figsize=(4.5, 1.3 * n_rows), sharey=True)
        axes = np.atleast_1d(axes)  # plt.subplots returns a bare Axes when n_rows == 1
        plt.subplots_adjust(hspace=0.5, left=0.2)
        ymax = []
        for ax_idx, ax in enumerate(axes):
            resi_selected = [item[0] for item in rows[ax_idx]]
            logos_selected = [item[1] for item in rows[ax_idx]]
            interaction_selected = [item[2] for item in rows[ax_idx]]
            ymax.append(np.max(interaction_selected))
            # rows whose values sum to zero are left without logos
            if np.sum(interaction_selected) > 0:
                df = pd.DataFrame({"Resid": resi_selected, "Resn": logos_selected, "Data": interaction_selected})
                matrix = df.pivot(index="Resid", columns='Resn', values="Data").fillna(0)
                logomaker.Logo(matrix, color_scheme=color_scheme, ax=ax)
            if ax_idx == (n_rows - 1):
                ax.set_xlabel("Residue Index", fontsize=8, weight="bold")
            ax.xaxis.set_major_locator(MultipleLocator(20))
            ax.xaxis.set_minor_locator(MultipleLocator(1))
            ax.set_xlim(resi_selected[0] - 0.5, resi_selected[-1] + 0.5)
            ax.set_ylabel(ylabel, fontsize=8, weight="bold", va="center")
            for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
                plt.setp(label, fontsize=8, weight="bold")
        # the y axis is shared, so setting the limit on one row applies to all rows
        axes[-1].set_ylim(0, np.max(ymax) * 1.05)
        # plot missing areas
        for item in axis_obj.gray_areas.get(page_idx, []):
            axes[item[0]].axvspan(item[1], item[2], facecolor="#c0c0c0", alpha=0.3)
        if title is not None:
            axes[0].set_title(title, fontsize=10, weight="bold")
        plt.tight_layout()
        if single_page:
            fig.savefig(fn, dpi=300)
        else:
            name, ext = os.path.splitext(fn)
            fig.savefig("{}_{}{}".format(name, page_idx, ext), dpi=300)
        if fig_close:
            plt.close(fig)

    return
240 |
241 |
def plot_binding_site_data(data, fig_fn, ylabel=None, title=None, fig_close=False):
    """Draw one violin per binding site and save the figure.

    Each column of ``data`` is expected to be named "Binding Site {idx}" and to hold the values
    recorded for that binding site. Columns may have different numbers of valid entries; missing
    values are dropped per column before plotting. On top of every violin the median (white dot),
    the interquartile range (thick bar) and the whiskers (thin bar) are drawn.

    Note that this function sets ``font.size`` and ``font.weight`` in ``plt.rcParams``.

    Parameters
    ----------
    data : pandas.DataFrame
        Data to plot, one binding site per column, with "Binding Site {idx}" as column names.
    fig_fn : str
        Figure name.
    ylabel : str, optional, default=None
        Y label. An empty label is used when None.
    title : str, optional, default=None
        Figure title. An empty title is used when None.
    fig_close : bool, optional, default=False
        Use plt.close() to close the figure. Can be used to save memory if many figures are opened.

    """
    from itertools import cycle as _cycle

    def _whisker_limits(sorted_vals, q1, q3):
        # Whiskers reach 1.5 x IQR beyond the quartiles, clipped to the data range.
        reach = (q3 - q1) * 1.5
        upper = np.clip(q3 + reach, q3, sorted_vals[-1])
        lower = np.clip(q1 - reach, sorted_vals[0], q1)
        return lower, upper

    ylabel = "" if ylabel is None else ylabel
    title = "" if title is None else title

    palette = _cycle(plt.get_cmap("tab10").colors)
    plt.rcParams["font.size"] = 10
    plt.rcParams["font.weight"] = "bold"

    # binding site ids are read from the last word of every column name
    site_ids = sorted(int(col.split()[-1]) for col in data.columns)
    n_sites = len(site_ids)
    sorted_values = []
    for site_id in site_ids:
        column = data["Binding Site {}".format(site_id)]
        sorted_values.append(np.sort(column.dropna().tolist()))
    face_colors = [next(palette) for _ in range(n_sites)]

    fig, ax = plt.subplots(1, 1, figsize=(n_sites * 0.6, 2.8))
    plt.subplots_adjust(bottom=0.20, top=0.83)
    ax.set_title(title, fontsize=10, weight="bold")
    violins = ax.violinplot(sorted_values, showmeans=False, showmedians=False, showextrema=False)
    for body, face_color in zip(violins['bodies'], face_colors):
        body.set_facecolors(face_color)
        body.set_edgecolor('black')
        body.set_alpha(1)

    # Percentiles are computed column by column because the columns can differ in length.
    percentiles = np.array([np.percentile(vals, [25, 50, 75]) for vals in sorted_values])
    quartile1, medians, quartile3 = percentiles.T
    whiskers = np.array([
        _whisker_limits(vals, q1, q3)
        for vals, q1, q3 in zip(sorted_values, quartile1, quartile3)])
    whiskers_min = whiskers[:, 0]
    whiskers_max = whiskers[:, 1]

    positions = np.arange(1, len(medians) + 1)
    ax.scatter(positions, medians, marker='o', color='white', s=3, zorder=3)
    ax.vlines(positions, quartile1, quartile3, color='k', linestyle='-', lw=5)
    ax.vlines(positions, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1)

    ax.get_xaxis().set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, n_sites + 1))
    ax.set_xticklabels(site_ids, fontsize=10, weight="bold")
    ax.set_xlim(0.25, n_sites + 0.75)
    ax.set_xlabel('Binding Site', fontsize=10, weight="bold")
    ax.set_ylabel(ylabel, fontsize=10, weight="bold")
    plt.tight_layout()
    fig.savefig(fig_fn, dpi=200)
    if fig_close:
        plt.close()
    return
320 |
321 |
def plot_surface_area(surface_area, fig_fn, timeunit=None, fig_close=False):
    """Plot binding site surface area as a function of time.

    A grid of panels is drawn: one column per binding site and one row per distinct pair formed
    by the first two levels of the index of ``surface_area`` (used as "traj" and "prot" in the
    legend). All panels share their x and y axes.

    Note that this function sets ``font.size`` and ``font.weight`` in ``plt.rcParams``.

    Parameters
    ----------
    surface_area : pandas.DataFrame
        A pandas.DataFrame object which has columns named as "Binding Site {idx}"
        to record binding site surface areas and a column named "Time" to record the timesteps
        at which the surface area data are taken.
    fig_fn : str
        Figure filename.
    timeunit : {"ns", "us"} or None, optional, default=None
        Time unit shown in x label.
    fig_close : bool, optional, default=False
        Use plt.close() to close the figure. Can be used to save memory if many figures are opened.

    See also
    ---------
    pylipid.func.calculate_surface_area
        The function that generates surface_area data.

    """
    from itertools import cycle as _cycle

    palette = _cycle(plt.get_cmap("tab10").colors)
    plt.rcParams["font.size"] = 10
    plt.rcParams["font.weight"] = "normal"

    # suffix appended to the "Time" x label; unrecognised values are used unchanged
    if timeunit is None:
        time_suffix = ""
    elif timeunit == "ns":
        time_suffix = " (ns)"
    elif timeunit == "us":
        time_suffix = r" ($\mu$s)"
    else:
        time_suffix = timeunit

    row_keys = sorted({ind[:2] for ind in surface_area.index})
    site_names = [col for col in surface_area.columns if col != "Time"]
    n_rows, n_cols = len(row_keys), len(site_names)
    line_colors = [next(palette) for _ in site_names]

    # squeeze=False always yields a 2D array of axes, whatever the grid size
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.4, n_rows * 1.6),
                             sharex=True, sharey=True, squeeze=False)
    plt.subplots_adjust(wspace=0.2, hspace=0.16)

    last_row = n_rows - 1
    for row_idx, row_key in enumerate(row_keys):
        df = surface_area.loc[row_key]
        legend_label = "traj {} prot {}".format(row_key[0], row_key[1])
        for col_idx, bs_name in enumerate(site_names):
            panel = axes[row_idx, col_idx]
            panel.plot(df["Time"], df[bs_name], color=line_colors[col_idx], label=legend_label)
            # axis labels only on the outer panels, binding site names on the top row
            if row_idx == last_row:
                panel.set_xlabel("Time{}".format(time_suffix), fontsize=10)
            if col_idx == 0:
                panel.set_ylabel(r"Area (nm$^2$)", fontsize=10)
            if row_idx == 0:
                panel.set_title(bs_name, fontsize=10)
            panel.legend(loc="best", frameon=False)
    fig.tight_layout()
    fig.savefig(fig_fn, dpi=200)
    if fig_close:
        plt.close()

    return
389 |
390 |
class AxisIndex:
    """Build axes for logo figure.

    Residues are distributed over pages (figures) and rows (axes). Each row is a list of
    ``(residue index, logo, interaction)`` tuples covering at most ``length`` consecutive
    positions. Positions without data are padded with ``(index, "A", 0)`` entries and
    recorded as gray areas.

    After calling :meth:`sort`:

    * ``breaks[page]`` is the list of rows of that page;
    * ``gray_areas[page]`` is a list of ``(row number, first index, last index)`` tuples
      marking the padded spans.

    Parameters
    ----------
    residue_index : sequence of int
        Residue indices. Must contain at least one entry.
    logos : sequence of str
        Logo of each residue, in the order of ``residue_index``.
    interactions : sequence
        Value of each residue, in the order of ``residue_index``.
    length : int
        Number of positions per row.
    gap : int
        A step between adjacent indices larger than ``gap`` starts a new page; a step
        larger than 1 but not larger than ``gap`` is padded as missing residues.
    """

    def __init__(self, residue_index, logos, interactions, length, gap):
        self.page_idx = 0
        self.length = length
        self.gap = gap
        self.residue_index = residue_index
        self.logos = logos
        self.interactions = interactions
        # the first row starts at the multiple of ``length`` at or below the first residue
        self.axis_start = (residue_index[0] // length) * length
        self.breaks = defaultdict(list)
        self.breaks[self.page_idx].append([])
        self.gray_areas = defaultdict(list)

    def _entry(self, pointer):
        """Return the (index, logo, interaction) tuple of the residue at ``pointer``."""
        return (self.residue_index[pointer], self.logos[pointer], self.interactions[pointer])

    def fill_missing(self, start_value, end_value):
        """Pad the current row with positions start_value..end_value and mark them gray."""
        for xloci in np.arange(start_value, end_value + 1):
            self.breaks[self.page_idx][-1].append((xloci, "A", 0))
        self.gray_areas[self.page_idx].append((len(self.breaks[self.page_idx]) - 1, start_value, end_value))

    def new_axis(self, pointer):
        """Start a new row on the current page, beginning at the residue at ``pointer``."""
        self.breaks[self.page_idx].append([])
        self.axis_start = self.residue_index[pointer]
        self.breaks[self.page_idx][-1].append(self._entry(pointer))

    def new_page(self, pointer):
        """Pad the current row to full length and start a new page at the residue at ``pointer``."""
        if len(self.breaks[self.page_idx][-1]) < self.length:
            self.fill_missing(self.axis_start + len(self.breaks[self.page_idx][-1]), self.axis_start + self.length - 1)
        self.page_idx += 1
        self.breaks[self.page_idx].append([])
        self.axis_start = (self.residue_index[pointer] // self.length) * self.length
        if self.axis_start != self.residue_index[pointer]:
            self.fill_missing(self.axis_start, self.residue_index[pointer] - 1)
        self.breaks[self.page_idx][-1].append(self._entry(pointer))

    def new_gap(self, pointer):
        """Pad the missing residues before the residue at ``pointer``, then add that residue.

        Rows are broken whenever they reach ``length`` positions, both inside the padded
        span and for the residue that follows it.
        """
        rows = self.breaks[self.page_idx]
        resid = self.residue_index[pointer]
        gray_start = self.residue_index[pointer - 1] + 1
        for xloci in np.arange(gray_start, resid):
            if xloci - self.axis_start >= self.length:
                # The row is full: close the gray span collected so far (skipping the
                # empty span that arises when the gap begins exactly at a row boundary)
                # and continue on a new row.
                if xloci > gray_start:
                    self.gray_areas[self.page_idx].append((len(rows) - 1, gray_start, xloci - 1))
                rows.append([])
                self.axis_start = xloci
                gray_start = xloci
            rows[-1].append((xloci, "A", 0))
        self.gray_areas[self.page_idx].append((len(rows) - 1, gray_start, resid - 1))
        if resid - self.axis_start >= self.length:
            # the padding filled the row completely; the residue opens the next row
            self.new_axis(pointer)
        else:
            rows[-1].append(self._entry(pointer))

    def sort(self):
        """Distribute all residues over pages and rows, filling ``breaks`` and ``gray_areas``."""
        if self.axis_start != self.residue_index[0]:
            self.fill_missing(self.axis_start, self.residue_index[0] - 1)
        self.breaks[self.page_idx][-1].append(self._entry(0))
        # Iterating over the remaining positions also handles a single-residue input,
        # for which there is nothing left to do here.
        for pointer in range(1, len(self.residue_index)):
            step = self.residue_index[pointer] - self.residue_index[pointer - 1]
            if step == 1:
                if self.residue_index[pointer] - self.axis_start < self.length:
                    self.breaks[self.page_idx][-1].append(self._entry(pointer))
                else:
                    self.new_axis(pointer)
            elif step <= 0 or step > self.gap:
                # A non-increasing index is treated as the start of a new chain. This
                # includes a repeated index, which matched no case before and stalled the loop.
                self.new_page(pointer)
            else:
                self.new_gap(pointer)
        if len(self.breaks[self.page_idx][-1]) < self.length:
            self.fill_missing(self.axis_start + len(self.breaks[self.page_idx][-1]), self.axis_start + self.length - 1)
473 |
--------------------------------------------------------------------------------
/pylipid/plot/plot2d.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for 2D plot.
18 | """
19 |
20 | import os
21 | import matplotlib.pyplot as plt
22 | import matplotlib.colors as colors
23 | import numpy as np
24 |
25 | __all__ = ["plot_corrcoef"]
26 |
27 |
def plot_corrcoef(corrcoef, residue_index, cmap="Reds", vmin=None, vmax=None,
                  fn=None, title=None, fig_close=False):
    """Plot correlation coefficient matrix.

    The matrix is drawn with a logarithmic color scale. Dashed lines mark the positions
    where two adjacent entries of ``residue_index`` are not consecutive.

    Note that this function sets ``font.size`` and ``font.weight`` in ``plt.rcParams``.

    Parameters
    ----------
    corrcoef : array_like
        A scalar 2D array of correlation coefficient matrix. NaN entries are replaced by 0.
    residue_index : array_like
        A 1D array of residue index, one entry per row/column of ``corrcoef``.
    cmap : str or `matplotlib.colors.Colormap`, optional, default="Reds"
        A Colormap instance or matplotlib register colormap name. The
        colormap maps *corrcoef* to colors.
    vmin, vmax : scalar, optional, default=None
        Limits of the logarithmic color scale. By default the 1st and 99th percentiles of
        the unique values of ``corrcoef`` are used. A logarithmic scale needs a positive
        lower limit, so when the default lower limit is not positive, the 1st percentile
        of the positive values is used instead (if there are any).
    fn : str, optional, default=None
        Figure name. By default the figure is saved as "Figure_Correlation_Matrix.pdf" in the current
        working directory.
    title : str, optional, default=None
        Figure title.
    fig_close : bool, optional, default=False
        Close the figure after saving it. Can be used to save memory if many figures are opened.

    """
    plt.rcParams["font.size"] = 10
    plt.rcParams["font.weight"] = "bold"

    if fn is None:
        fn = os.path.join(os.getcwd(), "Figure_Correlation_Matrix.pdf")

    # (largest matrix size, figsize, major tick spacing, minor tick spacing)
    layouts = [
        (20, (2.5, 1.8), 5, 1),
        (50, (2.9, 2.0), 10, 1),
        (500, (4.9, 3.5), 50, 10),
        (999, (5.9, 4.5), 100, 10),
        (1999, (7.9, 6.5), 200, 20),
        (float("inf"), (8.9, 7.5), 500, 100),
    ]
    n_residues = len(corrcoef)
    figsize, majorlocator, minorlocator = next(
        (size, major, minor) for limit, size, major, minor in layouts if n_residues <= limit)
    fig, ax = plt.subplots(1, 1, figsize=figsize)

    # Cell ``idx`` of the mesh is centred on ``idx`` and spans idx-0.5 .. idx+0.5.
    # Ticks sit on cell centres; break lines sit on the boundary between two cells.
    majorticks = []
    minorticks = []
    ticklabels = []
    breaks = []
    for idx, resi in enumerate(residue_index):
        if not resi % majorlocator:
            majorticks.append(idx)
            ticklabels.append(resi)
        elif not resi % minorlocator:
            minorticks.append(idx)
        if idx > 0 and resi - residue_index[idx - 1] != 1:
            breaks.append(idx - 0.5)

    # One shared array of cell edges. The former ``x = y = ...`` followed by two in-place
    # subtractions shifted the same array twice, which moved the mesh by a full cell
    # and put the break lines through the middle of a cell.
    edges = np.arange(len(residue_index) + 1, dtype=float) - 0.5

    corrcoef = np.nan_to_num(corrcoef)
    unique_values = np.unique(np.ravel(corrcoef))
    if vmax is None:
        vmax = np.percentile(unique_values, 99)
    if vmin is None:
        vmin = np.percentile(unique_values, 1)
        if vmin <= 0:
            # LogNorm cannot work with a non-positive lower limit
            positive_values = unique_values[unique_values > 0]
            if positive_values.size:
                vmin = np.percentile(positive_values, 1)
    pcm = ax.pcolormesh(edges, edges, corrcoef, cmap=cmap,
                        norm=colors.LogNorm(vmax=vmax, vmin=vmin))
    fig.colorbar(pcm, ax=ax)

    # set ticks
    ax.set_xticks(majorticks)
    ax.set_xticks(minorticks, minor=True)
    ax.xaxis.set_ticklabels(ticklabels, fontsize=10, weight="bold")
    ax.set_yticks(majorticks)
    ax.set_yticks(minorticks, minor=True)
    ax.yaxis.set_ticklabels(ticklabels, fontsize=10, weight="bold")
    ax.tick_params(which='both', direction='out')

    ax.set_xlabel("Residue Index", fontsize=10, weight="bold")
    ax.set_ylabel("Residue Index", fontsize=10, weight="bold")

    for break_line in breaks:
        ax.axhline(break_line, linewidth=0.8, color="black", linestyle="--")
        ax.axvline(break_line, linewidth=0.8, color="black", linestyle="--")

    if title is not None:
        ax.set_title(title, fontsize=8, weight="bold")

    plt.tight_layout()
    fig.savefig(fn, dpi=200)
    if fig_close:
        plt.close(fig)

    return
133 |
134 |
--------------------------------------------------------------------------------
/pylipid/util/__init__.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 |
18 | r"""
19 | util module
20 | =============
21 | The ``util`` module contains other assisting functions:
22 |
23 | .. currentmodule:: pylipid.util
24 |
25 | .. autosummary::
26 | :toctree: generated/
27 |
28 | check_dir
29 | write_PDB
30 | write_pymol_script
31 | sparse_corrcoef
32 | rmsd
33 | get_traj_info
34 |
35 | """
36 |
37 |
38 | from .directory import check_dir
39 | from .coordinate import write_PDB
40 | from .pymol_script import write_pymol_script
41 | from .corrcoef import sparse_corrcoef
42 | from .rmsd import rmsd
43 | from .trajectory import get_traj_info
--------------------------------------------------------------------------------
/pylipid/util/coordinate.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for writing coordinates in PDB format"""
18 |
19 | import numpy as np
20 |
21 | __all__ = ["write_PDB"]
22 |
def write_PDB(prot_obj, bfactor, pdb_fn, resi_offset=0):
    """Write protein coordinates to a PDB file with per-residue data in the B-factor column.

    Only the first frame of ``prot_obj`` is written. Every atom of a residue
    receives the ``bfactor`` value of that residue, and the occupancy column is
    always 1.0. Coordinates are multiplied by 10 before being written
    (presumably converting nm to Angstrom -- confirm against the trajectory units).

    Parameters
    ----------
    prot_obj : mdtraj.Trajectory Object
        A mdtraj.Trajectory object of protein coordinates and topology information.
    bfactor : array_like
        B-factor values, one per protein residue, indexed by the residue index
        reported by the topology.
    pdb_fn : str
        Filename of the PDB coordinate file.
    resi_offset : int, optional, default=0
        Shift added to every residue sequence number.

    """
    coords = prot_obj.xyz[0]
    table = prot_obj.top.to_dataframe()[0]
    # Pull the columns out as plain arrays so that they are indexed by position.
    atom_serials = table["serial"].values
    residue_numbers = (table["resSeq"] + resi_offset).values
    atom_names = table["name"].values
    residue_names = table["resName"].values
    # Chain indices 0, 1, 2, ... become chain letters A, B, C, ...
    chain_letters = [chr(65 + int(idx)) for idx in table["chainID"]]
    residue_of_atom = [prot_obj.top.atom(atom_idx).residue.index
                       for atom_idx in range(prot_obj.n_atoms)]

    # Fixed-column ATOM record: record name 1-6, serial 7-11, atom name 13-16,
    # altLoc 17, residue name 18-20, chain 22, residue number 23-26, iCode 27,
    # three blanks 28-30, x/y/z 31-54, occupancy 55-60, B-factor 61-66.
    row_template = (
        "{HEADER:6s}{ATOM_ID:5d} "
        "{ATOM_NAME:^4s}{SPARE:1s}{RESN:3s} "
        "{CHAIN_ID:1s}{RESI:4d}{SPARE:1s}   "
        "{COORDX:8.3f}{COORDY:8.3f}{COORDZ:8.3f}{OCCUP:6.2f}{BFACTOR:6.2f}\n"
    )
    ######## write out coords ###########
    with open(pdb_fn, "w") as f:
        for idx in range(prot_obj.n_atoms):
            f.write(row_template.format(
                HEADER="ATOM",
                ATOM_ID=atom_serials[idx],
                ATOM_NAME=atom_names[idx],
                SPARE="",
                RESN=residue_names[idx],
                CHAIN_ID=chain_letters[idx],
                RESI=residue_numbers[idx],
                COORDX=coords[idx, 0] * 10,
                COORDY=coords[idx, 1] * 10,
                COORDZ=coords[idx, 2] * 10,
                OCCUP=1.0,
                BFACTOR=bfactor[residue_of_atom[idx]],
            ))
        # Terminate the record (and the file) with a complete line.
        f.write("TER\n")

    return
--------------------------------------------------------------------------------
/pylipid/util/corrcoef.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions for calculating correlation coefficient. """
18 |
19 | from scipy import sparse
20 | import numpy as np
21 |
22 | __all__ = ["sparse_corrcoef"]
23 |
24 |
def sparse_corrcoef(A, B=None):
    """Compute the correlation coefficient matrix of the rows of a sparse matrix.

    Each row of ``A`` is treated as one variable and each column as one
    observation. When ``B`` is given, it is stacked below ``A`` first, so the
    result covers the rows of both matrices.
    """
    stacked = A if B is None else sparse.vstack((A, B), format='csr')
    stacked = stacked.astype(np.float64)
    n_obs = stacked.shape[1]

    # Covariance from raw moments: (A A^H - s s^H / n) / (n - 1), where s holds
    # the row sums. This avoids densifying a centred copy of the input.
    row_totals = stacked.sum(1)
    mean_term = row_totals.dot(row_totals.T.conjugate()) / n_obs
    gram = stacked.dot(stacked.T.conjugate())
    covariance = (gram - mean_term) / (n_obs - 1)

    # Normalise: corr[i, j] = cov[i, j] / sqrt(cov[i, i] * cov[j, j]).
    variances = np.diag(covariance)
    return covariance / np.sqrt(np.outer(variances, variances))
--------------------------------------------------------------------------------
/pylipid/util/directory.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains the assisting functions for dealing with directories."""
18 |
19 | import os
20 |
21 | __all__ = ["check_dir"]
22 |
23 |
def check_dir(directory=None, suffix=None, print_info=True):
    """Return an absolute directory path, creating the directory if needed.

    The path is built from ``directory`` (or the current working directory if
    None is given), joined with ``suffix`` when one is provided. The directory
    is created, including missing parents, if it does not exist.

    Parameters
    ----------
    directory : str or None, optional, default=None
        Base directory. None means the current working directory.
    suffix : str or None, optional, default=None
        Sub-path appended to the base directory.
    print_info : bool, optional, default=True
        Print a message when a new directory is created.

    Returns
    -------
    directory : str
        The absolute path of the (now existing) directory.

    """
    if directory is None:
        directory = os.getcwd()
    else:
        directory = os.path.abspath(directory)
    if suffix is not None:
        directory = os.path.join(directory, suffix)
    if not os.path.isdir(directory):
        # exist_ok avoids a crash if another process creates the directory
        # between the isdir() check and this call.
        os.makedirs(directory, exist_ok=True)
        if print_info:
            print("Creating new directory: {}".format(directory))
    return directory
43 |
44 |
--------------------------------------------------------------------------------
/pylipid/util/pymol_script.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions that write PyMol scripts."""
18 |
19 | import os
20 | from shutil import copyfile
21 |
22 | __all__ = ["write_pymol_script"]
23 |
24 |
def write_pymol_script(fname, pdb_fname, data_fname, lipid, n_binding_site):
    """Write Python script that opens a PyMol session with binding site information.

    This function will generate a Python script which opens a PyMol session in which the binding site information
    stored in the interaction data file (a csv file provided by ``data_fname``) is mapped onto the receptor structure
    (loaded from a PDB coordinate file provided by ``pdb_fname``). In this PyMol session, residues from the same binding
    site are colored in the same color and shown in spheres with scales corresponding to their residence times.

    The PDB file is copied next to the script (unless it is already there) and the script refers to that copy;
    the csv file is referenced by its absolute path.

    The PDB coordinate of the receptor need to have the same topology as in the simulations (i.e. the same as shown in
    the ``data_fname``). The generated script raises an ``IndexError`` if a residue name in the PDB file differs from
    the one recorded in the csv file.

    As PyMol only recognizes atomistic structures, the PDB coordinates of the receptor (provided by ``pdb_fname``) needs
    to be an atomistic structure. For coarse-grained simulation results, this ``pdb_fname`` needs to be a PDB coordinates
    before the coarse-graining step.

    Parameters
    ----------
    fname : str
        The filename of the PyMol script to be written.
    pdb_fname : str
        The PDB coordinate file of the receptor.
    data_fname : str
        The csv file of the interaction data.
    lipid : str
        Lipid residue name
    n_binding_site : int
        Number of binding site detected (shown in the csv file).

    """
    script_fname = os.path.abspath(fname)
    save_dir, _ = os.path.split(script_fname)
    pdb_source = os.path.abspath(pdb_fname)
    pdb_new_fname = os.path.join(save_dir, os.path.basename(pdb_fname))
    # copyfile() refuses to copy a file onto itself, so skip the copy when the
    # PDB file already lives next to the script.
    already_in_place = os.path.exists(pdb_new_fname) and os.path.samefile(pdb_source, pdb_new_fname)
    if not already_in_place:
        copyfile(pdb_source, pdb_new_fname)
    data_fname = os.path.abspath(data_fname)

    # First part of the script: the only braces are the placeholders, so it can
    # go through str.format(). Paths are inserted with !r so that backslashes
    # and quotes end up as valid Python string literals.
    text = """
import numpy as np
import re
import pymol
from pymol import cmd
pymol.finish_launching()

########## files to process ##########
csv_file = {CSV_FILE!r}
pdb_file = {PDB!r}

########## set the sphere scales to corresponding value ##########
value_to_show = "Residence Time"

###### reading data from csv file ##########
num_of_binding_site = {N_BINDING_SITE}

with open(csv_file, "r") as f:
    data_lines = f.readlines()

column_names = data_lines[0].strip().split(",")
for column_idx, column_name in enumerate(column_names):
    if column_name == "Residue":
        column_id_residue_list = column_idx
    elif column_name == "Residue ID":
        column_id_residue_index = column_idx
    elif column_name == "Binding Site ID":
        column_id_BS = column_idx
    elif column_name == value_to_show:
        column_id_value_to_show = column_idx

residue_list = []
residue_rank_set = []
binding_site_identifiers = []
values_to_show = []
for line in data_lines[1:]:
    data_list = line.strip().split(",")
    residue_list.append(data_list[column_id_residue_list])
    residue_rank_set.append(data_list[column_id_residue_index])
    binding_site_identifiers.append(float(data_list[column_id_BS]))
    values_to_show.append(data_list[column_id_value_to_show])

############## read information from pdb file ##########
with open(pdb_file, "r") as f:
    pdb_lines = f.readlines()
residue_identifiers = []
for line in pdb_lines:
    line_stripped = line.strip()
    if line_stripped[:4] == "ATOM":
        identifier = (line_stripped[22:26].strip(), line_stripped[17:20].strip(), line_stripped[21].strip())
        ## residue index, resname, chain id
        if len(residue_identifiers) == 0:
            residue_identifiers.append(identifier)
        elif identifier != residue_identifiers[-1]:
            residue_identifiers.append(identifier)

######### set sphere scales ###############
values_to_show = np.array(values_to_show, dtype=float)
MIN = np.percentile(np.unique(values_to_show), 5)
MAX = np.percentile(np.unique(values_to_show), 100)
X = (values_to_show - np.percentile(np.unique(values_to_show), 50))/(MAX - MIN)
SCALES = 1/(0.5 + np.exp(-X * 5))

######## some pymol settings #########
cmd.set("retain_order", 1)
cmd.set("cartoon_oval_length", 1.0)
cmd.set("cartoon_oval_width", 0.3)
cmd.set("cartoon_color", "white")
cmd.set("stick_radius", 0.35)

##################################
cmd.load(pdb_file, "Prot_{LIPID}")
prefix = "Prot_{LIPID}"
cmd.hide("everything")
cmd.show("cartoon", prefix)
cmd.center(prefix)
cmd.orient(prefix)
colors = np.array([np.random.choice(np.arange(256, dtype=float), size=3) for dummy in range(num_of_binding_site)])
colors /= 255.0
""".format(LIPID=lipid, N_BINDING_SITE=n_binding_site, PDB=pdb_new_fname, CSV_FILE=data_fname)

    # Second part of the script: it contains literal "{}" format fields that
    # belong to the generated code, so it is appended verbatim and unformatted.
    # The chain id was stripped when the PDB was parsed, so a missing chain is
    # the empty string, not a blank.
    text += r"""
residue_list = np.array(residue_list, dtype=str)
residue_rank_set = np.array(residue_rank_set, dtype=int)
binding_site_identifiers = np.array(binding_site_identifiers, dtype=int)
residue_identifiers = list(residue_identifiers)
for bs_id in np.arange(num_of_binding_site):
    cmd.set_color("tmp_{}".format(bs_id), list(colors[bs_id]))
    for entry_id in np.where(binding_site_identifiers == bs_id)[0]:
        selected_residue = residue_list[entry_id]
        selected_residue_rank = residue_rank_set[entry_id]
        identifier_from_pdb = residue_identifiers[selected_residue_rank]
        if re.findall("[a-zA-Z]+$", selected_residue)[0] != identifier_from_pdb[1]:
            raise IndexError(
                "The {}th residue in the provided pdb file ({}{}) is different from that in the simulations ({})!".format(
                    entry_id+1,
                    identifier_from_pdb[0],
                    identifier_from_pdb[1],
                    selected_residue)
            )
        if identifier_from_pdb[2] != "":
            cmd.select("BSid{}_{}".format(bs_id, selected_residue),
                       "chain {} and resid {} and (not name C+O+N)".format(identifier_from_pdb[2], identifier_from_pdb[0]))
        else:
            cmd.select("BSid{}_{}".format(bs_id, selected_residue),
                       "resid {} and (not name C+O+N)".format(identifier_from_pdb[0]))
        cmd.show("spheres", "BSid{}_{}".format(bs_id, selected_residue))
        cmd.set("sphere_scale", SCALES[entry_id], selection="BSid{}_{}".format(bs_id, selected_residue))
        cmd.color("tmp_{}".format(bs_id), "BSid{}_{}".format(bs_id, selected_residue))
    cmd.group("BSid{}".format(bs_id), "BSid{}_*".format(bs_id))

"""
    with open(script_fname, "w") as f:
        f.write(text)

    return
--------------------------------------------------------------------------------
/pylipid/util/rmsd.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | import numpy as np
18 |
def rmsd(V, W):
    """
    Compute the root-mean-square deviation between two point sets.

    The squared differences are summed over every coordinate and divided by
    the number of points (the length of ``V``) before the square root is taken.

    Parameters
    ----------
    V : array
        (N,D) matrix, where N is points and D is dimension.
    W : array
        (N,D) matrix, where N is points and D is dimension.

    Returns
    -------
    rmsd : scalar
        Root-mean-square deviation between the two vectors

    """
    n_points = len(V)
    delta = np.array(V) - np.array(W)
    sum_of_squares = np.square(delta).sum()
    return np.sqrt(sum_of_squares / n_points)
39 |
--------------------------------------------------------------------------------
/pylipid/util/trajectory.py:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | # PyLipID: A python module for analysing protein-lipid interactions
3 | #
4 | # Author: Wanling Song
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | ##############################################################################
16 |
17 | """This module contains functions that deal with trajectories."""
18 |
19 | from collections import defaultdict
20 | import numpy as np
21 |
22 |
23 | __all__ = ["get_traj_info"]
24 |
25 |
def get_traj_info(traj, lipid=None, lipid_atoms=None, resi_offset=0, nprot=1, protein_ref=None, lipid_ref=None):
    """Collect atom/residue index tables and reference structures from a trajectory.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A mdtraj.Trajectory object.
    lipid : str or None
        The residue name of the lipid to check. When None, no lipid information is
        collected and ``lipid_ref`` is returned as given.
    lipid_atoms : a list of str; opt
        The names of lipid atoms that are used to define lipid interaction and lipid binding sites.
        Default is None, that is all the lipid atoms will be used for calculation.
    resi_offset : int, optional, default=0
        Shift of residue index. The new residue index (i.e. the original index + resi_offset) will
        be used in the residue labels.
    nprot : int, optional, default=1
        Number of protein copies in the systems. The protein atoms are split into ``nprot``
        consecutive, equally sized groups, so the copies need to be identical.
    protein_ref : None or mdtraj.Trajectory, optional, default=None
        A mdtraj.Trajectory object that stores the topology and coordinates of a copy of the protein structure.
        When None, it is taken from the first frame of ``traj`` (first protein copy).
    lipid_ref : None or mdtraj.Trajectory, optional, default=None
        A mdtraj.Trajectory object that stores the topology and coordinates of a lipid molecule structure.
        When None and ``lipid`` is given, it is taken from the first frame of ``traj`` (first lipid residue).

    Returns
    -------
    traj_info : dict
        A dictionary with the keys ``protein_residue_atomid_list``, ``lipid_residue_atomid_list``,
        ``protein_residue_id`` and ``residue_list``.
    protein_ref : mdtraj.Trajectory
        A mdtraj.Trajectory object that stores the topology and coordinates of a copy of the protein structure.
    lipid_ref : mdtraj.Trajectory
        A mdtraj.Trajectory object that stores the topology and coordinates of a lipid molecule structure.

    """
    topology = traj.top

    def _atoms_by_residue(atom_indices):
        # Group atom indices per residue and return the groups in ascending residue order.
        grouped = defaultdict(list)
        for atom_index in atom_indices:
            grouped[topology.atom(atom_index).residue.index].append(atom_index)
        return [grouped[resi] for resi in np.sort(list(grouped.keys()))]

    # ---- lipid atom indices ----
    if lipid is None:
        lipid_residue_atomid_list = []
    else:
        lipid_query = "resn {}".format(lipid)
        lipid_atom_indices = topology.select(lipid_query)
        if lipid_atoms is not None:
            # Keep only the atoms whose names were requested.
            selected = [atom_index for atom_index in lipid_atom_indices
                        if topology.atom(atom_index).name in lipid_atoms]
        else:
            selected = lipid_atom_indices
        lipid_residue_atomid_list = _atoms_by_residue(selected)
        if lipid_ref is None:
            # The reference lipid is made of all atoms of the first lipid residue:
            # walk the sorted atom indices until the residue changes.
            first_lipid_atoms = []
            for atom_index in np.sort(topology.select(lipid_query)):
                if first_lipid_atoms:
                    previous = topology.atom(first_lipid_atoms[-1]).residue.index
                    if topology.atom(atom_index).residue.index != previous:
                        break
                first_lipid_atoms.append(atom_index)
            lipid_ref = traj[0].atom_slice(np.unique(first_lipid_atoms), inplace=False)

    # ---- protein atom indices ----
    all_protein_atom_indices = topology.select("protein")
    natoms_per_protein = int(len(all_protein_atom_indices)/nprot)
    protein_residue_atomid_list = []
    for copy_idx in np.arange(nprot):
        start = copy_idx * natoms_per_protein
        stop = (copy_idx + 1) * natoms_per_protein
        protein_residue_atomid_list.append(_atoms_by_residue(all_protein_atom_indices[start:stop]))

    # Residue ids and labels are derived from the first protein copy only.
    first_copy = protein_residue_atomid_list[0]
    protein_residue_id = np.arange(len(first_copy), dtype=int)
    labels = []
    for residue_atoms in first_copy:
        residue = topology.atom(residue_atoms[0]).residue
        labels.append("{}{}".format(residue.resSeq+resi_offset, residue.name))
    residue_list = np.array(labels, dtype=str)

    if protein_ref is None:
        protein_ref = traj[0].atom_slice(all_protein_atom_indices[:natoms_per_protein], inplace=False)

    traj_info = {"protein_residue_atomid_list": protein_residue_atomid_list,
                 "lipid_residue_atomid_list": lipid_residue_atomid_list,
                 "protein_residue_id": protein_residue_id, "residue_list": residue_list}

    return traj_info, protein_ref, lipid_ref
104 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mdtraj
2 | numpy
3 | pandas
4 | matplotlib>=3.3.3
5 | networkx
6 | scipy
7 | python-louvain
8 | logomaker
9 | statsmodels
10 | scikit-learn
11 | tqdm
12 | p_tqdm
13 | kneebow
14 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # setup.py
import pathlib
import re

from setuptools import setup, find_packages

# Directory that contains this setup.py; every file below is resolved against
# it so that the build does not depend on the current working directory.
here = pathlib.Path(__file__).parent.resolve()

# Get the long description from the README file
long_description = (here / 'README.rst').read_text(encoding='utf-8')

# Read the version string from the package without importing the package
# (importing would require its dependencies to be installed already).
VERSIONFILE = "pylipid/_version.py"
verstrline = (here / VERSIONFILE).read_text(encoding='utf-8')
VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]"
mo = re.search(VSRE, verstrline, re.M)
if mo:
    verstr = mo.group(1)
else:
    raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,))

# setup
setup(
    name='pylipid',
    version=verstr,
    description='PyLipID - A Python Library For Lipid Interaction Analysis',
    long_description=long_description,
    long_description_content_type='text/x-rst',
    url='https://github.com/wlsong/PyLipID',
    author='Wanling Song',
    author_email='wanling.song@hotmail.com',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'Topic :: Scientific/Engineering :: Chemistry',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Scientific/Engineering :: Physics',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.9',
    ],
    keywords='simulation tools, network community, binding site',
    python_requires='>=3.6, <4, !=3.8.*',
    packages=find_packages(),
    install_requires=[
        "mdtraj",
        "numpy",
        "pandas",
        "matplotlib>=3.3.3",
        "networkx",
        "scipy",
        "python-louvain",
        "logomaker",
        "statsmodels",
        "scikit-learn",
        "tqdm",
        "p_tqdm",
        "kneebow"
    ]
)
62 |
--------------------------------------------------------------------------------
/tests/api/test_LipidInteraction.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import shutil
4 | from pylipid.api import LipidInteraction
5 | from pylipid.util import check_dir
6 |
class TestLipidInteraction(unittest.TestCase):
    """End-to-end smoke test that drives the LipidInteraction workflow once."""

    def setUp(self):
        # Resolve everything against this file so the test does not depend on
        # the directory the test runner was started from.
        file_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_dir = os.path.normpath(os.path.join(file_dir, "..", "data"))
        # Created here (not in the test body) so that tearDown can always rely on it.
        self.save_dir = check_dir(os.path.join(file_dir, "test_pylipid"))

    def test_pylipid(self):
        trajfile_list = [os.path.join(self.data_dir, run, "protein_lipids.xtc") for run in ("run1", "run2")]
        topfile_list = [os.path.join(self.data_dir, run, "protein_lipids.gro") for run in ("run1", "run2")]
        lipid = "CHOL"
        cutoffs = [0.55, 0.8]
        li = LipidInteraction(trajfile_list, cutoffs=cutoffs, topfile_list=topfile_list, lipid=lipid,
                              nprot=1, save_dir=self.save_dir)
        li.collect_residue_contacts()

        li.compute_residue_duration()
        li.compute_residue_duration(10)
        li.compute_residue_duration([2,3,4])

        li.compute_residue_koff(plot_data=False)
        li.compute_residue_koff(10)
        li.compute_residue_koff([2,4,5,10])

        li.compute_binding_nodes(threshold=4)
        li.compute_binding_nodes(threshold=2, print_data=False)

        li.compute_site_koff(plot_data=False)
        li.compute_site_koff(binding_site_id=[1,2,3])
        li.compute_site_koff(binding_site_id=1)

        li.compute_site_duration()
        li.compute_site_duration(1)
        li.compute_site_duration([0,1])

        li.analyze_bound_poses()
        li.analyze_bound_poses(binding_site_id=[1,2,3])
        li.analyze_bound_poses(binding_site_id=[1,2,3], n_clusters=2)

        li.compute_surface_area()
        li.compute_surface_area(binding_site_id=[1,2,3])
        li.compute_surface_area(binding_site_id=[1, 2, 3], radii={"BB": 0.30, "SC1": 0.2})

        li.write_site_info()
        li.write_site_info(sort_residue="Duration")

        li.show_stats_per_traj()

        li.save_data(item="Dataset")
        li.save_data(item="Duration")

        li.save_coordinate(item="Residence Time")
        li.save_coordinate(item="Duration")

        li.save_pymol_script(pdb_file=os.path.join(self.data_dir, "receptor.pdb"))

        li.plot(item="Duration")
        li.plot(item="Residence Time")

        li.plot_logo(item="Residence Time")
        li.plot_logo(item="Lipid Count")

    def tearDown(self):
        # Remove everything the workflow wrote.
        shutil.rmtree(self.save_dir)
68 |
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/tests/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/tests/data/.DS_Store
--------------------------------------------------------------------------------
/tests/data/run1/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/tests/data/run1/protein_lipids.xtc
--------------------------------------------------------------------------------
/tests/data/run2/protein_lipids.xtc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/tests/data/run2/protein_lipids.xtc
--------------------------------------------------------------------------------
/tests/funcs/test_binding_site.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import shutil
4 | import numpy as np
5 | from pylipid.func import get_node_list, collect_bound_poses, vectorize_poses, calculate_scores, write_bound_poses
6 | from pylipid.func import calculate_site_surface_area
7 | from pylipid import LipidInteraction
8 |
9 |
class TestBindingSites(unittest.TestCase):
    """Tests for binding-site node detection and bound-pose collection."""

    def setUp(self):
        # Resolve the data files against this file so the tests do not depend
        # on the directory the test runner was started from.
        file_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.normpath(os.path.join(file_dir, "..", "data"))
        trajfile_list = [os.path.join(data_dir, run, "protein_lipids.xtc") for run in ("run1", "run2")]
        topfile_list = [os.path.join(data_dir, run, "protein_lipids.gro") for run in ("run1", "run2")]
        lipid = "CHOL"
        cutoffs = [0.55, 0.8]
        self.save_dir = os.path.join(file_dir, "test_binding_site")
        self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid,
                                   nprot=1, save_dir=self.save_dir)
        self.li.collect_residue_contacts(write_log=False, print_log=True)

    def test_get_node_list(self):
        corrcoef = self.li.interaction_corrcoef
        node_list = get_node_list(corrcoef)
        self.assertIsInstance(node_list, list)

    def test_collect_binding_poses(self):
        binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(self.li._node_list)}
        contact_list = self.li.contact_residues_low
        pose_pool = collect_bound_poses(binding_site_map, contact_list, self.li.trajfile_list[0],
                                        self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot)
        self.assertIsInstance(pose_pool, dict)

    def tearDown(self):
        # Remove the output directory used by this test case.
        shutil.rmtree(self.save_dir)
37 |
38 |
39 | class TestBindingPoses(unittest.TestCase):
40 |
41 | def setUp(self):
42 | trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"]
43 | topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"]
44 | lipid = "CHOL"
45 | cutoffs = [0.55, 0.8]
46 | file_dir = os.path.dirname(os.path.abspath(__file__))
47 | self.save_dir = os.path.join(file_dir, "binding_site")
48 | self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid,
49 | nprot=1, save_dir=self.save_dir)
50 | self.li.collect_residue_contacts(write_log=False, print_log=True)
51 | node_list = self.li.compute_binding_nodes(print_data=False)
52 | binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(node_list)}
53 | contact_residue_dict = self.li.contact_residues_low
54 | self.pose_pool = collect_bound_poses(binding_site_map, contact_residue_dict, self.li.trajfile_list[0],
55 | self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot)
56 |
57 | def test_vectorize_poses(self):
58 | for bs_id, nodes in enumerate(self.li._node_list):
59 | dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, self.li._lipid_ref)
60 | self.assertEqual(dist_matrix.shape[0], self.li._lipid_ref.n_atoms)
61 | self.assertEqual(dist_matrix.shape[1], len(self.pose_pool[bs_id]))
62 | self.assertEqual(dist_matrix.shape[2], len(self.li._node_list[bs_id]))
63 | return
64 |
65 | def test_calculate_scores(self):
66 | for bs_id, nodes in enumerate(self.li._node_list):
67 | dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, self.li._lipid_ref)
68 | scores = calculate_scores(dist_matrix)
69 | self.assertEqual(len(scores), pose_traj.n_frames)
70 | scores = calculate_scores(dist_matrix, score_weights={"RHO": 10})
71 | self.assertEqual(len(scores), pose_traj.n_frames)
72 |
73 |
def test_write_binding_poses(self):
    """Write up to five of the highest-scoring poses of each binding site as ``gro`` files."""
    max_poses = 5
    for site_index, site_nodes in enumerate(self.li._node_list):
        distances, poses = vectorize_poses(self.pose_pool[site_index], site_nodes,
                                           self.li._protein_ref, self.li._lipid_ref)
        site_scores = calculate_scores(distances)
        n_selected = min(max_poses, poses.n_frames)
        # argsort is ascending, so reverse it to put the largest scores first.
        descending = np.argsort(site_scores)[::-1]
        best_indices = descending[:n_selected]
        write_bound_poses(poses, best_indices, self.save_dir,
                          pose_prefix="BSid{}_top".format(site_index), pose_format="gro")
82 |
83 |
def test_calculate_site_surface_area(self):
    """Smoke test: ``calculate_site_surface_area`` runs on the computed binding sites.

    The returned value is not inspected; the test only fails if the call raises.
    """
    interaction = self.li
    # Map every binding-site index to its list of nodes.
    site_nodes_by_id = dict(enumerate(interaction._node_list))
    bead_radii = {"BB": 0.36, "SC1": 0.33, "SC2": 0.33, "SC3": 0.33}
    calculate_site_surface_area(
        site_nodes_by_id, bead_radii, interaction.trajfile_list, interaction.topfile_list,
        interaction.nprot, interaction.timeunit, interaction.stride, interaction.dt_traj)
90 |
def tearDown(self):
    """Delete the output directory (``self.save_dir``) and everything inside it."""
    output_dir = self.save_dir
    shutil.rmtree(output_dir)
93 |
94 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
97 |
--------------------------------------------------------------------------------
/tests/funcs/test_clusterer.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import shutil
4 | import numpy as np
5 | from sklearn.decomposition import PCA
6 | from pylipid.func import collect_bound_poses, vectorize_poses, write_bound_poses
7 | from pylipid.func import cluster_DBSCAN, cluster_KMeans
8 | from pylipid import LipidInteraction
9 |
class TestCluster(unittest.TestCase):
    """Exercise ``cluster_DBSCAN`` and ``cluster_KMeans`` on bound lipid poses."""

    def setUp(self):
        """Build a ``LipidInteraction`` for CHOL and collect poses from the first trajectory.

        The trajectory and topology paths are resolved relative to this test
        module rather than the current working directory, so the tests behave
        the same wherever the test runner is started from.
        """
        file_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(file_dir, os.pardir, "data")
        runs = ("run1", "run2")
        trajfile_list = [os.path.join(data_dir, run, "protein_lipids.xtc") for run in runs]
        topfile_list = [os.path.join(data_dir, run, "protein_lipids.gro") for run in runs]
        lipid = "CHOL"
        cutoffs = [0.55, 0.8]
        self.save_dir = os.path.join(file_dir, "binding_site")
        self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid,
                                   nprot=1, save_dir=self.save_dir)
        self.li.collect_residue_contacts(write_log=False, print_log=True)
        node_list = self.li.compute_binding_nodes(print_data=False)
        binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(node_list)}
        contact_residue_dict = self.li.contact_residues_low
        self.pose_pool = collect_bound_poses(binding_site_map, contact_residue_dict, self.li.trajfile_list[0],
                                             self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot)

    def _pose_features(self, bs_id, nodes):
        """Return ``(lipid_dist_per_pose, transformed_data, pose_traj)`` for one binding site.

        ``lipid_dist_per_pose`` holds one flattened distance vector per pose and
        ``transformed_data`` is its PCA projection keeping 95% of the variance.
        """
        dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref,
                                                 self.li._lipid_ref)
        lipid_dist_per_pose = [dist_matrix[:, pose_id, :].ravel()
                               for pose_id in np.arange(dist_matrix.shape[1])]
        transformed_data = PCA(n_components=0.95).fit_transform(lipid_dist_per_pose)
        return lipid_dist_per_pose, transformed_data, pose_traj

    def _write_cluster_representatives(self, pose_traj, cluster_labels, pose_prefix):
        """Write one randomly chosen pose per cluster, skipping the ``-1`` label."""
        cluster_id_set = [label for label in np.unique(cluster_labels) if label != -1]
        selected_pose_id = [np.random.choice(np.where(cluster_labels == cluster_id)[0], 1)[0]
                            for cluster_id in cluster_id_set]
        write_bound_poses(pose_traj, selected_pose_id, self.save_dir,
                          pose_prefix=pose_prefix, pose_format="gro")

    def test_cluster_DBSCAN(self):
        """DBSCAN must return one label per pose; a representative of each cluster is written."""
        for bs_id, nodes in enumerate(self.li._node_list):
            lipid_dist_per_pose, transformed_data, pose_traj = self._pose_features(bs_id, nodes)
            cluster_labels = cluster_DBSCAN(transformed_data, eps=None, min_samples=None,
                                            metric="euclidean")
            self.assertEqual(len(cluster_labels), len(lipid_dist_per_pose))
            self._write_cluster_representatives(pose_traj, cluster_labels,
                                                "BSid{}_cluster_DBSCAN".format(bs_id))

    def test_cluster_KMeans(self):
        """KMeans must return one label per pose; a representative of each cluster is written."""
        for bs_id, nodes in enumerate(self.li._node_list):
            lipid_dist_per_pose, transformed_data, pose_traj = self._pose_features(bs_id, nodes)
            cluster_labels = cluster_KMeans(transformed_data, n_clusters=5)
            self.assertEqual(len(cluster_labels), len(lipid_dist_per_pose))
            self._write_cluster_representatives(pose_traj, cluster_labels,
                                                "BSid{}_cluster_KMeans".format(bs_id))

    def tearDown(self):
        """Remove the output directory written during the test."""
        shutil.rmtree(self.save_dir)
61 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
64 |
--------------------------------------------------------------------------------
/tests/funcs/test_interactions.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from pylipid.func import cal_contact_residues, Duration, cal_occupancy, cal_lipidcount
4 |
class TestCutoff(unittest.TestCase):
    """Unit tests for the contact, duration, occupancy and lipid-count helpers."""

    def test_cal_contact_residues(self):
        """Check the contacts found in a 3 x 6 distance matrix with a cutoff of 1.0.

        Judging by the expected values, rows are matched to residue ids and
        columns to frame ids, and a distance equal to the cutoff counts as a
        contact.
        """
        distances = np.array([
            [0.9, 0.95, 1.2, 1.1, 1.0, 0.9],
            [0.95, 0.9, 0.95, 1.1, 1.2, 1.1],
            [0.90, 0.90, 0.85, 0.95, 1.0, 1.1],
        ])
        contacts, frame_ids, residue_ids = cal_contact_residues(distances, 1.0)
        self.assertEqual(contacts, [[0, 1, 2], [0, 1, 2], [1, 2], [2], [0, 2], [0]])
        self.assertListEqual(list(frame_ids), [0, 1, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4])
        self.assertListEqual(list(residue_ids), [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2])

    def test_Duration(self):
        """Check the durations computed from six low- and high-cutoff contact arrays."""
        low_cutoff_contacts = [np.array(ids) for ids in
                               ([0, 1, 2], [2, 3], [2, 3], [1, 3], [], [])]
        high_cutoff_contacts = [np.array(ids) for ids in
                                ([0, 1, 2, 3], [0, 1, 2, 3], [1, 2, 3], [1, 3], [1, 3], [])]
        calculator = Duration(low_cutoff_contacts, high_cutoff_contacts, 2)
        self.assertEqual(calculator.cal_durations(), [4, 6, 8, 10])

    def test_cal_occupancy(self):
        """Six of the ten entries are non-empty, and the expected occupancy is 60."""
        contacts_per_frame = [[0], [0, 1], [1, 2], [0, 2], [], [], [1], [0], [], []]
        self.assertEqual(cal_occupancy(contacts_per_frame), 60)

    def test_cal_lipid_count(self):
        """The expected lipid count for the ten-entry contact list is 1.5."""
        contacts_per_frame = [[0], [0, 1], [1, 2], [0, 2], [], [], [1], [0], [], []]
        self.assertEqual(cal_lipidcount(contacts_per_frame), 1.5)
32 |
33 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
36 |
--------------------------------------------------------------------------------
/tests/funcs/test_kinetics.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import shutil
4 | import numpy as np
5 | from pylipid.func import cal_koff, cal_survival_func
6 | from pylipid import LipidInteraction
7 |
class TestKinetics(unittest.TestCase):
    """Smoke tests for ``cal_survival_func`` and ``cal_koff`` on collected contact durations."""

    def setUp(self):
        """Collect CHOL residue contacts and cache the total time and timestep.

        The trajectory and topology paths are resolved relative to this test
        module rather than the current working directory, so the tests behave
        the same wherever the test runner is started from.
        """
        file_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(file_dir, os.pardir, "data")
        runs = ("run1", "run2")
        trajfile_list = [os.path.join(data_dir, run, "protein_lipids.xtc") for run in runs]
        topfile_list = [os.path.join(data_dir, run, "protein_lipids.gro") for run in runs]
        lipid = "CHOL"
        cutoffs = [0.55, 1.0]
        self.save_dir = os.path.join(file_dir, "test_kinetics")
        self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid,
                                   nprot=1, save_dir=self.save_dir)
        self.li.collect_residue_contacts()
        # Use the largest total time and the smallest timestep among the values reported.
        self.t_total = np.max(self.li._T_total)
        self.timestep = np.min(self.li._timesteps)

    def test_cal_survivla_function(self):
        """``cal_survival_func`` must return a dict for the durations stored under index 25."""
        delta_t_list = np.arange(0, self.t_total, self.timestep)
        survival_func = cal_survival_func(np.concatenate(self.li.durations[25]), self.t_total, delta_t_list)
        self.assertIsInstance(survival_func, dict)

    def test_cal_koff(self):
        """``cal_koff`` must return a dict of properties for the durations stored under index 25."""
        koff, restime, properties = cal_koff(np.concatenate(self.li.durations[25]), self.t_total,
                                             self.timestep, nbootstrap=20, initial_guess=[1, 1, 1, 1])
        print(koff)
        print(restime)
        print(properties)
        self.assertIsInstance(properties, dict)

    def tearDown(self):
        """Remove the output directory reported by the ``LipidInteraction`` instance."""
        shutil.rmtree(self.li.save_dir)
38 |
39 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
42 |
--------------------------------------------------------------------------------
/tests/plots/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/tests/plots/.DS_Store
--------------------------------------------------------------------------------
/tests/plots/test_koff.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | import numpy as np
4 | import shutil
5 | from pylipid.plot import plot_koff
6 | from pylipid.func import cal_koff
7 | from pylipid.util import check_dir
8 |
class TestPlot(unittest.TestCase):
    """Smoke test for ``plot_koff`` using synthetic, normally distributed durations."""

    def setUp(self):
        """Pass the figure output directory (next to this module) to ``check_dir``."""
        here = os.path.dirname(os.path.abspath(__file__))
        self.save_dir = os.path.join(here, "test_plot")
        check_dir(self.save_dir)

    def test_koff(self):
        """Fit koff on random durations, then plot once without and once with a text panel."""
        t_total = 150
        timestep = 1
        durations = np.random.normal(loc=50, scale=15, size=400)
        koff, restime, properties = cal_koff(durations, t_total, timestep, nbootstrap=10,
                                             initial_guess=[1., 1., 1., 1.], cap=True)

        def draw(file_name, text):
            # Both figures share every argument except the file name and the text.
            plot_koff(durations, properties["delta_t_list"], properties["survival_rates"],
                      properties["n_fitted"], survival_rates_bootstraps=properties["survival_rates_boot_set"],
                      fig_fn=os.path.join(self.save_dir, file_name), title="test koff",
                      timeunit="ns", t_total=t_total, text=text)

        draw("test_koff_plot.pdf", None)

        # set the text printed on the right
        tu = "ns"
        lines = [
            "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off1}}$", properties["ks"][0], tu),
            "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off2}}$", properties["ks"][1], tu),
            "{:14s} = {:.4f}\n".format("$R^2$", properties["r_squared"]),
        ]
        boot_set = properties["ks_boot_set"]
        ks_boot_avg = np.mean(boot_set, axis=0)
        # Coefficient of variation of the bootstrapped rates, in percent.
        cv_avg = 100 * np.std(boot_set, axis=0) / ks_boot_avg
        lines.append("{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off1, boot}}$",
                                                                         ks_boot_avg[0], tu, cv_avg[0]))
        lines.append("{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off2, boot}}$",
                                                                         ks_boot_avg[1], tu, cv_avg[1]))
        lines.append("{:18s} = {:.3f} {:2s}".format("$Res. Time$", properties["res_time"], tu))

        draw("test_koff_plot_withText.pdf", "".join(lines))

    def tearDown(self):
        """Remove the figure output directory."""
        shutil.rmtree(self.save_dir)
49 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
52 |
53 |
--------------------------------------------------------------------------------
/tests/plots/test_plot_1d.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | import numpy as np
4 | import shutil
5 | import pandas as pd
6 | from pylipid.util import check_dir
7 | from pylipid.plot import plot_residue_data, plot_residue_data_logos
8 | from pylipid.plot import plot_binding_site_data, plot_surface_area
9 |
class TestPlot1d(unittest.TestCase):
    """Smoke tests for the 1-D plotting helpers; each test only checks that plotting runs."""

    def setUp(self):
        """Pass the figure output directory (next to this module) to ``check_dir``."""
        file_dir = os.path.dirname(os.path.abspath(__file__))
        self.save_dir = os.path.join(file_dir, "test_plot1d")
        check_dir(self.save_dir)

    def test_plot_residue_data(self):
        """Plot random per-residue data for several residue-index layouts.

        In every case the data array is sized from ``residue_index`` so that
        each residue index has exactly one value.
        """
        letters_base = [chr(num) for num in np.arange(65, 65 + 26)]
        # basic
        residue_index = np.arange(120) + 34
        interactions = np.random.random(size=len(residue_index)) * 150
        plot_residue_data(residue_index, interactions, gap=20,
                          ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_1.pdf"),
                          title="This is a test")
        logos = np.random.choice(letters_base, size=len(residue_index))
        plot_residue_data_logos(residue_index, logos, interactions, gap=100, letter_map=None,
                                color_scheme="chemistry", ylabel="interactions",
                                fn=os.path.join(self.save_dir, "plot_residue_data_logo_1.pdf"))

        # gap in-between sequence
        residue_index = np.concatenate([np.arange(120), np.arange(138, 183), np.arange(355, 382)])
        # One value per residue; the previous hand-computed size did not match residue_index.
        interactions = np.random.random(size=len(residue_index))
        plot_residue_data(residue_index, interactions, gap=50,
                          ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_2.pdf"),
                          title="This is a test")
        logos = np.random.choice(letters_base, size=len(interactions))
        plot_residue_data_logos(residue_index, logos, interactions, gap=100,
                                ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_2.pdf"))

        # two chains in sequences.
        residue_index = np.concatenate([np.arange(150) + 24, np.arange(10, 120)])
        interactions = np.random.random(size=len(residue_index))
        plot_residue_data(residue_index, interactions, gap=20,
                          ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_3.pdf"),
                          title="This is a test")
        logos = np.random.choice(letters_base, size=len(interactions))
        plot_residue_data_logos(residue_index, logos, interactions, gap=100,
                                ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_3.pdf"))

        # check letter mapping
        letter_map = {"ABC": "A", "BCD": "B", "CEF": "C", "DEF": "D", "EFG": "E", "FGH": "F"}
        three_letter_seq = np.random.choice(list(letter_map.keys()), size=23)
        residue_index = np.arange(23) + 52
        interactions = np.random.random(size=len(residue_index))
        plot_residue_data_logos(residue_index, three_letter_seq, interactions, gap=100,
                                ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_4.pdf"),
                                letter_map=letter_map)

    def test_plot_binding_site_data(self):
        """Plot twelve random data series of unequal length, one per binding-site label."""
        lengths = [40, 80, 120, 200] * 3
        toy_dataset = {f"Binding Site {bs_id}": np.random.normal(loc=0, size=length)
                       for bs_id, length in zip(np.arange(12), lengths)}
        # Wrapping each array in a Series lets columns of different length share one frame.
        data_processed = pd.DataFrame(
            {bs_label: pd.Series(data) for bs_label, data in toy_dataset.items()}
        )
        plot_binding_site_data(data_processed, os.path.join(self.save_dir, "binding_site_data.pdf"),
                               title="Binding Site data", ylabel="RMSD (nm)")

    def test_plot_surface_area(self):
        """Plot four random data frames concatenated under two-level tuple keys."""
        full_set = []
        for _ in range(4):
            toy_dataset = {f"Binding Site {bs_id}": np.random.normal(loc=0, size=20)
                           for bs_id in np.arange(12)}
            toy_dataset["Time"] = np.arange(20) * 0.1
            full_set.append(pd.DataFrame(toy_dataset))
        full_set_dataframe = pd.concat(full_set, keys=[(0, 0), (0, 1), (1, 0), (1, 1)])
        plot_surface_area(full_set_dataframe, os.path.join(self.save_dir, "surface_area_data.pdf"), timeunit=None)

    def tearDown(self):
        """Remove the figure output directory."""
        shutil.rmtree(self.save_dir)
81 |
82 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
85 |
86 |
--------------------------------------------------------------------------------
/tests/plots/test_plot_2d.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | import numpy as np
4 | import shutil
5 | from pylipid.util import check_dir
6 | from pylipid.plot import plot_corrcoef
7 |
8 |
class TestPlot2d(unittest.TestCase):
    """Smoke tests for ``plot_corrcoef``; each case only checks that plotting runs."""

    def setUp(self):
        """Pass the figure output directory (next to this module) to ``check_dir``.

        The directory name is specific to this test class so that creating and
        deleting it cannot interfere with the output directory of another
        plotting test module in the same folder.
        """
        file_dir = os.path.dirname(os.path.abspath(__file__))
        self.save_dir = os.path.join(file_dir, "test_plot2d")
        check_dir(self.save_dir)

    def test_plot_corrcoef(self):
        """Plot random square matrices for a contiguous and a non-contiguous residue index."""
        corrcoef = np.random.normal(size=(250, 250))
        residue_index = np.arange(250) + 134
        plot_corrcoef(corrcoef, residue_index, cmap="coolwarm",
                      fn=os.path.join(self.save_dir, "corrcoef_plot_1.pdf"), title="This is a test")

        # gap in sequences
        residue_index = np.concatenate([np.arange(250) + 134, np.arange(500, 899), np.arange(100, 400)])
        corrcoef = np.random.normal(size=(len(residue_index), len(residue_index)))
        plot_corrcoef(corrcoef, residue_index, cmap="coolwarm",
                      fn=os.path.join(self.save_dir, "corrcoef_plot_2.pdf"), title="This is a test")

    def tearDown(self):
        """Remove the figure output directory."""
        shutil.rmtree(self.save_dir)
30 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
33 |
34 |
--------------------------------------------------------------------------------
/tests/util/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wlsong/PyLipID/4dc250d3bd5dbe7aff12594474c7e73916fabcfa/tests/util/.DS_Store
--------------------------------------------------------------------------------
/tests/util/test_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | import numpy as np
4 | import shutil
5 | import mdtraj as md
6 | from pylipid.util import write_pymol_script
7 | from pylipid.util import check_dir
8 | from pylipid.util import rmsd
9 | from pylipid.util import sparse_corrcoef
10 | from pylipid.util import get_traj_info
11 |
class TestScript(unittest.TestCase):
    """Tests for the helpers imported from ``pylipid.util``."""

    def setUp(self):
        """Prepare the output directory and locate the shared test data.

        Both directories are derived from this module's location rather than
        the current working directory, so the tests behave the same wherever
        the test runner is started from.
        """
        file_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_dir = os.path.join(file_dir, os.pardir, "data")
        self.save_dir = os.path.join(file_dir, "test_util")
        check_dir(self.save_dir)

    def test_write_pymol_script(self):
        """Smoke test: ``write_pymol_script`` runs with the receptor and interaction files."""
        write_pymol_script(os.path.join(self.save_dir, "show_bs_info.py"),
                           os.path.join(self.data_dir, "receptor.pdb"),
                           os.path.join(self.data_dir, "Interactions_CHOL.csv"), "CHOL", 10)

    def test_rmsd(self):
        """``rmsd`` of two random 100 x 5 matrices must be a ``float``."""
        matrix_a = np.random.random(size=(100, 5))
        matrix_b = np.random.random(size=(100, 5))
        value = rmsd(matrix_a, matrix_b)
        self.assertIsInstance(value, float)

    def test_sparse_corrcoef(self):
        """``sparse_corrcoef`` must return as many entries as the input has rows."""
        A = np.random.normal(size=(4, 500))
        corrcoefs = sparse_corrcoef(A)
        self.assertEqual(len(corrcoefs), len(A))

    def test_get_traj_info(self):
        """``get_traj_info`` must return ``md.Trajectory`` objects as protein and lipid references."""
        trajfile = os.path.join(self.data_dir, "run1", "protein_lipids.xtc")
        topfile = os.path.join(self.data_dir, "run1", "protein_lipids.gro")
        traj = md.load(trajfile, top=topfile)
        traj_info, protein_ref, lipid_ref = get_traj_info(traj, "CHOL")
        self.assertIsInstance(protein_ref, md.Trajectory)
        self.assertIsInstance(lipid_ref, md.Trajectory)

    def tearDown(self):
        """Remove the output directory."""
        shutil.rmtree(self.save_dir)
44 |
45 |
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
48 |
--------------------------------------------------------------------------------