├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .travis.yml ├── INSTALL.rst ├── LICENSE.rst ├── MANIFEST.in ├── README.rst ├── csb ├── __init__.py ├── apps │ ├── __init__.py │ ├── bfit.py │ ├── bfite.py │ ├── buildhmm.py │ ├── csfrag.py │ ├── embd.py │ ├── helloworld.py │ ├── hhfrag.py │ ├── hhsearch.py │ ├── precision.py │ └── promix.py ├── bio │ ├── __init__.py │ ├── fragments │ │ ├── __init__.py │ │ └── rosetta.py │ ├── hmm │ │ ├── __init__.py │ │ └── pseudocounts.py │ ├── io │ │ ├── __init__.py │ │ ├── clans.py │ │ ├── cs.py │ │ ├── dssp.py │ │ ├── fasta.py │ │ ├── hhpred.py │ │ ├── mrc.py │ │ ├── noe.py │ │ ├── procheck.py │ │ ├── psipred.py │ │ ├── svg.py │ │ ├── vasco.py │ │ ├── whatif.py │ │ └── wwpdb.py │ ├── nmr │ │ ├── __init__.py │ │ └── resources │ │ │ ├── AtomConnectivity.xml │ │ │ ├── RandomCoil.Corrections.tsv │ │ │ └── RandomCoil.Reference.tsv │ ├── sequence │ │ ├── __init__.py │ │ └── alignment.py │ ├── structure │ │ └── __init__.py │ └── utils │ │ └── __init__.py ├── build.py ├── core │ └── __init__.py ├── io │ ├── __init__.py │ ├── plots.py │ └── tsv.py ├── numeric │ ├── __init__.py │ └── integrators.py ├── statistics │ ├── __init__.py │ ├── ars.py │ ├── maxent.py │ ├── mixtures.py │ ├── pdf │ │ ├── __init__.py │ │ └── parameterized.py │ ├── rand.py │ ├── samplers │ │ ├── __init__.py │ │ └── mc │ │ │ ├── __init__.py │ │ │ ├── multichain.py │ │ │ ├── neqsteppropagator.py │ │ │ ├── propagators.py │ │ │ └── singlechain.py │ └── scalemixture.py ├── statmech │ ├── __init__.py │ ├── ensembles.py │ └── wham.py └── test │ ├── __init__.py │ ├── app.py │ ├── cases │ ├── __init__.py │ ├── bio │ │ ├── __init__.py │ │ ├── fragments │ │ │ └── __init__.py │ │ ├── hmm │ │ │ └── __init__.py │ │ ├── io │ │ │ ├── __init__.py │ │ │ ├── clans │ │ │ │ └── __init__.py │ │ │ ├── cs │ │ │ │ └── __init__.py │ │ │ ├── fasta │ │ │ │ └── __init__.py │ │ │ ├── hhpred │ │ │ │ └── __init__.py │ │ │ ├── mrc │ │ │ │ └── __init__.py │ │ │ ├── noe │ │ │ │ └── __init__.py 
│ │ │ ├── procheck │ │ │ │ └── __init__.py │ │ │ ├── whatif │ │ │ │ └── __init__.py │ │ │ └── wwpdb │ │ │ │ └── __init__.py │ │ ├── nmr │ │ │ └── __init__.py │ │ ├── sequence │ │ │ ├── __init__.py │ │ │ └── alignment │ │ │ │ └── __init__.py │ │ ├── structure │ │ │ └── __init__.py │ │ └── utils │ │ │ └── __init__.py │ ├── core │ │ └── __init__.py │ ├── io │ │ └── __init__.py │ ├── numeric │ │ ├── __init__.py │ │ └── integrators.py │ ├── statistics │ │ ├── __init__.py │ │ ├── ars.py │ │ ├── maxent.py │ │ ├── mixtures.py │ │ ├── pdf │ │ │ ├── __init__.py │ │ │ └── parameterized.py │ │ ├── rand.py │ │ ├── samplers │ │ │ └── __init__.py │ │ └── scalemixture.py │ └── statmech │ │ ├── __init__.py │ │ ├── ensembles.py │ │ └── wham.py │ └── data │ ├── 1C3W_10.mrc │ ├── 1d3z.legacy.pdb │ ├── 1d3z.regular.pdb │ ├── 1nz9.full.pickle │ ├── 1nz9.model1.pickle │ ├── 1nz9.pdb │ ├── 1nz9A.frags │ ├── 2JZC.sum │ ├── 2l01.v2.str │ ├── 2l01.v3.str │ ├── 3p1u.pdb │ ├── 3shm_ca.pdb │ ├── ISL5.1.isl │ ├── Sparky.peaks │ ├── Xeasy1.peaks │ ├── Xeasy2.peaks │ ├── ake-xray-ensemble-ca.pdb │ ├── csb.tsv │ ├── d1b24a2.hhm │ ├── d1ea0a1.hhr │ ├── d1nz0a_.a3m │ ├── d1nz0a_.hhm │ ├── d1nz0a_.mfasta │ ├── d1nz0a_.pdb │ ├── mapping.pdb │ ├── mapping2.pdb │ ├── mapping3.pdb │ ├── maxent.pickle │ ├── modified.pdb │ ├── modified2.pdb │ ├── out.clans │ ├── pdbout.txt │ ├── standard.tsv │ ├── struct.ali.mfasta │ ├── test.fa │ ├── test.hhm │ └── test2.hhm ├── epydoc.css ├── requirements.txt └── setup.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | include: 11 | - os: ubuntu-20.04 12 | python-version: "3.6" 13 | - os: ubuntu-22.04 14 | python-version: "3.12" 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - 
run: pip install --editable .[dev] 22 | - run: python csb/build.py -o . 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | .idea/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | -------------------------------------------------------------------------------- 
/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | python: 4 | - '2.7' 5 | - '3.6' 6 | install: 7 | - pip install --upgrade pip 8 | - pip install --editable .[dev] 9 | script: 10 | - python csb/build.py -o . 11 | deploy: 12 | provider: releases 13 | skip_cleanup: true 14 | api_key: 15 | secure: QE+OlJEfZm/u2SnFU40uRwRFrQrbevCRn1PyFGvKUze5JmUVBCJpWG5QJRIvCvkW5A34YF9kK802lWAHOnSk5luTh8N84gGEI+/vgX/cEE5DyNf3lCkoXJXCrcVIUcVCWnZ3S17gyWQsdFmUniAyrHGkOsU/HbkwwolCIAahmEKlP3vR2HGzHtnW2Q3XAoDNY/6BhxFlKdcmrstiWyzrh8mVFyF94/hBdGD0FEsUPye4vTN5z1iaLCtjiO2jzBsB3g3wvvPeRu/R/dxMVyBX3CG7hwKIVNH8vL/O8RE0wHHG0M3f77qw7I4GTg9xd6ODI3sjPgeEP3l4l2CK88XEAXamNo8xxfvUOGCTqxxUID3dU1ueut28vC/3iRCOqTJSzClFPfFAkDhZ1ReaTSxRuGpM86dQkQ9AVMWFc8lTzI1DdZlrCyStFySwYRGdZmPeq8o6tJDHel7OWObVh2UXBhQHjORgDskpxDtWuohS18A4NoWPC6j3Ct3PUPbCnSOzwXQCpHGE2ufGhHEAWNUgJHPK9yNCGRsaGcUnTlA/uU/Q02X7dEq1u2SdNs4f8FMEkOS/4DlCuHWvnmUvs6Zu4d7zk2+wpKb4cY5JM5eWTMH82r576Id8HH33nrOwZThJw5CJSrijx8flWMDfX0qPsbHCP91OXH2zDvD42/IamCA= 16 | file_glob: true 17 | file: "csb-*.*" 18 | on: 19 | tags: true 20 | python: "2.7" 21 | -------------------------------------------------------------------------------- /INSTALL.rst: -------------------------------------------------------------------------------- 1 | Detailed installation instructions can be found here: 2 | 3 | http://csb-toolbox.github.io/installation 4 | -------------------------------------------------------------------------------- /LICENSE.rst: -------------------------------------------------------------------------------- 1 | MIT 2 | Copyright (c) 2012 Michael Habeck 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include csb/bio/nmr/resources *.* 2 | recursive-include csb/test/data *.* 3 | recursive-include docs *.* 4 | include README.rst INSTALL.rst LICENSE.rst 5 | include requirements.txt 6 | 7 | recursive-exclude csb/test/data *.pickle -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Computational Structural Biology Toolbox 2 | ======================================== 3 | 4 | CSB is a python library and application framework, which can be used 5 | to solve problems in the field of structural bioinformatics. If 6 | you are a bioinformatician, software engineer or a researcher working 7 | in this field, chances are you may find something useful here. Our 8 | package consists of a few major components: 9 | 10 | 1. Core class library - object-oriented, granular, with an emphasis 11 | on design and clean interfaces. 12 | 13 | 2. 
Application framework - console applications ("protocols"), 14 | which consume objects from the core library in order to build 15 | something executable (and hopefully useful). 16 | 17 | 3. Test framework - ensures that the library *actually* works. 18 | 19 | 20 | Installation 21 | ------------ 22 | CSB is being developed on Linux. However, compatibility 23 | is a design goal and the package works on any platform, on any modern Python 24 | interpreter. If you find any issues on a platform/interpreter different from 25 | our development environment, please let us know. 26 | 27 | CSB and all of its dependencies can be installed with pip:: 28 | 29 | $ pip install csb 30 | 31 | See http://csb-toolbox.github.io/installation for more details. 32 | 33 | 34 | Running CSB Applications 35 | ------------------------ 36 | 37 | CSB is bundled with a number of executable console csb.apps. Each app 38 | provides a standard command line interface. To run any app, try:: 39 | 40 | $ csb-app --help 41 | 42 | where *csb-app* is the name of the application, such as ``csb-hhfrag``. 43 | For more details on our app framework, including guidelines for writing new 44 | applications, please refer to the API documentation, package "csb.apps". 45 | 46 | 47 | Documentation 48 | ------------- 49 | 50 | The project's web site at `GitHub `_ contains 51 | online documentation and samples. Visit us at: 52 | 53 | http://csb-toolbox.github.io 54 | 55 | Detailed API documentation can be found in the "docs/api" directory in the 56 | distribution package (docs/api/index.html). This documentation is also hosted 57 | on our web site: 58 | 59 | https://csb-toolbox.github.io/api-docs/ 60 | 61 | Many packages contain introductory module level documentation and samples/tutorials. 62 | These are also available in the HTML docs, but a quick way to access them is by using 63 | the built-in python help system. 
For example, for a general introduction 64 | see the module documentation of the root package:: 65 | 66 | $ python -c "import csb; help(csb)" 67 | 68 | If you are interested in a specific package, such as csb.bio.sequence, 69 | try:: 70 | 71 | $ python -c "import csb.bio.sequence; help(csb.bio.sequence)" 72 | 73 | 74 | Contact 75 | ------- 76 | 77 | CSB is developed by Michael Habeck's Computational Structural Biology 78 | `research group `_. 79 | 80 | For complete source code, contributions, support or bug reports please visit 81 | us on GitHub: 82 | 83 | http://github.com/csb-toolbox/ 84 | 85 | 86 | License 87 | ------- 88 | 89 | CSB is open source and distributed under OSI-approved MIT license. 90 | :: 91 | 92 | Copyright (c) 2012 Michael Habeck 93 | 94 | Permission is hereby granted, free of charge, to any person obtaining 95 | a copy of this software and associated documentation files (the 96 | "Software"), to deal in the Software without restriction, including 97 | without limitation the rights to use, copy, modify, merge, publish, 98 | distribute, sublicense, and/or sell copies of the Software, and to 99 | permit persons to whom the Software is furnished to do so, subject to 100 | the following conditions: 101 | 102 | The above copyright notice and this permission notice shall be 103 | included in all copies or substantial portions of the Software. 104 | 105 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 106 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 107 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 108 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 109 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 110 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 111 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 112 | 113 | ------------ 114 | 115 | .. 
image:: https://github.com/csb-toolbox/CSB/workflows/CI/badge.svg 116 |   :target: https://github.com/csb-toolbox/CSB/actions 117 | 118 | -------------------------------------------------------------------------------- /csb/apps/bfit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python application for robust structure superposition of two structures. 3 | bfit models non-rigid displacements in protein ensembles with outlier-tolerant 4 | probability distributions. 5 | """ 6 | import numpy 7 | 8 | import csb.apps 9 | import csb.bio.utils 10 | 11 | from csb.bio.io.wwpdb import LegacyStructureParser 12 | from csb.bio.sequence import SequenceAlignment 13 | 14 | 15 | class ExitCodes(csb.apps.ExitCodes): 16 | IO_ERROR = 2 17 | INPUT_ERROR = 3 18 | 19 | class AppRunner(csb.apps.AppRunner): 20 | 21 | @property 22 | def target(self): 23 | return BFitApp 24 | 25 | def command_line(self): 26 | 27 | cmd = csb.apps.ArgHandler(self.program, __doc__) 28 | 29 | # Input structures 30 | cmd.add_positional_argument('pdb1', str, 31 | 'full path to the first structure') 32 | 33 | cmd.add_positional_argument('pdb2', str, 34 | 'full path to the second structure') 35 | 36 | # Optional arguments 37 | cmd.add_scalar_option('chain1', 'c', str, 38 | 'Chain of the first structure', 39 | default='A') 40 | 41 | cmd.add_scalar_option('chain2', 'd', str, 42 | 'Chain of the second structure', 43 | default='A') 44 | 45 | cmd.add_scalar_option('scalemixture', 's', str, 46 | 'Scale mixture distribution', 47 | default='student', 48 | choices=['student', 'k']) 49 | 50 | 51 | cmd.add_scalar_option('alignment', 'a', str, 52 | 'Alignment in fasta format defining equivalent positions\n' 53 | + 'Assumes that chain1 is the first sequence of ' 54 | + 'the alignment and chain2 the second sequence') 55 | 56 | cmd.add_scalar_option('outfile', 'o', str, 57 | 'file to which the rotated second ' + 58 | 'structure will be written', 59 | default='bfit.pdb') 60 | 61 | 
class BFitApp(csb.apps.Application):
    """
    Python application for robust structure superposition of two protein structures
    """

    def main(self):
        """
        Parse both input PDB files, superimpose the second structure onto the
        first one and write the transformed second structure to the output file.

        The CA coordinates of the selected chains are fitted with
        L{csb.bio.utils.bfit}. If an alignment file is given, only the
        columns without gaps in the first two sequences are used as
        equivalent positions.

        Exits with L{ExitCodes.IO_ERROR} if a PDB file cannot be read and with
        L{ExitCodes.INPUT_ERROR} if the coordinate sets differ in length.
        """
        try:
            parser = LegacyStructureParser(self.args.pdb1)
            r = parser.parse()

            parser = LegacyStructureParser(self.args.pdb2)
            m = parser.parse()
        except IOError as e:
            # IOError has no "value" attribute; str(e) already carries the message
            self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

        X = numpy.array(r[self.args.chain1].get_coordinates(['CA'], True))
        Y = numpy.array(m[self.args.chain2].get_coordinates(['CA'], True))

        if self.args.alignment is not None:
            # the "file" builtin does not exist in Python 3; open() inside a
            # with-block also guarantees that the handle gets closed
            with open(self.args.alignment) as stream:
                align = SequenceAlignment.parse(stream.read())
            align = align[:2, :]

            matches = []
            for i in range(1, align.length + 1):
                if not align.gap_at(i):
                    # ranks are 1-based, coordinate arrays are 0-based
                    matches.append([align.columns[i][0].rank - 1,
                                    align.columns[i][1].rank - 1])
            matches = numpy.array(matches)
            X = X[matches[:, 0], :]
            Y = Y[matches[:, 1], :]

        if len(X) != len(Y):
            self.exit('Structures are of different lengths,' +
                      ' please specify an alignment',
                      ExitCodes.INPUT_ERROR)

        R, t = csb.bio.utils.bfit(X, Y, self.args.niter,
                                  self.args.scalemixture, self.args.em)

        m.transform(R, t)
        m.to_pdb(self.args.outfile)
class BFitApp(csb.apps.Application):
    """
    Python application for robust structure superposition of an ensemble of structures
    """

    def main(self):
        """
        Superimpose all models of the input ensemble onto their average
        structure and write the transformed ensemble to the output file.

        Each model is first fitted to the average structure with
        L{csb.bio.utils.fit}. Then, for C{niter} cycles, the scale mixture is
        re-estimated from the per-position distances to the average structure
        and every model is re-fitted with L{csb.bio.utils.wfit}, using the
        mixture scales as weights.

        Exits with L{ExitCodes.IO_ERROR} if the PDB file cannot be read and
        with C{ExitCodes.USAGE_ERROR} if it contains fewer than two models.
        """
        try:
            parser = LegacyStructureParser(self.args.pdb)
            models = parser.models()

        except IOError as e:
            # IOError has no "value" attribute; str(e) already carries the message
            self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

        if len(models) < 2:
            self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

        ensemble = parser.parse_models(models)
        X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble])
        x_mu = average_structure(X)
        m = X.shape[0]
        R = numpy.zeros((m, 3, 3))
        t = numpy.ones((m, 3))

        prior = GammaPrior()
        mixture = ScaleMixture(scales=X.shape[1],
                               prior=prior, d=3)

        # initial, unweighted fit of every model to the average structure
        for i in range(m):
            R[i, :, :], t[i, :] = fit(x_mu, X[i])

        # gibbs sampling cycle
        for _ in range(self.args.niter):
            # apply rotation: distance of every position to the average structure
            data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
                                for i in range(m)]).T
            # sample scales
            mixture.estimate(data)
            # sample rotations
            for i in range(m):
                R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

        out_ensemble = csb.bio.structure.Ensemble()

        for i, model in enumerate(ensemble):
            model.transform(R[i], t[i])
            out_ensemble.models.append(model)

        out_ensemble.to_pdb(self.args.outfile)
7 | """ 8 | 9 | import os 10 | import numpy 11 | import csb.apps 12 | 13 | from numpy import sum, sqrt 14 | 15 | from csb.numeric import convolve, correlate, trim 16 | from csb.bio.io.mrc import DensityMapReader, DensityMapWriter, DensityInfo, DensityMapFormatError 17 | 18 | 19 | class ExitCodes(csb.apps.ExitCodes): 20 | 21 | IO_ERROR = 2 22 | INVALID_DATA = 3 23 | ARGUMENT_ERROR = 4 24 | 25 | 26 | class AppRunner(csb.apps.AppRunner): 27 | 28 | @property 29 | def target(self): 30 | return DeconvolutionApp 31 | 32 | def command_line(self): 33 | 34 | 35 | cmd = csb.apps.ArgHandler(self.program, __doc__) 36 | 37 | cmd.add_scalar_option('psf-size', 's', int, 'size of the point spread function', default=15) 38 | cmd.add_scalar_option('output', 'o', str, 'output directory of the sharpened maps', default='.') 39 | cmd.add_scalar_option('iterations', 'i', int, 'number of iterations', default=1000) 40 | cmd.add_scalar_option('output-frequency', 'f', int, 'create a map file each f iterations', default=50) 41 | cmd.add_boolean_option('verbose', 'v', 'verbose mode') 42 | 43 | cmd.add_positional_argument('mapfile', str, 'Input Cryo EM file in CCP4 MRC format') 44 | 45 | return cmd 46 | 47 | 48 | class DeconvolutionApp(csb.apps.Application): 49 | 50 | def main(self): 51 | 52 | if not os.path.isfile(self.args.mapfile): 53 | DeconvolutionApp.exit('Input file not found.', code=ExitCodes.IO_ERROR) 54 | 55 | if not os.path.isdir(self.args.output): 56 | DeconvolutionApp.exit('Output directory does not exist.', code=ExitCodes.IO_ERROR) 57 | 58 | if self.args.psf_size < 1: 59 | DeconvolutionApp.exit('PSF size must be a positive number.', code=ExitCodes.ARGUMENT_ERROR) 60 | 61 | if self.args.iterations < 1: 62 | DeconvolutionApp.exit('Invalid number of iterations.', code=ExitCodes.ARGUMENT_ERROR) 63 | 64 | if self.args.output_frequency < 1: 65 | DeconvolutionApp.exit('Output frequency must be a positive number.', code=ExitCodes.ARGUMENT_ERROR) 66 | 67 | if self.args.iterations < 
self.args.output_frequency: 68 | DeconvolutionApp.exit('Output frequency is too low.', code=ExitCodes.ARGUMENT_ERROR) 69 | 70 | self.args.output = os.path.abspath(self.args.output) 71 | 72 | self.run() 73 | 74 | def run(self): 75 | 76 | writer = DensityMapWriter() 77 | 78 | self.log('Reading input density map...') 79 | try: 80 | input = DensityMapReader(self.args.mapfile).read() 81 | embd = Deconvolution(input.data, self.args.psf_size) 82 | 83 | except DensityMapFormatError as e: 84 | msg = 'Error reading input MRC file: {0}'.format(e) 85 | DeconvolutionApp.exit(msg, code=ExitCodes.INVALID_DATA) 86 | 87 | self.log('Running {0} iterations...'.format(self.args.iterations)) 88 | self.log(' Iteration Loss Correlation Output') 89 | 90 | for i in range(1, self.args.iterations + 1): 91 | embd.run_once() 92 | 93 | if i % self.args.output_frequency == 0: 94 | output = OutputPathBuilder(self.args, i) 95 | 96 | density = DensityInfo(embd.data, None, None, header=input.header) 97 | writer.write_file(output.fullpath, density) 98 | 99 | self.log('{0:>9}. 
class Util(object):
    """
    Numeric helper functions used by the deconvolution code.
    """

    @staticmethod
    def corr(x, y, center=False):
        """
        Compute the normalized cross-correlation of two arrays, i.e.
        C{sum(x * y) / (|x| * |y|)}.

        @param x: first array
        @type x: array
        @param y: second array
        @type y: array
        @param center: if True, subtract the respective mean from both
                       arrays before correlating them
        @type center: bool

        @return: correlation of C{x} and C{y}
        @rtype: float
        """
        if center:
            x = x - x.mean()
            y = y - y.mean()

        # normalize by the norms of BOTH arrays; dividing by |x| twice only
        # gives a correlation when x and y happen to have the same norm
        return sum(x * y) / sqrt(sum(x * x)) / sqrt(sum(y * y))
142 | 143 | @param data: EM density map data (data field of L{csb.bio.io.mrc.DensityInfo}) 144 | @type data: array 145 | @param psf_size: point spread function size 146 | @type psf_size: ints 147 | @param beta_x: hyperparameters of sparseness constraints 148 | @type beta_x: float 149 | @param beta_f: hyperparameters of sparseness constraints 150 | @type beta_f: float 151 | """ 152 | 153 | def __init__(self, data, psf_size, beta_x=1e-10, beta_f=1e-10, cache=True): 154 | 155 | self._f = [] 156 | self._x = [] 157 | self._y = numpy.array(data) 158 | self._loss = [] 159 | self._corr = [] 160 | 161 | self._ycache = None 162 | self._cache = bool(cache) 163 | 164 | self._beta_x = float(beta_x) 165 | self._beta_f = float(beta_f) 166 | 167 | shape_psf = (psf_size, psf_size, psf_size) 168 | self._initialize(shape_psf) 169 | 170 | @property 171 | def beta_x(self): 172 | return self._beta_x 173 | 174 | @property 175 | def beta_f(self): 176 | return self._beta_f 177 | 178 | @property 179 | def loss(self): 180 | """ 181 | Current loss value. 182 | """ 183 | if len(self._loss) > 0: 184 | return float(self._loss[-1]) 185 | else: 186 | return None 187 | 188 | @property 189 | def correlation(self): 190 | """ 191 | Current correlation value. 192 | """ 193 | if len(self._corr) > 0: 194 | return float(self._corr[-1]) 195 | else: 196 | return None 197 | 198 | @property 199 | def data(self): 200 | return trim(self._x, self._f.shape) 201 | 202 | def _initialize(self, shape_psf): 203 | """ 204 | Initialize with flat image and psf. 
205 | """ 206 | self._f = numpy.ones(shape_psf) 207 | self._x = numpy.ones(numpy.array(self._y.shape) + numpy.array(shape_psf) - 1) 208 | 209 | self._normalize_psf() 210 | 211 | def _normalize_psf(self): 212 | self._f /= self._f.sum() 213 | 214 | def _calculate_image(self): 215 | return convolve(self._f, self._x) 216 | 217 | def calculate_image(self, cache=False): 218 | 219 | if cache and self._ycache is not None: 220 | return self._ycache 221 | else: 222 | y = self._calculate_image() 223 | if self._cache: 224 | self._ycache = y 225 | return y 226 | 227 | def _update_map(self): 228 | 229 | y = self.calculate_image() 230 | 231 | N = correlate(self._f, self._y) - self.beta_x 232 | D = correlate(self._f, y) 233 | 234 | self._x *= numpy.clip(N, 1e-300, 1e300) / numpy.clip(D, 1e-300, 1e300) 235 | 236 | def _update_psf(self): 237 | 238 | y = self.calculate_image() 239 | 240 | N = correlate(self._x, self._y) - self.beta_f 241 | D = correlate(self._x, y) 242 | 243 | self._f *= numpy.clip(N, 1e-300, 1e300) / numpy.clip(D, 1e-300, 1e300) 244 | self._normalize_psf() 245 | 246 | def eval_loss(self, cache=False): 247 | 248 | y = self.calculate_image(cache=cache) 249 | 250 | return 0.5 * ((self._y - y) ** 2).sum() + \ 251 | + self.beta_f * self._f.sum() + self.beta_x * self._x.sum() 252 | 253 | def eval_corr(self, cache=False): 254 | 255 | y = self.calculate_image(cache=cache) 256 | return Util.corr(self._y, y) 257 | 258 | def run_once(self): 259 | """ 260 | Run a single iteration. 261 | """ 262 | 263 | self._loss.append(self.eval_loss(cache=True)) 264 | self._corr.append(self.eval_corr(cache=True)) 265 | 266 | self._update_map() 267 | self._update_psf() 268 | 269 | def run(self, iterations): 270 | """ 271 | Run multiple iterations. 
class HelloWorldApp(csb.apps.Application):
    """
    Dummy application: logs the configured text, optionally in upper case.
    """

    def main(self):
        """
        Log the text (upper-cased if the C{upper} flag is set), followed by
        a completion message.
        """
        text = self.args.text.upper() if self.args.upper else self.args.text

        for message in (text, 'HW: done.'):
            self.log(message)
class AppRunner(csb.apps.AppRunner):
    """
    Command line front end of the HHsearch application.
    """

    @property
    def target(self):
        return HHsearchApp

    def command_line(self):
        """
        Build the argument handler: HHsearch binary, degree of parallelism
        (defaults to the number of CPUs), database HMM and the query files.
        """
        cpu_count = mp.cpu_count()
        handler = csb.apps.ArgHandler(self.program, __doc__)

        # (name, short name, type, help text, extra keyword arguments)
        scalar_options = (
            ('binary', 'b', str, 'full path to the HHsearch binary ', {'default': 'hhsearch'}),
            ('cpu', 'c', int, 'maximum degree of parallelism', {'default': cpu_count}),
            ('database', 'd', str, 'the subject (database) HMM file', {'required': True}),
        )
        for name, short, kind, description, extra in scalar_options:
            handler.add_scalar_option(name, short, kind, description, **extra)

        handler.add_array_argument('query', str, 'query HMM file(s)')

        return handler
class HHsearch(object):
    """
    Thin wrapper around the HHsearch command line program.

    Options are kept in an internal dictionary keyed by the command line
    switch name (see L{HHsearch.Options}); an option set to C{None} or to
    an empty string is not passed to the program.

    @param binary: name of, or path to, the HHsearch executable
    @type binary: str
    @param db: subject (database) HMM file
    @type db: str
    @param cpu: value for the C{-cpu} switch (omitted when None)
    @type cpu: int
    """

    class Options(object):
        """
        Command line switch names used as keys of the option dictionary.
        """
        CPU = 'cpu'
        SS = 'ssm'
        MACT = 'mact'
        MAX_HITS = 'Z'
        MAX_ALI = 'B'
        MAX_E = 'E'
        MIN_P = 'p'

    def __init__(self, binary, db, cpu=None):

        self._program = binary
        self._db = db
        self._opt = {}
        self._parser = csb.bio.io.HHOutputParser()

        self.cpu = cpu
        self.ss = None
        self.mac_threshold = None
        self.max_hits = None
        self.max_alignments = None
        self.max_evalue = None
        self.min_probability = None

    @property
    def program(self):
        return self._program
    @program.setter
    def program(self, value):
        self._program = value

    @property
    def db(self):
        return self._db
    @db.setter
    def db(self, value):
        self._db = value

    @property
    def parser(self):
        return self._parser
    @parser.setter
    def parser(self, value):
        self._parser = value

    @property
    def cpu(self):
        return self._get(HHsearch.Options.CPU)
    @cpu.setter
    def cpu(self, value):
        self._opt[HHsearch.Options.CPU] = value

    @property
    def ss(self):
        return self._get(HHsearch.Options.SS)
    @ss.setter
    def ss(self, value):
        self._opt[HHsearch.Options.SS] = value

    @property
    def mac_threshold(self):
        return self._get(HHsearch.Options.MACT)
    @mac_threshold.setter
    def mac_threshold(self, value):
        self._opt[HHsearch.Options.MACT] = value

    @property
    def max_hits(self):
        return self._get(HHsearch.Options.MAX_HITS)
    @max_hits.setter
    def max_hits(self, value):
        self._opt[HHsearch.Options.MAX_HITS] = value

    @property
    def max_alignments(self):
        return self._get(HHsearch.Options.MAX_ALI)
    @max_alignments.setter
    def max_alignments(self, value):
        self._opt[HHsearch.Options.MAX_ALI] = value

    @property
    def max_evalue(self):
        return self._get(HHsearch.Options.MAX_E)
    @max_evalue.setter
    def max_evalue(self, value):
        self._opt[HHsearch.Options.MAX_E] = value

    @property
    def min_probability(self):
        return self._get(HHsearch.Options.MIN_P)
    @min_probability.setter
    def min_probability(self, value):
        self._opt[HHsearch.Options.MIN_P] = value

    def _get(self, option):
        """
        Return the stored value of C{option}, or None if it was never set.
        """
        return self._opt.get(option)

    def _options(self):
        """
        Render all options that are set as a command line fragment.

        Boolean options are rendered as bare switches and only when True;
        all other options are rendered as "-name value".

        @rtype: str
        """
        options = []

        for option, value in self._opt.items():

            if value is None or value == '':
                continue

            if isinstance(value, bool):
                # a switch that is turned off must not appear at all
                if value:
                    options.append('-{0}'.format(option))
            else:
                options.append('-{0} {1}'.format(option, value))

        return ' '.join(options)

    def run(self, context):
        """
        Run the program for a single query and store the parsed output
        in C{context.result}.

        @param context: task to run; its C{query} text is written to a
                        temporary input file
        @type context: L{Context}

        @return: the same C{context}, with its result populated
        @rtype: L{Context}
        """
        with csb.io.TempFile() as q:

            q.write(context.query)
            q.flush()

            with csb.io.TempFile() as o:

                cmd = '{0.program} -i {1} -d {0.db} -o {2} {3}'.format(self, q.name, o.name, self._options())
                csb.io.Shell.runstrict(cmd)

                context.result = self.parser.parse_file(o.name)
                return context

    def runmany(self, contexts, workers=mp.cpu_count(), cpu=1):
        """
        Run the program for many queries using a pool of worker processes.

        @param contexts: tasks to run
        @type contexts: list of L{Context}
        @param workers: maximum number of worker processes; never more
                        than the number of tasks
        @type workers: int
        @param cpu: value of the C{-cpu} switch for each individual run
        @type cpu: int

        @return: the executed tasks; the list is empty if there was nothing
                 to run or if the run was interrupted from the keyboard
        @rtype: list of L{Context}
        """
        if not contexts:
            # nothing to do; a pool with zero workers can't be created
            return []

        if workers > len(contexts):
            workers = len(contexts)

        results = []
        taskargs = [(self.program, self.db, cpu, c) for c in contexts]

        pool = mp.Pool(workers)

        try:
            results.extend(pool.map(_task, taskargs))
        except KeyboardInterrupt:
            # deliberate: return whatever has been collected so far
            pass
        finally:
            pool.terminate()

        return results
class ProMixApp(csb.apps.Application):
    """
    ProMix application: fit a mixture model to the C-alpha coordinates of a
    multi-model PDB file and write one superposed PDB file per component.
    """

    def main(self):
        """
        Parse the input ensemble and dispatch to the requested mixture type.

        @raise ValueError: if the mixture type is neither "segments" nor
                           "conformers"
        """
        try:
            parser = LegacyStructureParser(self.args.infile)
            models = parser.models()
        except Exception:
            # Exception rather than a bare except: Ctrl-C and SystemExit
            # must not be reported as a PDB parsing failure
            self.exit('PDB file parsing failed', ExitCodes.IO_ERROR)

        if len(models) < 2:
            self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

        ensemble = parser.parse_models(models)
        X = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble])

        if self.args.type == 'segments':
            self.main_segments(ensemble, X)
        elif self.args.type == 'conformers':
            self.main_conformers(ensemble, X)
        else:
            raise ValueError('type must be "segments" or "conformers"')

    def main_segments(self, ensemble, X):
        """
        Fit a segment mixture and write the whole ensemble once per
        component, each time superposed on that component.

        @param ensemble: parsed models
        @param X: coordinate array built from C{ensemble} in L{main}
        """
        # NOTE(review): whitespace runs inside the log format strings may have
        # been collapsed in the listing this was restored from - verify.
        mixture = mixtures.SegmentMixture.new(X, self.args.components)
        self.log('Number of segments: {0}'.format(mixture.K))

        for k, (sigma, w) in enumerate(zip(mixture.sigma, mixture.w)):
            outfile = 'promix_segment_{0}.pdb'.format(k + 1)
            self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k + 1, sigma, w, outfile))

            for model, R, t in zip(ensemble, mixture.R, mixture.t):
                if k > 0:
                    # the models are transformed in place: re-apply the previous
                    # component's R, t before applying this component's inverse
                    model.transform(R[k - 1], t[k - 1])
                R = R[k].T
                t = -numpy.dot(R, t[k])
                model.transform(R, t)

            ensemble.to_pdb(outfile)

    def main_conformers(self, ensemble, X):
        """
        Fit a conformer mixture and write, for every component, the subset
        of models assigned to it.

        @param ensemble: parsed models
        @param X: coordinate array built from C{ensemble} in L{main}
        """
        mixture = mixtures.ConformerMixture.new(X, self.args.components)
        self.log('Number of conformers: {0}'.format(mixture.K))

        membership = mixture.membership

        for k, (sigma, w) in enumerate(zip(mixture.sigma, mixture.w)):
            outfile = 'promix_conformer_{0}.pdb'.format(k + 1)
            self.log(' {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k + 1, sigma, w, outfile))

            ek = csb.bio.structure.Ensemble()

            for model, R, t, mk in zip(ensemble, mixture.R, mixture.t, membership):
                if mk != k:
                    continue
                R = R[k].T
                t = -numpy.dot(R, t[k])
                model.transform(R, t)
                ek.models.append(model)

            ek.to_pdb(outfile)
class ChemShiftFormatError(ValueError):
    pass


class ChemShiftReader(object):
    """
    Simple NMR STAR v2 chemical shift reader.

    @note: This is not a full-fledged, semantic NMR STAR parser. It handles
           only the chemical shift table.
    """

    FRAME = 'save_assigned_chemical_shifts'

    RANK = '_Residue_seq_code'
    RESIDUE = '_Residue_label'
    ATOM = '_Atom_name'
    ELEMENT = '_Atom_type'
    SHIFT = '_Chem_shift_value'

    @staticmethod
    def create(frame=FRAME, version=2):
        """
        Parser factory: create a new parser, given a saveframe name
        and format version.

        @param frame: name of the saveframe to read
        @type frame: str
        @param version: NMR STAR format version
        @type version: int

        @return: an instance of any L{ChemShiftReader} class
        @rtype: L{ChemShiftReader}

        @raise ValueError: if C{version} is neither 2 nor 3
        """

        if version == 3:
            return ChemShift3Reader(frame=frame)
        elif version == 2:
            return ChemShiftReader(frame=frame)
        else:
            raise ValueError('Unknown NMR-STAR version')

    @staticmethod
    def guess(file, frame=FRAME):
        """
        Parser factory: try to guess the correct NMR STAR version from a given
        file and create an appropriate parser.

        @param file: NMR STAR path and file name
        @type file: str
        @param frame: name of the saveframe to read
        @type frame: str

        @return: an instance of any L{ChemShiftReader} class
        @rtype: L{ChemShiftReader}

        @raise ChemShiftFormatError: on failure to determine the NMR STAR version
        """

        with open(file) as cs:
            content = cs.read()

        if not content.strip():
            # empty file: fall back to the default version, but keep
            # the saveframe name requested by the caller
            return ChemShiftReader.create(frame)
        elif ChemShift3Reader.SHIFT3 in content:
            return ChemShiftReader.create(frame, version=3)
        elif ChemShiftReader.SHIFT in content:
            return ChemShiftReader.create(frame, version=2)
        else:
            raise ChemShiftFormatError("Can't guess NMR-STAR version")

    def __init__(self, frame=FRAME):
        self._frame = frame

    def read_file(self, filename):
        """
        Parse the specified file.

        @param filename: file path and name
        @type filename: str

        @rtype: tuple of L{ChemShiftInfo}
        """
        with open(filename) as stream:
            return self.read_shifts(stream.read())

    def read_shifts(self, star_table):
        """
        Parse a given NMR STAR chemical shift table.

        @param star_table: NMR STAR chemical shift table
        @type star_table: str

        @rtype: tuple of L{ChemShiftInfo}
        @raise ChemShiftFormatError: on parse error
        """

        shifts = []

        init = False
        in_shifts = False
        fields = []
        lines = iter(star_table.splitlines())

        if self._frame in star_table:
            # skip everything up to the requested saveframe
            self._scroll(lines, self._frame)

        for l in lines:
            ls = l.strip()

            if not in_shifts:
                # header part: "loop_", then one field name per line,
                # terminated by an empty line

                if ls == 'loop_':
                    assert in_shifts is False and not fields and init is False
                    init = True
                    continue

                elif init and ls.startswith('_'):
                    assert in_shifts is False
                    fields.append(l.strip())
                    continue

                elif init and not ls:
                    if len(fields) < 1:
                        raise ChemShiftFormatError("No fields found in the CS table")
                    in_shifts = True
                    continue

            else:
                # data part: one whitespace-separated record per line

                if ls == 'stop_':
                    break

                elif ls.startswith('#'):
                    continue

                elif ls:
                    values = l.split()
                    if len(values) < len(fields):
                        raise ChemShiftFormatError("Insufficient number of values: {0}".format(l))
                    data = dict(zip(fields, values))

                    shifts.append(self._create_shift(data))

        return tuple(shifts)

    def _scroll(self, iterator, field):
        """
        Advance C{iterator} just past the first line starting with C{field}.
        """
        for line in iterator:
            if line.lstrip().startswith(field):
                break

    def _create_shift(self, data):
        """
        Build a L{ChemShiftInfo} from one table record.

        @param data: field name => raw string value
        @type data: dict

        @raise ChemShiftFormatError: on missing field or unparsable number
        """
        try:
            position = int(data[ChemShiftReader.RANK])
            residue = data[ChemShiftReader.RESIDUE]
            name = data[ChemShiftReader.ATOM]
            element = data[ChemShiftReader.ELEMENT]
            shift = float(data[ChemShiftReader.SHIFT])

        except KeyError as ke:
            raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
        except ValueError as ve:
            raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))

        return ChemShiftInfo(position, residue, name, element, shift)


class ChemShift3Reader(ChemShiftReader):
    """
    Simple NMR STAR v3 chemical shift reader.

    @note: This is not a full-fledged, semantic NMR STAR parser. It handles
           only the chemical shift table.
    """

    RANK3 = '_Atom_chem_shift.Seq_ID'
    RESIDUE3 = '_Atom_chem_shift.Comp_ID'
    ATOM3 = '_Atom_chem_shift.Atom_ID'
    ELEMENT3 = '_Atom_chem_shift.Atom_type'
    SHIFT3 = '_Atom_chem_shift.Val'

    def _create_shift(self, data):
        """
        Build a L{ChemShiftInfo} from one v3 table record.

        @param data: field name => raw string value
        @type data: dict

        @raise ChemShiftFormatError: on missing field or unparsable number
        """
        try:
            # convert here, exactly like the v2 reader does, so that bad
            # numbers are reported as ChemShiftFormatError
            position = int(data[ChemShift3Reader.RANK3])
            residue = data[ChemShift3Reader.RESIDUE3]
            name = data[ChemShift3Reader.ATOM3]
            element = data[ChemShift3Reader.ELEMENT3]
            shift = float(data[ChemShift3Reader.SHIFT3])

        except KeyError as ke:
            raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
        except ValueError as ve:
            raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))

        return ChemShiftInfo(position, residue, name, element, shift)
def parse(self, dssp_file):
    """
    Parse a DSSP file into per-chain residue assignments.

    @param dssp_file: source DSSP file to parse
    @type dssp_file: str
    @return: a dictionary of L{ResidueAssignmentInfo} objects, keyed by
             chain and then by residue ID
    @rtype: dict

    @raise UnknownSecStructureError: on unknown secondary structure code
    """

    data = {}
    start = False
    offset = 0                          # assume old DSSP format
    accession = None

    # the context manager closes the file even if parsing fails
    with open(dssp_file) as stream:

        for line in stream:

            if not start:

                if line.startswith('HEADER'):
                    accession = line[62:66].strip().lower()

                # NOTE(review): this marker is copied from a listing in which
                # whitespace runs may have been collapsed - verify the literal
                # against the residue table header of a real DSSP file.
                elif line.startswith(' # RESIDUE'):
                    if len(line) >= 140:
                        offset = 4      # the new DSSP format
                    start = True
            else:
                if not line.strip():
                    # tolerate blank lines inside or after the residue table
                    continue
                if line[13] == '!':
                    continue

                residue_id = line[6:11].strip()
                chain = line[11]
                try:
                    ss = line[16].strip()
                    if ss == '':
                        ss = SecStructures.Gap
                    else:
                        ss = csb.core.Enum.parse(SecStructures, ss)
                except csb.core.EnumValueError as e:
                    raise UnknownSecStructureError(str(e))
                phi = float(line[104 + offset : 109 + offset])
                psi = float(line[110 + offset : 115 + offset])

                if chain not in data:
                    data[chain] = {}

                data[chain][residue_id] = ResidueAssignmentInfo(residue_id, accession, chain, ss, phi, psi)

    return data
def parse(self, stride_file):
    """
    Parse a STRIDE file into per-chain residue assignments.

    Only lines starting with the "ASG " record name are read.

    @param stride_file: source STRIDE file to parse
    @type stride_file: str
    @return: a dictionary of L{ResidueAssignmentInfo} objects, keyed by
             chain and then by residue ID
    @rtype: dict

    @raise UnknownSecStructureError: on unknown secondary structure code
    """

    data = {}

    # the context manager closes the file even if parsing fails
    with open(stride_file) as stream:

        for line in stream:
            if not line.startswith('ASG '):
                continue

            fields = line.split()

            residue_id = fields[3]
            chain = fields[2]
            accession = fields[-1].lower()
            try:
                ss = csb.core.Enum.parse(SecStructures, fields[5])
            except csb.core.EnumValueError as e:
                raise UnknownSecStructureError(str(e))
            phi = float(fields[7])
            psi = float(fields[8])

            if chain not in data:
                data[chain] = {}

            data[chain][residue_id] = ResidueAssignmentInfo(residue_id, accession, chain, ss, phi, psi)

    return data
3 | """ 4 | 5 | from abc import ABCMeta, abstractmethod 6 | from csb.bio.nmr import NOESpectrum 7 | 8 | 9 | class PeakListFormatError(ValueError): 10 | pass 11 | 12 | class BasePeakListReader(object): 13 | 14 | __metaclass__ = ABCMeta 15 | 16 | @abstractmethod 17 | def read(self, table): 18 | """ 19 | Parse a peak list table. 20 | 21 | @param table: input peak list table 22 | @type table: str 23 | @rtype: L{NOESpectrum} 24 | """ 25 | pass 26 | 27 | def read_file(self, filename): 28 | """ 29 | Parse a peak list file. 30 | 31 | @param filename: input file name 32 | @type filename: str 33 | @rtype: L{NOESpectrum} 34 | """ 35 | with open(filename) as input: 36 | return self.read(input.read()) 37 | 38 | def read_all(self, filenames): 39 | """ 40 | Parse a list of peak list files and merge the resulting spectra. 41 | All spectra must have identical dimensions. 42 | 43 | @param filenames: input file names 44 | @type filenames: iterable of str 45 | 46 | @return: joint spectrum 47 | @rtype: L{NOESpectrum} 48 | """ 49 | spectra = [self.read_file(f) for f in filenames] 50 | return NOESpectrum.join(*spectra) 51 | 52 | class SparkyPeakListReader(BasePeakListReader): 53 | """ 54 | Sparky NOE peak list parser. 55 | 56 | @param elements: list of element names for each dimension 57 | @type elements: list of (str or L{EnumItem}) 58 | @param connected: list of covalently connected dimension indices in the 59 | format: [(i1,i2),...] 60 | @type connected: list of (int,int) tuples 61 | """ 62 | 63 | def __init__(self, elements, connected): 64 | 65 | self._elements = list(elements) 66 | self._connected = [(d1, d2) for d1, d2 in connected] 67 | 68 | if len(self._elements) < 1: 69 | raise ValueError("Can't parse a 0-dimensional peak list") 70 | 71 | def read(self, table): 72 | """ 73 | Parse a Sparky peak list table. 
74 | 75 | @param table: input peak list 76 | @type table: str 77 | @rtype: L{NOESpectrum} 78 | """ 79 | offset = 0 80 | spectrum = NOESpectrum(self._elements) 81 | 82 | for d1, d2 in self._connected: 83 | spectrum.connect(d1, d2) 84 | 85 | for l in table.splitlines(): 86 | if not l.strip() or ('w1' in l and 'w2' in l): 87 | if l.lstrip().lower().startswith('assignment'): 88 | offset = 1 89 | continue 90 | 91 | line = l.split()[offset:] 92 | try: 93 | float(line[-1]) # last item may or may not be a comment 94 | except ValueError: 95 | if len(line) > 0: 96 | line.pop() 97 | 98 | items = list(map(float, line)) 99 | intensity = items[-1] 100 | dimensions = items[:-1] 101 | 102 | if len(dimensions) != len(self._elements): 103 | raise PeakListFormatError("Expected {0} dimensional spectrum, got {1}".format( 104 | len(self._elements), len(dimensions))) 105 | 106 | spectrum.add(intensity, dimensions) 107 | 108 | return spectrum 109 | 110 | class XeasyPeakListReader(BasePeakListReader): 111 | """ 112 | XEASY NOE peak list parser. 113 | """ 114 | 115 | def __init__(self): 116 | pass 117 | 118 | def read(self, table): 119 | """ 120 | Parse an XEASY peak list table. 
def _read_header(self, lines):
    """
    Read the XEASY header comments and create an empty spectrum with the
    declared dimensions and connectivity.

    @param lines: all lines of the peak list
    @type lines: list of str

    @return: empty spectrum with connected dimensions set
    @rtype: L{NOESpectrum}

    @raise PeakListFormatError: on missing or inconsistent header
    """

    num = 0
    dim = {}            # dimension index => element name
    el = {}             # raw dimension label => dimension index
    connectivity = None

    for l in lines:
        if l.startswith('#'):
            if l[1:].lstrip().lower().startswith('number of dimensions'):
                num = int(l.split()[-1])

            if l.startswith('#INAME'):
                parts = l.split()[1:]
                if len(parts) != 2:
                    raise PeakListFormatError("Invalid Xeasy header")

                index = int(parts[0]) - 1
                if index < 0:
                    raise PeakListFormatError("Invalid Xeasy header: dimension index < 1")

                # the element is the alphabetic part of the label
                element = ''.join(i for i in parts[1] if i.isalpha())
                el[parts[1]] = index

                dim[index] = element

            if l.startswith('#CYANAFORMAT'):
                connectivity = l.split()[1]

    if len(dim) != num or num == 0:
        raise PeakListFormatError("Invalid Xeasy header")

    elements = tuple(dim[i] for i in sorted(dim))
    spectrum = NOESpectrum(elements)

    if connectivity:
        # cyanaformat - explicitly defines connected dimensions:
        # upper case dimensions are connected, e.g. "#CYANAFORMAT hCH" => 2-3
        if connectivity.upper() != ''.join(elements).upper():
            # same error type as every other header problem; still a ValueError
            raise PeakListFormatError("Invalid XEASY/CYANA header")
        for i1 in range(len(connectivity)):
            for i2 in range(len(connectivity)):
                e1, e2 = connectivity[i1], connectivity[i2]
                if i1 != i2 and e1.isupper() and e2.isupper():
                    spectrum.connect(i1, i2)
    else:
        # dimension labels starting with a number are connected, e.g. "1A B2 3C" => 1-3
        if len(el) != num:
            raise PeakListFormatError("Invalid XEASY header")
        for e1 in el:
            for e2 in el:
                if e1 != e2:
                    element1 = dim[el[e1]]
                    element2 = dim[el[e2]]

                    num1 = e1.replace(element1, '')
                    num2 = e2.replace(element2, '')

                    if e1.startswith(num1) and e2.startswith(num2):
                        spectrum.connect(el[e1], el[e2])

    return spectrum
class ProcheckParser(object):
    """
    Simple Procheck Summary parser
    """
    def __init__(self):
        self.binary = 'procheck.scr'
        self.acc = 2.0

    def parse(self, fn):
        """
        Extract the quality indicators from a Procheck summary file.

        @param fn: source file to parse
        @type fn: str

        @return: dictionary of parsed quality indicators
        @rtype: dict
        """
        info = dict()

        # read everything up front; the file is closed even on error
        with open(os.path.expanduser(fn)) as f_handler:
            text = f_handler.read()

        input_file_name = re.compile('>>>-----.*?\n.*?\n' r'\s*\|\s*(\S+)\s+')
        residues = re.compile(r'(\d+)\s*residues\s\|')
        ramachandran_plot = re.compile(r'Ramachandran\splot:\s*(\d+\.\d+)' +
                                       r'%\s*core\s*(\d+\.\d+)%\s*allow\s*(\d+\.\d+)' +
                                       r'%\s*gener\s*(\d+\.\d+)%\s*disall')
        labelled_all = re.compile(r'Ramachandrans:\s*(\d+)\s*.*?out\sof\s*(\d+)')
        labelled_chi = re.compile(r'Chi1-chi2\splots:\s*(\d+)\s*.*?out\sof\s*(\d+)')
        bad_contacts = re.compile(r'Bad\scontacts:\s*(\d+)')
        g_factors = re.compile(r'G-factors\s*Dihedrals:\s*([0-9-+.]+)' +
                               r'\s*Covalent:\s*([0-9-+.]+)\s*Overall:\s*([0-9-+.]+)')

        info['input_file'] = input_file_name.search(text).groups()[0]
        info['#residues'] = int(residues.search(text).groups()[0])
        info['rama_core'], info['rama_allow'], info['rama_gener'], info['rama_disall'] = \
            [float(g) for g in ramachandran_plot.search(text).groups()]
        info['g_dihedrals'], info['g_bond'], info['g_overall'] = \
            [float(g) for g in g_factors.search(text).groups()]
        info['badContacts'] = int(bad_contacts.search(text).groups()[0])

        # fractions of labelled residues: "<labelled> ... out of <total>"
        labelled, total = labelled_all.search(text).groups()
        info['labelledAll'] = float(labelled) / float(total)

        labelled, total = labelled_chi.search(text).groups()
        info['labelledChi'] = float(labelled) / float(total)

        return info

    def run(self, pdb_file):
        """
        Runs procheck for the given pdbfile and parses the output.
        Will fail if the procheck binary is not in the path.

        @param pdb_file: file to parse
        @return: dict of parsed values
        """
        wd = os.getcwd()
        base = os.path.basename(pdb_file)

        with TempFolder() as tmp:
            shutil.copy(os.path.expanduser(pdb_file), tmp.name)
            os.chdir(tmp.name)
            try:
                Shell.run('{0} {1} {2}'.format(self.binary,
                                               os.path.join(tmp.name, base),
                                               self.acc))
                summary = '.'.join([os.path.splitext(base)[0], 'sum'])
                out = self.parse(os.path.join(tmp.name, summary))
            finally:
                # always leave the temporary folder again, also on failure
                os.chdir(wd)

        return out
class PSIPredParser(object):
    """
    Simple PSI-PRED Secondary Structure Parser.
    """

    def parse(self, psipred_file):
        """
        Read the predicted secondary structure and its confidence string
        from the "Pred:" and "Conf:" lines of the file.

        @param psipred_file: source PSI-PRED *.horiz file to parse
        @type psipred_file: str
        @rtype: L{SecondaryStructure}

        @raise PSIPredParseError: if prediction and confidence differ
                                  in length
        """

        ss = []
        conf = []

        # the context manager closes the file even if parsing fails
        with open(psipred_file) as stream:

            for line in stream:

                if line.startswith('Conf:'):
                    conf.append(line[6:].strip())

                elif line.startswith('Pred:'):
                    ss.append(line[6:].strip())

        ss = ''.join(ss)
        conf = ''.join(conf)

        if len(ss) != len(conf):
            raise PSIPredParseError('Invalid PSI-PRED output file')

        if ss:
            return SecondaryStructure(ss, conf)
        else:
            return SecondaryStructure(None)

    def parse_scores(self, scores_file):
        """
        Read the per-residue scores; comment lines (starting with "#")
        and blank lines are skipped.

        @param scores_file: source PSI-PRED *.ss2 file to parse
        @type scores_file: str
        @rtype: list of L{PSIPredResidueInfo}

        @raise UnknownSecStructureError: on unknown secondary structure code
        """
        residues = []

        with open(scores_file) as stream:

            for line in stream:

                if line.startswith('#') or not line.strip():
                    continue

                fields = line.split()

                rank = int(fields[0])
                residue = fields[1]

                try:
                    ss = csb.core.Enum.parse(SecStructures, fields[2])
                except csb.core.EnumValueError as e:
                    raise UnknownSecStructureError(str(e))

                # column order in the file: coil, helix, strand
                coil, helix, strand = map(float, fields[3:6])

                residues.append(PSIPredResidueInfo(rank, residue, ss, helix, strand, coil))

        return tuple(residues)
3 | """ 4 | 5 | import math 6 | import csb.core 7 | 8 | from csb.bio.structure import SecondaryStructure, SecStructures 9 | 10 | 11 | class SSCartoonBuilder(object): 12 | """ 13 | Creates 2D vector diagrams from L{SecondaryStructure} objects. 14 | 15 | @param ss: source secondary structure (either a SS string or a SS object) 16 | @type ss: str or L{SecondaryStructure} 17 | @param width: output width of the diagram in pixels 18 | @type width: int 19 | @param height: output height of the diagram in pixels 20 | @type height: int 21 | 22 | @param thickness: stroke-width (2px by default) 23 | @param helix: SVG color for helicies (red by default) 24 | @param strand: SVG color for strands (blue by default) 25 | @param coil: SVG color for coils (orange by default) 26 | @param gap: SVG color for gaps (grey by default) 27 | @param cap: stroke-linecap (round by default) 28 | """ 29 | 30 | def __init__(self, ss, width, height, thickness='2px', 31 | helix='#C24641', strand='#6698FF', coil='#FF8C00', gap='#E0E0E0', 32 | cap='round'): 33 | 34 | if ss: 35 | if isinstance(ss, csb.core.string): 36 | self._ss = SecondaryStructure(ss) 37 | else: 38 | self._ss = ss.clone() 39 | self._ss.to_three_state() 40 | self._residues = sum(e.length for e in self._ss) 41 | if self._residues == 0: 42 | raise ValueError('Zero-length secondary structure') 43 | else: 44 | raise ValueError('Invalid secondary structure') 45 | 46 | self.thickness = thickness 47 | self.helixcolor = helix 48 | self.strandcolor = strand 49 | self.coilcolor = coil 50 | self.gapcolor = gap 51 | self.cap = cap 52 | 53 | self._realwidth = float(width) 54 | self._width = self._realwidth - 2 # this is to compensate for antialiasing and rounded caps 55 | self._height = float(height) 56 | self._x = 0 57 | self._y = 0 58 | 59 | self._svg = '' 60 | 61 | def build(self): 62 | """ 63 | Build a SVG image using the current size and color settings. 
64 | 65 | @return: SVG diagram 66 | @rtype: str (SVG document) 67 | """ 68 | 69 | self._x = 0 70 | self._y = 0 71 | self._svg = [r''' 73 | 74 | '''.format(self, self._height / 2.0)] 75 | 76 | for e in self._ss: 77 | 78 | if e.type == SecStructures.Helix: 79 | cartoon = self._helix(e.length) 80 | color = self.helixcolor 81 | 82 | elif e.type == SecStructures.Strand: 83 | cartoon = self._strand(e.length) 84 | color = self.strandcolor 85 | 86 | elif e.type == SecStructures.Coil: 87 | cartoon = self._coil(e.length) 88 | color = self.coilcolor 89 | 90 | elif e.type == SecStructures.Gap: 91 | cartoon = self._gap(e.length) 92 | color = self.gapcolor 93 | 94 | else: 95 | assert False, "Unhandled SS Type: {0!r}".format(e.type) 96 | 97 | path = r''' '''.format(color, self, cartoon) 99 | 100 | self._svg.append(path) 101 | 102 | self._svg.append(' ') 103 | self._svg.append('') 104 | return '\n'.join(self._svg) 105 | 106 | def _format(self, path): 107 | 108 | formatted = [] 109 | 110 | for i in path: 111 | 112 | if i == -0: 113 | i = 0 114 | 115 | if isinstance(i, float): 116 | i = round(i, ndigits=7) 117 | if i == -0: 118 | i = 0 119 | formatted.append('{0:.7f}'.format(i)) 120 | else: 121 | formatted.append(str(i)) 122 | 123 | return ' '.join(formatted) 124 | 125 | def _helix(self, length, arc_width=3.0): 126 | 127 | if length < 1: 128 | return '' 129 | 130 | helix_width = float(length) * self._width / self._residues 131 | helix_end = self._x + helix_width 132 | path = ['M', self._x, self._y, 'Q'] 133 | 134 | arcs = int(helix_width / arc_width) 135 | for i in range(1, arcs + 1): 136 | 137 | # quadratic bezier control points: sine curve's min, max and inflection points (0, 1, 0, -1, 0, 1 ...) 
138 | # one arc is the curve from 0 to pi/2 139 | if i < arcs: 140 | # inner arc 141 | self._x += arc_width 142 | self._y = math.sin(math.pi * i / 2) * (self._height / 2.0) 143 | path.append(self._x) 144 | path.append(self._y) 145 | else: 146 | # last arc; stretch it to make the helix pixel-precise, ending also at y=0 147 | # also the number of arcs/controlpoints must be even, otherwise the path is broken 148 | 149 | # remaining pixels on x 150 | remainder = helix_end - self._x 151 | 152 | if i % 2 == 0: 153 | # even number of arcs, just extend the last arc with the remainder 154 | self._x += remainder 155 | self._y = 0 156 | path.append(self._x) 157 | path.append(self._y) 158 | else: 159 | # odd number of arcs 160 | 161 | # 1) keep this arc at the expected y, but stretch it half of the x remainder 162 | self._x += remainder / 2.0 163 | self._y = math.sin(math.pi * i / 2) * (self._height / 2.0) 164 | path.append(self._x) 165 | path.append(self._y) 166 | 167 | # 2) append a final arc, ending at [helix_end, 0] 168 | self._x += remainder / 2.0 169 | self._y = 0 170 | path.append(self._x) 171 | path.append(self._y) 172 | 173 | return self._format(path) 174 | 175 | def _strand(self, length, arrow_width=3.0): 176 | 177 | offset = 1.0 178 | strand_width = float(length) * self._width / self._residues 179 | path = ['M', self._x, self._y, 'H'] 180 | 181 | self._x += strand_width 182 | path.append(self._x) 183 | 184 | if offset < arrow_width < strand_width: 185 | arrow_start = self._x - offset - arrow_width 186 | path.extend(['M', self._x - offset, self._y]) 187 | path.extend(['L', arrow_start, self._y + self._height / 9]) 188 | path.extend(['L', arrow_start, self._y - self._height / 9]) 189 | path.extend(['L', self._x - offset, self._y]) 190 | 191 | return self._format(path) 192 | 193 | def _coil(self, length): 194 | 195 | coil_width = float(length) * self._width / self._residues 196 | path = ['M', self._x, self._y, 'Q'] 197 | 198 | # first control point 199 | self._x += 
class VascoStructureParser(object):
    """
    Simple Vasco Parser
    """

    def __init__(self):
        self._stream = None

    def parse(self, file_name, ignore_outliers=True):
        """
        @param file_name: source file to parse
        @type file_name: str
        @param ignore_outliers: skip lines containing the text
                                "Shift outlier"
        @type ignore_outliers: bool

        @return: a L{ChemicalShiftContainer} of L{ShiftInfo} objects
        @rtype: L{ChemicalShiftContainer}
        """
        self._stream = open(file_name)

        try:
            shifts = self._parse_header()
            self._parse_shifts(shifts, ignore_outliers=ignore_outliers)
        finally:
            # close the file even when the header or a shift line is malformed
            self._stream.close()

        return shifts

    def _read_header_fields(self):
        """
        Rewind the stream and read the leading '#' lines.

        @return: header values keyed by the corresponding
                 L{ChemicalShiftContainer} constructor argument name
        @rtype: dict
        """
        fields = {'bmrb_id': '', 'pdb_id': '', 'sequence': '',
                  'chain': '', 'exptype': ''}

        self._stream.seek(0)

        for line in self._stream:

            if not line.startswith('#'):
                # NOTE(review): this first non-comment line is consumed here
                # and never reaches _parse_shifts; presumably it is a column
                # header row - confirm against the VASCO file format
                break

            tag = line[2:]

            if tag.startswith('BMRB ORIGIN'):
                fields['bmrb_id'] = line[20:].strip()
            elif tag.startswith('PDB ORIGIN'):
                fields['pdb_id'] = line[20:].strip()
            elif tag.startswith('SEQUENCE PDB'):
                fields['sequence'] = line[20:].strip()
                fields['chain'] = line[17]
            elif tag.startswith('PDB EXPTYPE'):
                fields['exptype'] = line[20:].strip()

        return fields

    def _parse_header(self):
        """
        Build an empty container from the file header.

        @rtype: L{ChemicalShiftContainer}
        """
        # keyword arguments: sequence and chain used to be passed
        # positionally in swapped order
        return ChemicalShiftContainer(**self._read_header_fields())

    def _parse_shifts(self, data, ignore_outliers=True):
        """
        Read fixed-width shift records from the current stream position
        and store them as data[chain_id][residue_id][nucleus_name].

        @param data: container to fill
        @type data: L{ChemicalShiftContainer}
        @param ignore_outliers: skip lines containing the text
                                "Shift outlier"
        @type ignore_outliers: bool
        """
        for line in self._stream:

            if ignore_outliers and "Shift outlier" in line:
                continue

            chain_id = line[7]
            res_code = line[9:14].strip()
            res_label = line[16:19].strip()
            res_ss = line[21]
            nucleus_name = line[23:28].strip()
            nucleus_element = line[41]
            shift = float(line[43:52])

            info = ShiftInfo(res_code, res_label,
                             nucleus_name, shift,
                             nucleus_element, res_ss)

            if chain_id not in data:
                data.append(chain_id, csb.core.OrderedDict())

            if res_code not in data[chain_id]:
                data[chain_id][res_code] = {}

            data[chain_id][res_code][nucleus_name] = info
class WhatCheckParser(object):
    """
    Simple WhatIf/WhatCheck Summary parser
    """

    # (result key, pattern capturing the numeric score), in extraction order
    _SCORE_PATTERNS = (
        ('rama_z_score',
         re.compile(r'Ramachandran\s*Z-score\s*:\s*([0-9.Ee-]+)')),
        ('bb_z_score',
         re.compile(r'Backbone\s*conformation\s*Z-score\s*:\s*([0-9.Ee-]+)')),
        ('1st_packing_z_score',
         re.compile(r'1st\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')),
        ('2nd_packing_z_score',
         re.compile(r'2nd\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')),
        ('rotamer_score',
         re.compile(r'chi-1\S*chi-2\s*rotamer\s*normality\s*:\s*([0-9.Ee-]+)')),
    )

    def __init__(self, binary='DO_WHATCHECK.COM'):
        self.binary = binary

    def parse_summary(self, fn):
        """
        @param fn: whatif pdbout.txt file to parse
        @type fn: str

        @return: A dict containing some of the WhatCheck results
        @rtype: a dict

        @raise AttributeError: if one of the expected scores is not
                               present in the file
        """
        # read everything up front so the handle is closed before any
        # pattern can fail to match
        with open(os.path.expanduser(fn)) as f_handler:
            text = f_handler.read()

        info = dict()

        for key, pattern in self._SCORE_PATTERNS:
            info[key] = float(pattern.search(text).group(1))

        return info

    parse = parse_summary

    def run(self, pdb_file):
        """
        Runs WhatCheck for the given pdbfile and parses the output.
        Will fail if the WhatCheck binary is not in the path.

        @param pdb_file: file to parse
        @return: dict of parsed values
        """
        wd = os.getcwd()
        base = os.path.basename(pdb_file)

        with TempFolder() as tmp:
            shutil.copy(os.path.expanduser(pdb_file), tmp.name)
            os.chdir(tmp.name)
            try:
                Shell.run('{0} {1}'.format(self.binary,
                                           os.path.join(tmp.name, base)))
                out = self.parse_summary(os.path.join(tmp.name, 'pdbout.txt'))
            finally:
                # always leave the temp folder, even when the run or the
                # parsing fails
                os.chdir(wd)

        return out
0.00 0.00 0.00 40 | GLY N 0.00 0.00 0.00 0.00 41 | HIS H -0.01 -0.04 0.20 0.00 42 | HIS HA -0.03 -0.06 0.01 0.01 43 | HIS C -0.10 -0.65 -0.22 -0.07 44 | HIS CA -0.05 -0.09 0.02 0.01 45 | HIS N -0.12 -0.55 1.68 0.17 46 | ILE H -0.01 -0.06 0.17 -0.09 47 | ILE HA -0.03 -0.02 -0.02 -0.01 48 | ILE C -0.20 -0.58 -0.18 -0.02 49 | ILE CA -0.07 -0.20 -0.01 0.02 50 | ILE N -0.18 -0.14 4.87 0.00 51 | LEU H 0.00 -0.03 0.14 -0.08 52 | LEU HA -0.04 -0.03 -0.05 -0.01 53 | LEU C -0.13 -0.50 -0.13 -0.01 54 | LEU CA -0.01 -0.10 0.03 0.02 55 | LEU N -0.06 -0.14 1.05 -0.06 56 | LYS H 0.00 -0.03 0.14 -0.06 57 | LYS HA -0.02 -0.02 -0.01 0.00 58 | LYS C -0.08 -0.50 -0.18 -0.03 59 | LYS CA -0.01 -0.11 -0.02 0.02 60 | LYS N -0.06 -0.20 1.57 -0.06 61 | MET H 0.00 -0.02 0.15 -0.06 62 | MET HA -0.02 -0.01 -0.01 0.00 63 | MET C -0.08 -0.41 -0.18 -0.02 64 | MET CA 0.00 0.10 -0.06 0.01 65 | MET N -0.06 -0.20 1.57 -0.06 66 | PHE H -0.03 -0.12 0.10 -0.37 67 | PHE HA -0.06 -0.09 -0.08 -0.04 68 | PHE C -0.27 -0.83 -0.25 -0.10 69 | PHE CA -0.07 -0.23 0.06 0.01 70 | PHE N -0.18 -0.49 2.78 -0.46 71 | PRO H -0.04 -0.18 0.19 -0.12 72 | PRO HA -0.01 0.11 -0.03 -0.01 73 | PRO C -0.47 -2.84 -0.09 -0.02 74 | PRO CA -0.22 -2.00 0.02 0.04 75 | PRO N -0.18 -0.32 0.87 -0.17 76 | SER H 0.00 -0.03 0.16 -0.08 77 | SER HA -0.01 0.02 0.00 -0.01 78 | SER C -0.08 -0.40 -0.15 -0.06 79 | SER CA 0.00 -0.08 0.13 0.00 80 | SER N -0.06 -0.03 2.55 -0.17 81 | THR H 0.01 0.00 0.14 -0.06 82 | THR HA -0.01 0.05 0.00 -0.01 83 | THR C -0.08 -0.19 -0.13 -0.05 84 | THR CA -0.01 -0.04 0.12 0.00 85 | THR N -0.06 -0.03 2.78 -0.12 86 | TRP H -0.08 -0.13 0.04 -0.62 87 | TRP HA -0.08 -0.10 -0.15 -0.16 88 | TRP C -0.26 -0.85 -0.30 -0.17 89 | TRP CA -0.02 -0.17 0.03 -0.08 90 | TRP N 0.00 -0.26 3.19 -0.64 91 | TYR H -0.04 -0.11 0.09 -0.42 92 | TYR HA -0.05 -0.10 -0.08 -0.04 93 | TYR C -0.28 -0.85 -0.24 -0.13 94 | TYR CA -0.07 -0.22 0.06 -0.01 95 | TYR N -0.24 -0.43 3.01 -0.52 96 | VAL H -0.01 -0.05 0.17 -0.08 97 | VAL HA -0.02 -0.01 -0.02 
-0.01 98 | VAL C -0.20 -0.57 -0.18 -0.03 99 | VAL CA -0.07 -0.21 -0.02 0.01 100 | VAL N -0.24 -0.14 4.34 -0.06 101 | -------------------------------------------------------------------------------- /csb/bio/nmr/resources/RandomCoil.Reference.tsv: -------------------------------------------------------------------------------- 1 | ALA H 8.35 2 | ALA HA 4.35 3 | ALA HB 1.42 4 | ALA C 178.5 5 | ALA CA 52.82 6 | ALA CB 19.26 7 | ALA N 125 8 | ASN H 8.51 9 | ASN HA 4.79 10 | ASN HB2 2.88 11 | ASN HB3 2.81 12 | ASN HD21 7.59 13 | ASN HD22 7.01 14 | ASN C 176.1 15 | ASN CA 53.33 16 | ASN CB 39.09 17 | ASN CG 177.3 18 | ASN N 119 19 | ASP H 8.56 20 | ASP HA 4.82 21 | ASP HB2 2.98 22 | ASP HB3 2.91 23 | ASP C 175.9 24 | ASP CA 52.99 25 | ASP CB 38.33 26 | ASP CG 177.4 27 | ASP N 119.1 28 | ARG H 8.39 29 | ARG HA 4.38 30 | ARG HB2 1.91 31 | ARG HB3 1.79 32 | ARG HG2 1.68 33 | ARG HG3 1.64 34 | ARG HD2 3.2 35 | ARG HD3 3.2 36 | ARG HE 7.2 37 | ARG C 177.1 38 | ARG CA 56.48 39 | ARG CB 30.93 40 | ARG CG 27.33 41 | ARG CD 43.55 42 | ARG CZ 159.7 43 | ARG N 121.2 44 | CYS H 8.44 45 | CYS HA 4.59 46 | CYS HB2 2.98 47 | CYS HB3 2.98 48 | CYS C 175.3 49 | CYS CA 58.63 50 | CYS CB 28.34 51 | CYS N 118.8 52 | GLN H 8.44 53 | GLN HA 4.38 54 | GLN HB2 2.17 55 | GLN HB3 2.01 56 | GLN HG2 2.39 57 | GLN HG3 2.39 58 | GLN HE21 7.5 59 | GLN HE22 6.91 60 | GLN C 176.8 61 | GLN CA 56.22 62 | GLN CB 29.53 63 | GLN CG 33.96 64 | GLN CD 180.5 65 | GLN N 120.5 66 | GLU H 8.4 67 | GLU HA 4.42 68 | GLU HB2 2.18 69 | GLU HB3 2.01 70 | GLU HG2 2.5 71 | GLU HG3 2.5 72 | GLU C 176.8 73 | GLU CA 56.09 74 | GLU CB 28.88 75 | GLU CG 32.88 76 | GLU CD 180 77 | GLU N 120.2 78 | GLY H 8.41 79 | GLY HA2 4.02 80 | GLY HA 4.02 81 | GLY C 174.9 82 | GLY CA 45.39 83 | GLY N 107.5 84 | HIS H 8.56 85 | HIS HA 4.79 86 | HIS HB2 3.35 87 | HIS HB3 3.19 88 | HIS HE1 8.61 89 | HIS HD2 7.31 90 | HIS C 175.1 91 | HIS CA 55.39 92 | HIS CB 29.12 93 | HIS CE1 136.4 94 | HIS CD2 120.2 95 | HIS CG 131.4 96 | HIS N 118.1 97 | 
ILE H 8.17 98 | ILE HA 4.21 99 | ILE HB 1.89 100 | ILE HG12 1.48 101 | ILE HG13 1.19 102 | ILE HG2 0.93 103 | ILE HD1 0.88 104 | ILE C 177.1 105 | ILE CA 61.62 106 | ILE CB 38.91 107 | ILE CG1 27.46 108 | ILE CG2 17.47 109 | ILE CD1 13.16 110 | ILE N 120.4 111 | LEU H 8.28 112 | LEU HA 4.38 113 | LEU HB2 1.67 114 | LEU HB3 1.62 115 | LEU HG 1.62 116 | LEU HD1 0.93 117 | LEU HD2 0.88 118 | LEU C 178.2 119 | LEU CA 55.47 120 | LEU CB 42.46 121 | LEU CG 27.11 122 | LEU CD1 24.99 123 | LEU CD2 23.32 124 | LEU N 122.4 125 | LYS H 8.36 126 | LYS HA 4.36 127 | LYS HB2 1.89 128 | LYS HB3 1.77 129 | LYS HG2 1.47 130 | LYS HG3 1.42 131 | LYS HD2 1.68 132 | LYS HD3 1.68 133 | LYS C 177.4 134 | LYS CA 56.71 135 | LYS CB 33.21 136 | LYS CG 25.01 137 | LYS CD 29.33 138 | LYS CE 42.35 139 | LYS N 121.6 140 | MET H 8.42 141 | MET HA 4.52 142 | MET HB2 2.15 143 | MET HB3 2.03 144 | MET HG2 2.63 145 | MET HG3 2.64 146 | MET HE 2.11 147 | MET C 177.1 148 | MET CA 55.77 149 | MET CB 32.94 150 | MET CG 32.25 151 | MET CE 16.96 152 | MET N 120.3 153 | PHE H 8.31 154 | PHE HA 4.65 155 | PHE HB2 3.19 156 | PHE HB3 3.04 157 | PHE HD1 7.28 158 | PHE HE1 7.38 159 | PHE HZ 7.33 160 | PHE HE2 7.38 161 | PHE HD2 7.28 162 | PHE C 176.6 163 | PHE CA 58.09 164 | PHE CB 39.75 165 | PHE CG 139.2 166 | PHE CD1 132 167 | PHE CE1 131.5 168 | PHE CZ 130 169 | PHE CE2 131.5 170 | PHE CD2 132 171 | PHE N 120.7 172 | PRO HA 4.45 173 | PRO HB2 2.29 174 | PRO HB3 1.99 175 | PRO HG2 2.04 176 | PRO HG3 2.04 177 | PRO HD2 3.67 178 | PRO HD3 3.61 179 | PRO C 177.8 180 | PRO CA 63.7 181 | PRO CB 32.22 182 | PRO CG 27.32 183 | PRO CD 49.81 184 | PRO N 135.8 185 | SER H 8.43 186 | SER HA 4.51 187 | SER HB2 3.95 188 | SER HB3 3.9 189 | SER C 175.4 190 | SER CA 58.67 191 | SER CB 64.06 192 | SER N 115.5 193 | THR H 8.25 194 | THR HA 4.43 195 | THR HB 4.33 196 | THR HG2 1.22 197 | THR C 175.6 198 | THR CA 62.01 199 | THR CB 70.01 200 | THR CG2 21.6 201 | THR N 112 202 | TRP H 8.22 203 | TRP HA 4.7 204 | TRP HB2 3.34 
205 | TRP HB3 3.25 206 | TRP HE1 10.63 207 | TRP HD1 7.28 208 | TRP HE3 7.65 209 | TRP HZ3 7.18 210 | TRP HH2 7.26 211 | TRP HZ2 7.51 212 | TRP C 177.1 213 | TRP CA 57.6 214 | TRP CB 29.75 215 | TRP CD1 127.4 216 | TRP CG 111.7 217 | TRP CE3 122.2 218 | TRP CZ3 124.8 219 | TRP CH2 121.1 220 | TRP CZ2 114.8 221 | TRP CE2 139 222 | TRP CD2 129.6 223 | TRP N 122.1 224 | TYR H 8.26 225 | TYR HA 4.58 226 | TYR HB2 3.09 227 | TYR HB3 2.97 228 | TYR HD1 7.15 229 | TYR HE1 6.86 230 | TYR HE2 6.86 231 | TYR HD2 7.15 232 | TYR C 176.7 233 | TYR CA 58.28 234 | TYR CB 38.94 235 | TYR CG 130.8 236 | TYR CD1 133.3 237 | TYR CE1 118.3 238 | TYR CZ 157.5 239 | TYR CE2 118.3 240 | TYR CD2 133.3 241 | TYR N 120.9 242 | VAL H 8.16 243 | VAL HA 4.16 244 | VAL HB 2.11 245 | VAL HG1 0.96 246 | VAL HG2 0.96 247 | VAL C 177 248 | VAL CA 62.61 249 | VAL CB 32.82 250 | VAL CG1 21.11 251 | VAL CG2 20.34 252 | VAL N 119.3 253 | -------------------------------------------------------------------------------- /csb/numeric/integrators.py: -------------------------------------------------------------------------------- 1 | """ 2 | provides various integration schemes and an abstract gradient class. 3 | """ 4 | 5 | import numpy 6 | 7 | from abc import ABCMeta, abstractmethod 8 | from csb.statistics.samplers.mc import State, TrajectoryBuilder 9 | from csb.numeric import InvertibleMatrix 10 | 11 | 12 | class AbstractIntegrator(object): 13 | """ 14 | Abstract integrator class. Subclasses implement different integration 15 | schemes for solving deterministic equations of motion. 
16 | 17 | @param timestep: Integration timestep 18 | @type timestep: float 19 | 20 | @param gradient: Gradient of potential energy 21 | @type gradient: L{AbstractGradient} 22 | """ 23 | 24 | __metaclass__ = ABCMeta 25 | 26 | def __init__(self, timestep, gradient): 27 | 28 | self._timestep = timestep 29 | self._gradient = gradient 30 | 31 | def integrate(self, init_state, length, mass_matrix=None, return_trajectory=False): 32 | """ 33 | Integrates equations of motion starting from an initial state a certain 34 | number of steps. 35 | 36 | @param init_state: Initial state from which to start integration 37 | @type init_state: L{State} 38 | 39 | @param length: Nubmer of integration steps to be performed 40 | @type length: int 41 | 42 | @param mass_matrix: Mass matrix 43 | @type mass_matrix: n-dimensional L{InvertibleMatrix} with n being the dimension 44 | of the configuration space, that is, the dimension of 45 | the position / momentum vectors 46 | 47 | @param return_trajectory: Return complete L{Trajectory} instead of the initial 48 | and final states only (L{PropagationResult}). This reduces 49 | performance. 50 | @type return_trajectory: boolean 51 | 52 | @rtype: L{AbstractPropagationResult} 53 | """ 54 | 55 | builder = TrajectoryBuilder.create(full=return_trajectory) 56 | 57 | builder.add_initial_state(init_state) 58 | state = init_state.clone() 59 | 60 | for i in range(length - 1): 61 | state = self.integrate_once(state, i, mass_matrix=mass_matrix) 62 | builder.add_intermediate_state(state) 63 | 64 | state = self.integrate_once(state, length - 1, mass_matrix=mass_matrix) 65 | builder.add_final_state(state) 66 | 67 | return builder.product 68 | 69 | @abstractmethod 70 | def integrate_once(self, state, current_step, mass_matrix=None): 71 | """ 72 | Integrates one step starting from an initial state and an initial time 73 | given by the product of the timestep and the current_step parameter. 74 | The input C{state} is changed in place. 
75 | 76 | @param state: State which to evolve one integration step 77 | @type state: L{State} 78 | 79 | @param current_step: Current integration step 80 | @type current_step: int 81 | 82 | @param mass_matrix: mass matrix 83 | @type mass_matrix: n-dimensional numpy array with n being the dimension 84 | of the configuration space, that is, the dimension of 85 | the position / momentum vectors 86 | @return: the altered state 87 | @rtype: L{State} 88 | """ 89 | pass 90 | 91 | def _get_inverse(self, mass_matrix): 92 | 93 | inverse_mass_matrix = None 94 | if mass_matrix is None: 95 | inverse_mass_matrix = 1.0 96 | else: 97 | if mass_matrix.is_unity_multiple: 98 | inverse_mass_matrix = mass_matrix.inverse[0][0] 99 | else: 100 | inverse_mass_matrix = mass_matrix.inverse 101 | 102 | return inverse_mass_matrix 103 | 104 | class LeapFrog(AbstractIntegrator): 105 | """ 106 | Leap Frog integration scheme implementation that calculates position and 107 | momenta at equal times. Slower than FastLeapFrog, but intermediate points 108 | in trajectories obtained using 109 | LeapFrog.integrate(init_state, length, return_trajectoy=True) are physical. 110 | """ 111 | 112 | def integrate_once(self, state, current_step, mass_matrix=None): 113 | 114 | inverse_mass_matrix = self._get_inverse(mass_matrix) 115 | 116 | i = current_step 117 | 118 | if i == 0: 119 | self._oldgrad = self._gradient(state.position, 0.) 
120 | 121 | momentumhalf = state.momentum - 0.5 * self._timestep * self._oldgrad 122 | state.position = state.position + self._timestep * numpy.dot(inverse_mass_matrix, momentumhalf) 123 | self._oldgrad = self._gradient(state.position, (i + 1) * self._timestep) 124 | state.momentum = momentumhalf - 0.5 * self._timestep * self._oldgrad 125 | 126 | return state 127 | 128 | class FastLeapFrog(LeapFrog): 129 | """ 130 | Leap Frog integration scheme implementation that calculates position and 131 | momenta at unequal times by concatenating the momentum updates of two 132 | successive integration steps. 133 | WARNING: intermediate points in trajectories obtained by 134 | FastLeapFrog.integrate(init_state, length, return_trajectories=True) 135 | are NOT to be interpreted as phase-space trajectories, because 136 | position and momenta are not given at equal times! In the initial and the 137 | final state, positions and momenta are given at equal times. 138 | """ 139 | 140 | def integrate(self, init_state, length, mass_matrix=None, return_trajectory=False): 141 | 142 | inverse_mass_matrix = self._get_inverse(mass_matrix) 143 | 144 | builder = TrajectoryBuilder.create(full=return_trajectory) 145 | 146 | builder.add_initial_state(init_state) 147 | state = init_state.clone() 148 | 149 | state.momentum = state.momentum - 0.5 * self._timestep * self._gradient(state.position, 0.) 
150 | 151 | for i in range(length-1): 152 | state.position = state.position + self._timestep * numpy.dot(inverse_mass_matrix, state.momentum) 153 | state.momentum = state.momentum - self._timestep * \ 154 | self._gradient(state.position, (i + 1) * self._timestep) 155 | builder.add_intermediate_state(state) 156 | 157 | state.position = state.position + self._timestep * numpy.dot(inverse_mass_matrix, state.momentum) 158 | state.momentum = state.momentum - 0.5 * self._timestep * \ 159 | self._gradient(state.position, length * self._timestep) 160 | builder.add_final_state(state) 161 | 162 | return builder.product 163 | 164 | class VelocityVerlet(AbstractIntegrator): 165 | """ 166 | Velocity Verlet integration scheme implementation. 167 | """ 168 | 169 | def integrate_once(self, state, current_step, mass_matrix=None): 170 | 171 | inverse_mass_matrix = self._get_inverse(mass_matrix) 172 | 173 | i = current_step 174 | 175 | if i == 0: 176 | self._oldgrad = self._gradient(state.position, 0.) 177 | 178 | state.position = state.position + self._timestep * numpy.dot(inverse_mass_matrix, state.momentum) \ 179 | - 0.5 * self._timestep ** 2 * numpy.dot(inverse_mass_matrix, self._oldgrad) 180 | newgrad = self._gradient(state.position, (i + 1) * self._timestep) 181 | state.momentum = state.momentum - 0.5 * self._timestep * (self._oldgrad + newgrad) 182 | self._oldgrad = newgrad 183 | 184 | return state 185 | 186 | class AbstractGradient(object): 187 | """ 188 | Abstract gradient class. Implementations evaluate the gradient of an energy 189 | function. 190 | """ 191 | 192 | __metaclass__ = ABCMeta 193 | 194 | @abstractmethod 195 | def evaluate(self, q, t): 196 | """ 197 | Evaluates the gradient at position q and time t. 
class Envelope(object):
    """
    Envelope function for adaptive rejection sampling.

    The envelope defines a piecewise linear upper and lower
    bounding function of the concave log-probability.

    @param x: support points
    @param h: log-probability at the support points
    @param dh: derivative of the log-probability at the support points
    """
    def __init__(self, x, h, dh):

        from numpy import array, inf

        self.x = array(x)
        self.h = array(h)
        self.dh = array(dh)
        self.z0 = -inf
        self.zk = inf

    def z(self):
        """
        Support intervals for upper bounding function: the lower bound z0,
        the intersections of neighbouring tangents and the upper bound zk.
        """
        from numpy import concatenate

        h = self.h
        dh = self.dh
        x = self.x

        z = (h[1:] - h[:-1] + x[:-1] * dh[:-1] - x[1:] * dh[1:]) / \
            (dh[:-1] - dh[1:])

        return concatenate(([self.z0], z, [self.zk]))

    def u(self, x):
        """
        Piecewise linear upper bounding function.
        """
        z = self.z()[1:-1]
        j = (x > z).sum()

        return self.h[j] + self.dh[j] * (x - self.x[j])

    def u_max(self):
        """
        Largest value of the upper bounding function at the intersections
        of neighbouring tangents. Requires at least two support points.
        """
        z = self.z()[1:-1]

        # there is one intersection less than there are support points, so
        # evaluate every tangent but the last at its right-hand intersection
        return (self.h[:-1] + self.dh[:-1] * (z - self.x[:-1])).max()

    def l(self, x):
        """
        Piecewise linear lower bounding function; -inf outside the range
        spanned by the support points.
        """
        from numpy import inf

        j = (x > self.x).sum()

        if j == 0 or j == len(self.x):
            return -inf
        else:
            j -= 1
            return ((self.x[j + 1] - x) * self.h[j] + (x - self.x[j]) * self.h[j + 1]) / \
                   (self.x[j + 1] - self.x[j])

    def insert(self, x, h, dh):
        """
        Insert new support point for lower bounding function
        (and indirectly for upper bounding function).
        """
        from numpy import concatenate

        j = (x > self.x).sum()

        self.x = concatenate((self.x[:j], [x], self.x[j:]))
        self.h = concatenate((self.h[:j], [h], self.h[j:]))
        self.dh = concatenate((self.dh[:j], [dh], self.dh[j:]))

    def log_masses(self):
        """
        Logarithm of the area under exp(u) on every support interval.
        A zero slope yields a non-finite value because of log(0).
        """
        from numpy import abs, exp, log, where

        z = self.z()
        b = self.h - self.x * self.dh
        a = abs(self.dh)
        # factor out the interval end at which the tangent is largest
        q = where(self.dh > 0, z[1:], z[:-1])

        log_M = b - log(a) + log(1 - exp(-a * (z[1:] - z[:-1]))) + \
                self.dh * q

        return log_M

    def masses(self):
        """
        Area under exp(u) on every support interval.
        """
        from numpy import exp

        z = self.z()
        b = self.h - self.x * self.dh
        a = self.dh

        return exp(b) * (exp(a * z[1:]) - exp(a * z[:-1])) / a

    def sample(self):
        """
        Draw a random variate from the normalised exp(u): pick a support
        interval according to its mass, then invert the CDF on it.
        """
        from numpy.random import random
        from numpy import add, exp, log
        from csb.numeric import log_sum_exp

        log_m = self.log_masses()
        log_M = log_sum_exp(log_m)
        c = add.accumulate(exp(log_m - log_M))
        u = random()
        # the last cumulative weight may round to just below 1; never
        # index past the last interval
        j = min((u > c).sum(), len(c) - 1)

        a = self.dh[j]
        z = self.z()

        xmin, xmax = z[j], z[j + 1]

        u = random()

        if a > 0:
            return xmax + log(u + (1 - u) * exp(-a * (xmax - xmin))) / a
        else:
            return xmin + log(u + (1 - u) * exp(a * (xmax - xmin))) / a
- u): return x 193 | 194 | self.hull.insert(x, h, dh) 195 | -------------------------------------------------------------------------------- /csb/statistics/rand.py: -------------------------------------------------------------------------------- 1 | """ 2 | Random number generators 3 | """ 4 | 5 | def probability_transform(shape, inv_cum, cum_min=0., cum_max=1.): 6 | """ 7 | Generic sampler based on the probability transform. 8 | 9 | @param shape: shape of the random sample 10 | @param inv_cum: inversion of the cumulative density function from which one seeks to sample 11 | @param cum_min: lower value of the cumulative distribution 12 | @param cum_max: upper value of the cumulative distribution 13 | @return: random variates of the PDF implied by the inverse cumulative distribution 14 | """ 15 | from numpy.random import random 16 | 17 | return inv_cum(cum_min + random(shape) * (cum_max - cum_min)) 18 | 19 | def truncated_gamma(shape=None, alpha=1., beta=1., x_min=None, x_max=None): 20 | """ 21 | Generate random variates from a lower-and upper-bounded gamma distribution. 22 | 23 | @param shape: shape of the random sample 24 | @param alpha: shape parameter (alpha > 0.) 25 | @param beta: scale parameter (beta >= 0.) 26 | @param x_min: lower bound of variate 27 | @param x_max: upper bound of variate 28 | @return: random variates of lower-bounded gamma distribution 29 | """ 30 | from scipy.special import gammainc, gammaincinv 31 | from numpy.random import gamma 32 | from numpy import inf 33 | 34 | if x_min is None and x_max is None: 35 | return gamma(alpha, 1 / beta, shape) 36 | elif x_min is None: 37 | x_min = 0. 
def sample_dirichlet(alpha, n_samples=1):
    """
    Draw samples from a Dirichlet distribution by normalizing
    independent gamma variates.

    @param alpha: alpha parameter of a dirichlet distribution
    @type alpha: array

    @param n_samples: number of samples to draw
    @type n_samples: int

    @return: array with one sample per row; every row sums to one
    """
    from numpy import array, ones
    from numpy.random import gamma

    concentration = array(alpha, ndmin=1)
    k = len(concentration)

    # one gamma(alpha_i, 1) variate per component and sample
    draws = gamma(concentration, ones(k), [n_samples, k])

    return draws / draws.sum(-1)[:, None]
def sample_from_histogram(p, n_samples=1):
    """
    Draw bin indices with probabilities proportional to the histogram p.

    The bins are visited in order of decreasing weight; a uniform
    variate selects the first bin whose normalized cumulative weight
    is not smaller than the variate.

    @param p: histogram (non-negative weights, need not be normalized)
    @type p: numpy.array
    @param n_samples: number of samples to generate
    @type n_samples: integer

    @return: array of n_samples bin indices
    """

    from numpy import add, less, argsort, take, minimum
    from numpy.random import random

    ## bins sorted by decreasing weight
    indices = argsort(p)[::-1]

    c = add.accumulate(take(p, indices)) / add.reduce(p)

    ## number of cumulative weights lying strictly below each variate
    picks = add.reduce(less.outer(c, random(n_samples)), 0)

    ## The accumulated and the reduced sum are computed in different
    ## orders, hence c[-1] may end up marginally below one. A variate
    ## above it would then select the non-existent bin len(p); clamp
    ## to the last bin instead of raising an IndexError.
    return indices[minimum(picks, len(indices) - 1)]
175 | """ 176 | from numpy.random import standard_normal, random 177 | from numpy import sqrt, less_equal, clip 178 | 179 | mu_2l = mu / _lambda / 2. 180 | Y = mu * standard_normal(shape) ** 2 181 | X = mu + mu_2l * (Y - sqrt(4 * _lambda * Y + Y ** 2)) 182 | U = random(shape) 183 | 184 | m = less_equal(U, mu / (mu + X)) 185 | 186 | return clip(m * X + (1 - m) * mu ** 2 / X, 1e-308, 1e308) 187 | 188 | def random_rotation(A, n_iter=10, initial_values=None): 189 | """ 190 | Generation of three-dimensional random rotations in 191 | fitting and matching problems, Habeck 2009. 192 | 193 | Generate random rotation R from:: 194 | 195 | exp(trace(dot(transpose(A), R))) 196 | 197 | @param A: generating parameter 198 | @type A: 3 x 3 numpy array 199 | 200 | @param n_iter: number of gibbs sampling steps 201 | @type n_iter: integer 202 | 203 | @param initial_values: initial euler angles alpha, beta and gamma 204 | @type initial_values: tuple 205 | 206 | @rtype: 3 x 3 numpy array 207 | """ 208 | from numpy import cos, sin, dot, pi, clip 209 | from numpy.linalg import svd, det 210 | from random import vonmisesvariate, randint 211 | from csb.numeric import euler 212 | 213 | 214 | def sample_beta(kappa, n=1): 215 | from numpy import arccos 216 | from csb.numeric import log, exp 217 | from numpy.random import random 218 | 219 | u = random(n) 220 | 221 | if kappa != 0.: 222 | x = clip(1 + 2 * log(u + (1 - u) * exp(-kappa)) / kappa, -1., 1.) 223 | else: 224 | x = 2 * u - 1 225 | 226 | if n == 1: 227 | return arccos(x)[0] 228 | else: 229 | return arccos(x) 230 | 231 | 232 | U, L, V = svd(A) 233 | 234 | if det(U) < 0: 235 | L[2] *= -1 236 | U[:, 2] *= -1 237 | if det(V) < 0: 238 | L[2] *= -1 239 | V[2] *= -1 240 | 241 | if initial_values is None: 242 | beta = 0. 
class DimensionError(TypeError):
    """
    Raised when an array does not have the expected dimensionality.
    """
    pass

class AbstractSampler(object):
    """
    Abstract interface for sampling algorithms.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def sample(self):
        """
        Draw a sample.
        @rtype: L{AbstractState}
        """
        pass

class AbstractState(object):
    """
    Represents a point in phase-space.
    """

    __metaclass__ = ABCMeta

    @abstractproperty
    def position(self):
        pass

    @abstractproperty
    def momentum(self):
        pass

class State(AbstractState):
    """
    Represents a point in phase-space.

    @param position: coordinates of the state
    @type position: 1d numpy.ndarray
    @param momentum: optional momenta, same length as the coordinates
    @type momentum: 1d numpy.ndarray or None
    """

    @staticmethod
    def check_flat_array(*args):
        """
        Check whether arguments are flat, one-dimensional numpy arrays.

        @raise TypeError: if an argument is not a numpy.ndarray
        @raise DimensionError: if an argument has more than one
                               non-trivial dimension
        """

        for q in args:
            if not isinstance(q, np.ndarray):
                raise TypeError(q, 'numpy.ndarray expected!')

            if not len(q.squeeze().shape) <= 1:
                raise DimensionError(q, '1d numpy.ndarray expected!')

    @staticmethod
    def check_equal_length(q, p):
        """
        Check whether arguments have equal length.

        @raise DimensionError: if the lengths differ
        """

        if len(q) != len(p):
            raise DimensionError(p, 'momentum needs to have the same dimension as coordinates!')

    def __init__(self, position, momentum=None):

        self._position = None
        self._momentum = None

        ## assign through the properties to validate the input
        self.position = position
        self.momentum = momentum

    def __eq__(self, other):
        """
        Two states are equal if their positions hold the same values
        and their momenta are either both missing or hold the same
        values.

        @rtype: bool
        """
        if not isinstance(other, AbstractState):
            return NotImplemented

        ## "==" on numpy arrays compares element-wise, and feeding the
        ## resulting array to "and" raises a ValueError for arrays with
        ## more than one element; compare whole arrays instead
        if not np.array_equal(self.position, other.position):
            return False

        mine, theirs = self.momentum, other.momentum

        if mine is None or theirs is None:
            return mine is None and theirs is None

        return bool(np.array_equal(mine, theirs))

    def __ne__(self, other):
        ## Python 2 does not derive "!=" from __eq__
        result = self.__eq__(other)

        if result is NotImplemented:
            return result

        return not result

    @property
    def position(self):
        ## hand out a copy, so that callers cannot modify the state
        return self._position.copy()
    @position.setter
    def position(self, value):
        State.check_flat_array(value)
        self._position = np.array(value)

    @property
    def momentum(self):
        if self._momentum is None:
            return None
        else:
            return self._momentum.copy()
    @momentum.setter
    def momentum(self, value):
        if not value is None:
            State.check_flat_array(value)
            State.check_equal_length(value, self.position)
            self._momentum = np.array(value)
        else:
            self._momentum = None

    def clone(self):
        """
        @return: an independent copy of this state
        @rtype: L{State}
        """
        if self.momentum is not None:
            return self.__class__(self.position.copy(), self.momentum.copy())
        else:
            return self.__class__(self.position.copy())
class AbstractMC(AbstractSampler):
    """
    Base class of the Monte Carlo samplers. Concrete subclasses
    provide the actual equilibrium sampling scheme.

    @param state: Initial state
    @type state: L{AbstractState}
    """

    __metaclass__ = ABCMeta

    def __init__(self, state):

        ## create the backing field first, then assign through the
        ## property, so that the initial state gets type-checked, too
        self._state = None
        self.state = state

    def _checkstate(self, state):
        """
        Make sure that the given object is an L{AbstractState}.

        @raise TypeError: if it is not
        """
        if isinstance(state, AbstractState):
            return

        raise TypeError(state)

    @abstractproperty
    def energy(self):
        """
        Energy of the current state.
        """

    @property
    def state(self):
        """
        Current state.
        """
        return self._state
    @state.setter
    def state(self, value):
        self._checkstate(value)
        self._state = value

    @abstractmethod
    def sample(self):
        """
        Draw a sample.
        @rtype: L{AbstractState}
        """
class TrajectoryBuilder(object):
    """
    Collects states one at a time and assembles them into a
    L{Trajectory}.

    @param heat: heat produced over the trajectory
    @type heat: float
    @param work: work produced during the trajectory
    @type work: float
    """

    def __init__(self, heat=0.0, work=0.0):
        self._states = []
        self._heat = heat
        self._work = work

    @staticmethod
    def create(full=True):
        """
        Trajectory builder factory.

        @param full: if True, a TrajectoryBuilder instance is returned,
                     which keeps the initial state, all intermediate
                     states and the final state. If False, a
                     ShortTrajectoryBuilder instance is returned, which
                     holds only the initial and the final state
        @type full: boolean
        """

        builder_type = TrajectoryBuilder if full else ShortTrajectoryBuilder

        return builder_type()

    @property
    def product(self):
        """
        The L{Trajectory} instance built by this builder
        """
        return Trajectory(self._states, heat=self._heat, work=self._work)

    def add_initial_state(self, state):
        """
        Puts a copy of a state in front of all states added so far

        @param state: state to be added
        @type state: L{State}
        """
        self._states[:0] = [state.clone()]

    def add_intermediate_state(self, state):
        """
        Appends a copy of a state to the trajectory

        @param state: state to be added
        @type state: L{State}
        """
        snapshot = state.clone()
        self._states.append(snapshot)

    def add_final_state(self, state):
        """
        Appends a copy of a state to the trajectory

        @param state: state to be added
        @type state: L{State}
        """
        ## deliberately not delegating to add_intermediate_state,
        ## which subclasses may turn into a no-op
        snapshot = state.clone()
        self._states.append(snapshot)
def augment_state(state, temperature=1.0, mass_matrix=None):
    """
    Equips a state with momenta drawn from the Maxwell-Boltzmann
    distribution. The given state is modified in place; its current
    momentum, if any, is overwritten.

    @param state: State to be augmented
    @type state: L{State}

    @param temperature: Temperature of the desired Maxwell-Boltzmann
                        distribution
    @type temperature: float

    @param mass_matrix: Mass matrix to be used in the Maxwell-Boltzmann
                        distribution; None defaults to a unity matrix
    @type mass_matrix: L{InvertibleMatrix}

    @return: The very same state object, now carrying momenta
    @rtype: L{State}
    """

    d = len(state.position)

    if mass_matrix is None or mass_matrix.is_unity_multiple:
        ## independent components with variance equal to the temperature
        ## NOTE(review): the scale of the mass matrix is not used in
        ## this branch - confirm that this is intended for mass
        ## matrices which are a multiple of, but not equal to, unity
        momentum = numpy.random.normal(scale=numpy.sqrt(temperature),
                                       size=d)
    else:
        covariance_matrix = temperature * mass_matrix
        momentum = numpy.random.multivariate_normal(mean=numpy.zeros(d),
                                                    cov=covariance_matrix)

    state.momentum = momentum

    return state
class StatisticalEnsemble(object):
    """
    Base class of the statistical ensembles. An ensemble turns raw
    energies into the energies observed in that ensemble; calling the
    ensemble evaluates the corresponding unnormalized probability.
    """

    __metaclass__ = ABCMeta

    def __call__(self, raw_energies):
        """
        @return: exp(-energy) of the given raw energies
        """
        return exp(-self.energy(raw_energies))

    def log_prob(self, raw_energies):
        """
        @return: negative energy of the given raw energies
        """
        return -self.energy(raw_energies)

    @abstractmethod
    def energy(self, raw_energies):
        """
        Transforms the raw energies as if they were observed
        in this statistical ensemble
        """
        pass

    def gradient(self, raw_energies):
        raise NotImplementedError()


class BoltzmannEnsemble(StatisticalEnsemble):
    """
    Ensemble in which the raw energies are scaled by the inverse
    temperature.

    @param beta: inverse temperature
    @type beta: float
    """

    def __init__(self, beta=1.):

        ## note: in contrast to the property setter, the constructor
        ## does not reject non-positive values
        self._beta = float(beta)

    @property
    def beta(self):
        """
        Inverse temperature
        """
        return self._beta
    @beta.setter
    def beta(self, value):
        value = float(value)
        if value <= 0.:
            ## used to call the non-existent str.formate, which turned
            ## the intended ValueError into an AttributeError
            raise ValueError("Inverse temperature {0} <= 0".format(value))
        self._beta = value

    def energy(self, raw_energies):
        return raw_energies * self._beta
class TsallisEnsemble(StatisticalEnsemble):
    """
    Ensemble with a logarithmic energy transformation controlled by
    the parameter q.

    @param q: q-analogue of the temperature
    @param e_min: lower bound of the energy
    """

    def __init__(self, q=1., e_min=0.):

        self._q = q
        self._e_min = e_min

    @property
    def q(self):
        """
        q-analogue of the temperature
        """
        return self._q
    @q.setter
    def q(self, value):
        if value <= 0.:
            ## used to call the non-existent str.formate, which turned
            ## the intended ValueError into an AttributeError
            raise ValueError("Tsallis q {0} <= 0".format(value))
        self._q = value

    @property
    def e_min(self):
        """
        lower bound of the energy
        """
        return self._e_min
    @e_min.setter
    def e_min(self, value):
        self._e_min = value

    def energy(self, raw_energies):
        q = self.q
        e_min = self.e_min

        ## for q close to (or below) one the logarithmic form is not
        ## used; the energies are simply scaled by q
        if (q < 1 + 1e-10):
            return raw_energies * q
        else:
            return log(1 + (raw_energies - e_min) * (q - 1)) * q / (q - 1) + e_min


class CompositeEnsemble(StatisticalEnsemble):
    """
    Sum of several statistical ensembles; the i-th ensemble is applied
    to the i-th entry of the raw energies.

    @param ensembles: statistical ensembles to be combined; None
                      creates an empty composite
    @type ensembles: list of L{StatisticalEnsemble}
    """

    def __init__(self, ensembles=None):

        ## a mutable default argument would be shared by all instances
        ## which were created without an explicit list
        self._ensembles = [] if ensembles is None else ensembles

    @property
    def ensembles(self):
        """
        Collection of statistical ensembles
        """
        return self._ensembles
    @ensembles.setter
    def ensembles(self, value):
        ## The setter used to ignore lists altogether and stored other
        ## values in a misspelt attribute, hence it never had an effect.
        try:
            members = value if isinstance(value, list) else list(value)
        except TypeError:
            raise ValueError("Not a list of statistical ensembles")

        for member in members:
            if not isinstance(member, StatisticalEnsemble):
                raise ValueError("Not a list of statistical ensembles")

        self._ensembles = members

    def energy(self, raw_energies):
        return sum([ensemble.energy(raw_energies[i])
                    for i, ensemble in enumerate(self._ensembles)], 0)

    def gradient(self, raw_energies):
        return sum([ensemble.gradient(raw_energies[i])
                    for i, ensemble in enumerate(self._ensembles)], 0)
def log_g(self, normalize=True):
    """
    Return the logarithm of the density of states (DOS) as stored
    by the last estimate.

    @param normalize: Ensure that the density of states sums to one
    @return: the stored log-DOS, shifted by its log-sum-exp if
             normalize is set
    """
    log_dos = self._log_g

    if not normalize:
        return log_dos

    return log_dos - log_sum_exp(log_dos)
class WHAM(AbstractWHAM):
    """
    Implementation of the original WHAM methods based on histograms.
    """

    def __init__(self, ensembles, raw_energies, n):
        super(WHAM, self).__init__(ensembles, raw_energies, n)

        ## bin positions and counts of the energy histogram; both are
        ## set by estimate()
        self._ex = None
        self._h = None

    def estimate(self, n_bins=100, n_iter=10000, tol=1e-10):
        """
        Iterate the WHAM equations on a histogram of the raw energies.

        @param n_bins: number of histogram bins
        @param n_iter: maximum number of iterations
        @param tol: tolerance of the stop criterium; None disables the
                    early stop
        @return: tuple (free energies, log density of states)
        """

        self._L = []
        h, e = histogram_nd(self._e, nbins=n_bins, normalize=False)
        self._ex = e = numpy.array(e)
        self._h = h
        f = self._f

        log_h = log(h)
        ## start from a uniform, normalized density of states
        log_g = h * 0.0
        log_g -= log_sum_exp(log_g)
        log_n = log(self._n)

        ## negative ensemble energies evaluated at the bin positions
        e_ij = -numpy.squeeze(numpy.array([ensemble.energy(e)
                                           for ensemble in self._ensembles])).T

        for _i in range(n_iter):

            ## update density of states
            y = log_sum_exp(numpy.reshape((e_ij - f + log_n).T,
                                          (len(f), -1)), 0)
            log_g = log_h - numpy.reshape(y, log_g.shape)
            log_g -= log_sum_exp(log_g)

            ## update free energies
            f = log_sum_exp(numpy.reshape(e_ij.T + log_g.flatten(),
                                          (len(f), -1)).T, 0)
            self._L.append((self._n * f).sum() - (h * log_g).sum())

            self._log_g = log_g
            self._f = f

            if self._stop_criterium(tol):
                break

        return f, log_g

    def log_z(self, beta=1., ensembles=None):
        """
        Use trapezoidal rule to evaluate the partition function.

        @param beta: inverse temperature(s)
        @type beta: number or list
        @param ensembles: not used by this implementation
        @return: log partition function; a float if beta is a scalar,
                 otherwise one value per inverse temperature
        """
        from numpy import array, multiply, reshape

        ## Any scalar is accepted, not just instances of float: an int
        ## or a numpy scalar used to fail in len(beta) further below.
        is_scalar = numpy.ndim(beta) == 0

        if is_scalar:
            beta = reshape(array(beta), (-1,))

        x = self._ex[0, 1:] - self._ex[0, :-1]
        y = self._ex[0]

        for i in range(1, self._ex.shape[0]):
            x = multiply.outer(x, self._ex[i, 1:] - self._ex[i, :-1])
            y = multiply.outer(y, self._ex[i])

        y = -multiply.outer(beta, y) + self._log_g
        ## average of neighbouring values in log-space, times bin width
        y = reshape(array([y.T[1:], y.T[:-1]]), (2, -1))
        y = log_sum_exp(y, 0) - log(2)
        y = reshape(y, (-1, len(beta))).T + log(x)

        log_z = log_sum_exp(y.T, 0)

        if is_scalar:
            ## extract the single element explicitly; float() applied
            ## to a one-element array is deprecated in recent numpy
            return float(numpy.ravel(log_z)[0])
        else:
            return log_z
def log_z(self, beta=1., ensembles=None):
    """
    Evaluate the log partition function from the sampled energies and
    the estimated density of states.

    @param beta: inverse temperature(s); only used if no ensembles
                 are given
    @param ensembles: ensembles whose energy transformation is applied
                      to the raw energies instead of scaling by beta
    @return: log-sum-exp over the samples of the negative transformed
             energies plus the log density of states
    """
    if ensembles is None:
        reduced = numpy.multiply.outer(beta, self._e)
    else:
        reduced = numpy.array([member.energy(self._e)
                               for member in ensembles])

    return log_sum_exp((-reduced + self.log_g()).T, 0)
@test.unit
class TestChemShiftReader(test.Case):
    """
    Tests of the chemical shift reader; the reader class and the input
    file are exposed as properties, so that subclasses can re-run the
    same tests against another format version.
    """

    def setUp(self):

        super(TestChemShiftReader, self).setUp()

        self.parser = self.klass()
        self.file2 = self.config.getTestFile('2l01.v2.str')
        self.file3 = self.config.getTestFile('2l01.v3.str')

    @property
    def file(self):
        return self.file2

    @property
    def klass(self):
        return ChemShiftReader

    def testCreate(self):

        klass = self.klass

        self.assertTrue(isinstance(klass.create(version=2), ChemShiftReader))
        self.assertTrue(isinstance(klass.create(version=3), ChemShift3Reader))

        self.assertRaises(ValueError, klass.create, version=1)

    def testGuess(self):

        klass = self.klass

        self.assertTrue(isinstance(klass.guess(self.file2), ChemShiftReader))
        self.assertTrue(isinstance(klass.guess(self.file3), ChemShift3Reader))

        dummy = self.config.getTestFile("2JZC.sum")
        self.assertRaises(ChemShiftFormatError, klass.guess, dummy)

    def testReadShifts(self):

        # close the file deterministically instead of leaking the handle
        with open(self.file) as stream:
            content = stream.read()

        cs = self.parser.read_shifts(content)

        self.assertEqual(len(cs), 11)

        self.assertEqual(cs[0].name, "HA")
        self.assertEqual(cs[0].element, ChemElements.H)
        self.assertEqual(cs[0].shift, 3.977)

        self.assertEqual(cs[1].name, "HB2")
        self.assertEqual(cs[1].shift, 2.092)

        self.assertEqual(cs[7].element, ChemElements.C)
        self.assertEqual(cs[7].residue, ProteinAlphabet.MET)

        self.assertEqual(cs[10].residue, ProteinAlphabet.LYS)
        self.assertEqual(cs[10].shift, 4.423)

    def testReadFile(self):

        cs = self.parser.read_file(self.file)
        self.assertEqual(len(cs), 11)
# --- csb/test/cases/bio/io/mrc/__init__.py ---

@test.unit
class TestDensityMapWriter(test.Case):
    """
    Unit tests for L{DensityMapWriter}: a density map read from
    C{1C3W_10.mrc} is written back and compared with the source file.
    """

    def setUp(self):

        super(TestDensityMapWriter, self).setUp()

        self.file = self.config.getTestFile('1C3W_10.mrc')
        self.writer = DensityMapWriter()
        self.reader = DensityMapReader(self.file)
        self.density = self.reader.read()

    def testWriteDensity(self):

        with self.config.getTempStream(mode='b') as temp:
            with open(self.file, 'rb') as source:
                self.writer.write(temp, self.density)
                temp.flush()
                # deliberately not assertEqual: a mismatch would dump both
                # binary strings into the failure message
                if temp.content() != source.read():
                    self.fail('binary strings differ')

    def testReconstructHeader(self):

        raw = self.density.header
        # drop the stored header before asking the writer for a new one
        self.density.header = None

        new = self.writer.reconstruct_header(self.density)

        original = self.reader._inspect(raw, ByteOrder.NATIVE)
        generated = self.reader._inspect(new, ByteOrder.NATIVE)

        for o, g in zip(original, generated):
            self.assertAlmostEqual(o, g, places=4)


# --- csb/test/cases/bio/io/noe/__init__.py ---

@test.unit
class TestSparkyPeakListReader(test.Case):
    """
    Unit tests for L{SparkyPeakListReader}, run against C{Sparky.peaks}.
    """

    def setUp(self):

        super(TestSparkyPeakListReader, self).setUp()

        self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
        self.parser = SparkyPeakListReader(self.elements, [(1, 2)])
        self.file = self.config.getTestFile('Sparky.peaks')

    def testRead(self):

        # close the file deterministically instead of leaking the handle
        with open(self.file) as stream:
            content = stream.read()

        spectrum = self.parser.read(content)

        self.assertEqual(len(spectrum), 3)

        self.assertEqual(spectrum.min_intensity, 147454)
        self.assertEqual(spectrum.max_intensity, 204746)

        self.assertEqual(spectrum.element(0), self.elements[0])
        self.assertEqual(spectrum.element(1), self.elements[1])

        self.assertEqual(spectrum.dimensions, self.elements)
        self.assertEqual(spectrum.proton_dimensions, (0, 2))
        self.assertEqual(spectrum.num_dimensions, 3)
        self.assertEqual(spectrum.num_proton_dimensions, 2)

        self.assertFalse(spectrum.has_element(ChemElements.Ca))
        self.assertTrue(spectrum.has_element(ChemElements.C))

        self.assertFalse(spectrum.has_connected_dimensions(0))
        self.assertEqual(spectrum.connected_dimensions(0), ())
        self.assertTrue(spectrum.has_connected_dimensions(1))
        self.assertEqual(spectrum.connected_dimensions(1), (2,))
        self.assertTrue(spectrum.has_connected_dimensions(2))
        self.assertEqual(spectrum.connected_dimensions(2), (1,))

        peaks = list(spectrum)
        self.assertEqual(peaks[0].intensity, 157921)
        self.assertEqual(peaks[0].get(0), 3.418)
        self.assertEqual(peaks[0].get(1), 114.437)
        self.assertEqual(peaks[0].get(2), 7.440)

    def testReadFile(self):

        spectrum = self.parser.read_file(self.file)
        self.assertEqual(len(spectrum), 3)

    def testReadAll(self):

        # the same file given twice yields twice as many peaks
        spectrum = self.parser.read_all([self.file, self.file])
        self.assertEqual(len(spectrum), 6)


@test.unit
class TestXeasyPeakListReader(test.Case):
    """
    Unit tests for L{XeasyPeakListReader}, run against C{Xeasy1.peaks}.
    """

    def setUp(self):

        super(TestXeasyPeakListReader, self).setUp()

        self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
        self.parser = XeasyPeakListReader()
        self.file = self.config.getTestFile('Xeasy1.peaks')

    def testRead(self):

        # close the file deterministically instead of leaking the handle
        with open(self.file) as stream:
            content = stream.read()

        spectrum = self.parser.read(content)

        self.assertEqual(len(spectrum), 3)

        self.assertEqual(spectrum.min_intensity, 1.291120e05)
        self.assertEqual(spectrum.max_intensity, 4.243830e05)

        self.assertEqual(spectrum.element(0), self.elements[0])
        self.assertEqual(spectrum.element(1), self.elements[1])

        self.assertEqual(spectrum.dimensions, self.elements)
        self.assertEqual(spectrum.proton_dimensions, (0, 2))
        self.assertEqual(spectrum.num_dimensions, 3)
        self.assertEqual(spectrum.num_proton_dimensions, 2)

        self.assertFalse(spectrum.has_element(ChemElements.Ca))
        self.assertTrue(spectrum.has_element(ChemElements.C))

        self.assertFalse(spectrum.has_connected_dimensions(0))
        self.assertEqual(spectrum.connected_dimensions(0), ())
        self.assertTrue(spectrum.has_connected_dimensions(1))
        self.assertEqual(spectrum.connected_dimensions(1), (2,))
        self.assertTrue(spectrum.has_connected_dimensions(2))
        self.assertEqual(spectrum.connected_dimensions(2), (1,))

        peaks = list(spectrum)
        self.assertEqual(peaks[0].intensity, 1.565890e05)
        self.assertEqual(peaks[0].get(0), 7.050)
        self.assertEqual(peaks[0].get(1), 10.374)
        self.assertEqual(peaks[0].get(2), 0.889)


@test.unit
class TestXeasyPeakListReader2(TestXeasyPeakListReader):
    """
    Repeats all L{TestXeasyPeakListReader} tests against C{Xeasy2.peaks}.
    """

    def setUp(self):

        # the parent already sets self.elements and self.parser to the
        # values needed here; only the input file differs
        super(TestXeasyPeakListReader2, self).setUp()

        self.file = self.config.getTestFile('Xeasy2.peaks')


@test.unit
class TestXeasyFileBuilder(test.Case):
    """
    Unit tests for L{XeasyFileBuilder}: a parsed spectrum is written back
    and compared with the text it was parsed from.
    """

    def setUp(self):
        super(TestXeasyFileBuilder, self).setUp()

    def testBuild(self):

        content = self.config.getContent("Xeasy1.peaks")
        spectrum = XeasyPeakListReader().read(content)

        stream = csb.io.MemoryStream()

        builder = XeasyFileBuilder(stream)
        builder.add_header(spectrum)
        builder.add_peaks(spectrum)

        # leading/trailing whitespace is not significant for the comparison
        self.assertEqual(stream.getvalue().strip(), content.strip())
# --- csb/test/cases/bio/io/whatif/__init__.py ---

@test.functional
class TestWhatCheckParser(test.Case):
    """
    Functional test for L{WhatCheckParser}, run against C{pdbout.txt}.
    """

    def setUp(self):
        super(TestWhatCheckParser, self).setUp()
        self.file = self.config.getTestFile('pdbout.txt')
        self.parser = WhatCheckParser()

    def testParse(self):

        # (result key, expected value), checked in this order
        expected = (
            ('rama_z_score', -4.617),
            ('bb_z_score', -1.421),
            ('1st_packing_z_score', -3.436),
            ('2nd_packing_z_score', -4.424),
            ('rotamer_score', -2.103),
        )

        res = self.parser.parse(self.file)

        for key, value in expected:
            self.assertEqual(res[key], value)


# --- csb/test/cases/bio/sequence/alignment/__init__.py ---

@test.unit
class TestIdentityMatrix(test.Case):
    """
    Unit tests for L{IdentityMatrix} built with the scores 2 and -3.
    """

    def setUp(self):

        super(TestIdentityMatrix, self).setUp()
        self.matrix = IdentityMatrix(2, -3)

    def testScore(self):

        for x, y, expected in (("a", "a", 2), ("a", "b", -3)):
            self.assertEqual(self.matrix.score(x, y), expected)


@test.unit
class TestSimilarityMatrix(test.Case):
    """
    Unit tests for L{SimilarityMatrix} loaded with BLOSUM62.
    """

    def setUp(self):

        super(TestSimilarityMatrix, self).setUp()
        self.matrix = SimilarityMatrix(SimilarityMatrix.BLOSUM62)

    def testScore(self):

        # the last two cases query the same pair in both orders
        for x, y, expected in (("A", "A", 4), ("A", "R", -1), ("R", "A", -1)):
            self.assertEqual(self.matrix.score(x, y), expected)


@test.unit
class TestGlobalAlignmentAlgorithm(test.Case):
    """
    Unit tests for L{GlobalAlignmentAlgorithm} with identity scoring
    (1 / -1) and a gap score of 0.
    """

    def setUp(self):

        super(TestGlobalAlignmentAlgorithm, self).setUp()

        self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
        self.seq2 = RichSequence('s1', '', 'AZCBBABAABCCEF', SequenceTypes.Protein)
        self.algorithm = GlobalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=0)

    def testAlign(self):

        ali = self.algorithm.align(self.seq1, self.seq2)

        self.assertEqual(ali.query.sequence, "CCA-BBBCBBCABAABCCEAAAAAAAAAAAAFAA")
        self.assertEqual(ali.subject.sequence, "--AZ---CBB-ABAABCCE------------F--")

        # the aligned query exposes the very same residue object
        self.assertEqual(ali.query.residues[3], self.seq1.residues[3])
        self.assertTrue(ali.query.residues[3] is self.seq1.residues[3])

        # (attribute, expected value), checked in this order
        expected = (
            ('qstart', 1),
            ('qend', 33),
            ('start', 1),
            ('end', 14),
            ('length', 34),
            ('gaps', 21),
            ('identicals', 13),
            ('identity', 13 / 34.0),
            ('score', 13),
        )
        for attribute, value in expected:
            self.assertEqual(getattr(ali, attribute), value)

    def testEmptyAlignment(self):

        first = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
        second = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)

        self.assertTrue(self.algorithm.align(first, second).is_empty)


@test.unit
class TestLocalAlignmentAlgorithm(test.Case):
    """
    Unit tests for L{LocalAlignmentAlgorithm} with identity scoring
    (1 / -1) and a gap score of -1.
    """

    def setUp(self):

        super(TestLocalAlignmentAlgorithm, self).setUp()

        self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
        self.seq2 = RichSequence('s1', '', 'AZCBBABAACBCCEF', SequenceTypes.Protein)
        self.algorithm = LocalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=-1)

    def testAlign(self):

        ali = self.algorithm.align(self.seq1, self.seq2)

        self.assertEqual(ali.query.sequence, "CBBCABAA-BCCE")
        self.assertEqual(ali.subject.sequence, "CBB-ABAACBCCE")

        # (attribute, expected value), checked in this order
        expected = (
            ('qstart', 7),
            ('qend', 18),
            ('start', 3),
            ('end', 14),
            ('length', 13),
            ('gaps', 2),
            ('identicals', 11),
            ('identity', 11 / 13.0),
            ('score', 9),
        )
        for attribute, value in expected:
            self.assertEqual(getattr(ali, attribute), value)

    def testEmptyAlignment(self):

        first = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
        second = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)

        self.assertTrue(self.algorithm.align(first, second).is_empty)
self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 1, 0, 0, 0) 130 | self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 1, 0, 0) 131 | self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 1, 0) 132 | self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 0, 1) 133 | 134 | self.assertRaises(ValueError, AlignmentResult, 1, self.seq1, self.seq2, 0, 0, 0, 0) 135 | 136 | def testStr(self): 137 | 138 | string = r""" 139 | 10 AB-D 12 140 | 20 A-CD 22 """.strip("\r\n") 141 | self.assertEqual(string, str(self.ali)) 142 | 143 | def testAlignment(self): 144 | 145 | ali = self.ali.alignment() 146 | self.assertEqual(ali.rows[1].sequence, self.seq1.sequence) 147 | self.assertEqual(ali.rows[2].sequence, self.seq2.sequence) 148 | 149 | def testQuery(self): 150 | self.assertEqual(self.ali.query.sequence, self.seq1.sequence) 151 | self.assertEqual(self.ali.query.residues[2], self.seq1.residues[2]) 152 | self.assertTrue(self.ali.query.residues[2] is self.seq1.residues[2]) 153 | 154 | def testSubject(self): 155 | self.assertEqual(self.ali.subject.sequence, self.seq2.sequence) 156 | self.assertEqual(self.ali.subject.residues[3], self.seq2.residues[3]) 157 | self.assertTrue(self.ali.subject.residues[3] is self.seq2.residues[3]) 158 | 159 | def testQstart(self): 160 | self.assertEqual(self.ali.qstart, 10) 161 | 162 | def testQend(self): 163 | self.assertEqual(self.ali.qend, 12) 164 | 165 | def testStart(self): 166 | self.assertEqual(self.ali.start, 20) 167 | 168 | def testEnd(self): 169 | self.assertEqual(self.ali.end, 22) 170 | 171 | def testLength(self): 172 | self.assertEqual(self.ali.length, 4) 173 | 174 | def testScore(self): 175 | self.assertEqual(self.ali.score, 5.5) 176 | 177 | def testGaps(self): 178 | self.assertEqual(self.ali.gaps, 2) 179 | 180 | def testIdenticals(self): 181 | self.assertEqual(self.ali.identicals, 2) 182 | 183 | def testIdentity(self): 184 | self.assertEqual(self.ali.identity, 0.5) 
# --- csb/test/cases/bio/utils/__init__.py ---

# Coordinate fixtures shared by the test cases below (one point per row).
X1 = numpy.array([
    [0., 0., 0.],
    [1., 0., 0.],
    [0., 1., 0.]])

X2 = numpy.array([
    [0., 0., 0.],
    [1., 2., 0.],
    [-2., -1., 0.]])

X3 = numpy.array([
    [0., 0., 0.],
    [2., -1., 0.],
    [-1., 2., 0.]])

# rotation matrix expected from fitting X2 onto X1 (see TestUtils.testFit)
RZ = numpy.array([
    [0., 1., 0.],
    [-1., 0., 0.],
    [0., 0., 1.]])

X4 = numpy.array([
    [0., 0., 0.],
    [1., 0., 0.],
    [0., 1., 0.],
    [1., 1., 0.]])

X5 = numpy.array([
    [0., 0., 0.],
    [100., 0., 0.],
    [0., 100., 0.],
    [50., 50., 0.]])

X6 = numpy.array([
    [0., 0., 0.],
    [100., 0., 0.],
    [0., 100., 0.],
    [60., 60., 0.]])

X7 = numpy.array([
    [0., 0., 0.],
    [100., 0., 0.],
    [0., 100., 0.],
    [0., 0., 100.]])


@test.regression
class Regressions(test.Case):
    """
    Regression tests for L{csb.bio.utils}.
    """

    def _timeoutTest(self):
        # child-process payload for testTMSuperimpose
        cbu.tm_superimpose([[1, 1, 1]], [[1, 1, 1]])

    def _multiprocessingTest(self):
        # trivial child-process payload, used as a smoke test
        return True

    def _runProcess(self, target, timeout=1.0):
        """
        Run C{target} in a child process and wait at most C{timeout}
        seconds for it.

        @return: the process object, which may still be alive
        @rtype: multiprocessing.Process
        """
        p = multiprocessing.Process(target=target)
        p.start()
        p.join(timeout=timeout)

        return p

    @test.skip("n/a on this platform", sys.platform.startswith('win'))
    def testTMSuperimpose(self):
        """
        @see: [CSB 0000058]
        """
        # First make sure a trivial child process can be started at all;
        # otherwise skip.  Catch Exception rather than using a bare except,
        # so that KeyboardInterrupt and SystemExit still propagate.
        try:
            self._runProcess(target=self._multiprocessingTest)
        except Exception:
            self.skipTest("may produce a false positive")

        p = self._runProcess(target=self._timeoutTest, timeout=5.0)

        if p.is_alive():
            p.terminate()
            self.fail('timeout expired')

    def testRmsdMirrorImage(self):
        # Y is X7 with the first coordinate negated
        X, Y = X7, X7.copy()
        Y[:, 0] *= -1
        rmsd = cbu.rmsd(X, Y)

        self.assertAlmostEqual(rmsd, 50.0)


@test.functional
class TestUtils(test.Case):
    """
    Functional tests for L{csb.bio.utils}, based on the small coordinate
    fixtures defined at module level.
    """

    def assertArrayEqual(self, first, second, eps=1e-7):
        """
        Assert that all elements of C{first} and C{second} differ by less
        than C{eps} in absolute value.

        @param first: array-like
        @param second: array-like (or scalar)
        @param eps: exclusive upper bound for the absolute difference
        @type eps: float
        """
        diff = numpy.asarray(first) - numpy.asarray(second)
        self.assertTrue((abs(diff) < eps).all())

    def testFit(self):
        R, t = cbu.fit(X1, X2)
        Y = numpy.dot(X2, R.T) + t

        self.assertArrayEqual(R, RZ)
        self.assertArrayEqual(t, [0., 0., 0.])
        self.assertArrayEqual(Y, X3)

    def testWFit(self):
        w = numpy.array([1., 1., 0.])
        R, t = cbu.wfit(X1, X2, w) #@UnusedVariable

        d = 5.0**0.5
        self.assertArrayEqual(t, [-d / 2.0 + 0.5, 0., 0.])

    def testScaleAndFit(self):
        R, t, s = cbu.scale_and_fit(2.0 * X1, X1)

        self.assertArrayEqual(R, numpy.identity(3))
        self.assertArrayEqual(t, [0., 0., 0.])
        self.assertAlmostEqual(s, 2.0)

    def testFitWellordered(self):
        R, t = cbu.fit_wellordered(X5, X6, 10, 1.0) #@UnusedVariable

        self.assertArrayEqual(t, [0., 0., 0.])

    def testRmsd(self):
        rmsd = cbu.rmsd(X1, X2)

        self.assertAlmostEqual(rmsd, (4./3.)**0.5)

    def testWrmsd(self):
        w = numpy.array([1., 1., 0.])
        rmsd = cbu.wrmsd(X1, X2, w)

        d = 5.0**0.5
        self.assertAlmostEqual(rmsd, d / 2.0 - 0.5)

    def testTorsionRmsd(self):
        # identical inputs must give zero
        rmsd = cbu.torsion_rmsd(X1[:,:2], X1[:,:2])

        self.assertAlmostEqual(rmsd, 0.0)

    def testTmScore(self):
        score = cbu.tm_score(X1, X3)

        self.assertAlmostEqual(score, 0.4074, 4)

    def testTmSuperimpose(self):
        R, t, score = cbu.tm_superimpose(X1, X2) #@UnusedVariable

        self.assertAlmostEqual(score, 0.4074, 4)

    def testCenterOfMass(self):
        com = cbu.center_of_mass(X4)

        self.assertArrayEqual(com, [0.5, 0.5, 0.0])

    def testRadiusOfGyration(self):
        gyradius = cbu.radius_of_gyration(X4)

        s2 = 2.0**0.5
        self.assertArrayEqual(gyradius, s2 / 2.0)

    def testSecondMoments(self):
        sm = cbu.second_moments(X1)

        # TODO: correct?
        sm_test = numpy.array([
            [2./3., -1./3., 0.],
            [-1./3., 2./3., 0.],
            [0., 0., 0.]])
        self.assertArrayEqual(sm, sm_test)

    def testInertiaTensor(self):
        it = cbu.inertia_tensor(X1)

        # TODO: correct?
        it_test = numpy.array([
            [2./3., 1./3., 0.],
            [1./3., 2./3., 0.],
            [0., 0., 4./3.]])
        self.assertArrayEqual(it, it_test)

    def testFindPairs(self):
        pairs11 = list(cbu.find_pairs(1.2, X1))
        pairs12 = list(cbu.find_pairs(1.2, X1, X2))

        self.assertEqual(len(pairs11), 2)
        self.assertEqual(len(pairs12), 3)

    def testDistanceMatrix(self):
        d = cbu.distance_matrix(X1)

        s2 = 2.0**0.5
        d_test = [
            [0., 1., 1.],
            [1., 0., s2],
            [1., s2, 0.]]
        self.assertArrayEqual(d, d_test)

    def testDistance(self):
        d = cbu.distance(X1, X2)

        # one distance per row of the input
        self.assertEqual(d.shape, (len(X1),))
        self.assertArrayEqual(d[:2], [0., 2.])

    def testRmsdCur(self):
        rmsd = cbu.rmsd_cur(X1, X2)

        self.assertAlmostEqual(rmsd, 2.0)

    def testRad(self):

        converted = cbu.rad([[0, 90, 180]])[0]

        self.assertEqual(converted[0], 0)
        self.assertAlmostEqual(converted[1], numpy.pi / 2.0)
        self.assertAlmostEqual(converted[2], numpy.pi)

    def testDeg(self):

        converted = cbu.deg([[0, numpy.pi / 2.0, numpy.pi]])[0]

        self.assertEqual(converted[0], 0)
        self.assertAlmostEqual(converted[1], 90)
        self.assertAlmostEqual(converted[2], 180)
# --- csb/test/cases/numeric/integrators.py ---

@test.regression
class ReferenceRegressions(test.Case):
    """
    Checks that the states of a returned trajectory hold distinct
    positions.

    @see: [0000108]
    """

    def setUp(self):

        super(ReferenceRegressions, self).setUp()

        self.nsteps = 100
        self.dt = 0.1
        self.state = State(np.array([1.]), np.array([0.]))
        self.grad = self._createGradient(1.)

    def _createGradient(self, sigma):
        """
        Build a gradient object that evaluates to C{q / sigma ** 2}.
        """
        variance = sigma ** 2

        class _Gradient(AbstractGradient):

            def evaluate(self, q, t):
                return q / variance

        return _Gradient()

    def _run(self, algorithm):
        """
        Integrate C{self.nsteps} steps with C{algorithm} and compare the
        positions found at different points of the trajectory.
        """
        trajectory = algorithm.integrate(self.state, self.nsteps,
                                         return_trajectory=True)

        # pairs of trajectory indices whose positions must differ
        for earlier, later in ((0, 10), (10, 20)):
            self.assertFalse(trajectory[earlier].position[0]
                             == trajectory[later].position[0])

        self.assertFalse(trajectory[0].position == trajectory.final.position)

    def testLeapFrog(self):

        self._run(LeapFrog(self.dt, self.grad))

    def testVelocityVerlet(self):

        self._run(VelocityVerlet(self.dt, self.grad))
# --- csb/test/cases/statistics/ars.py ---

@test.functional
class TestARS(test.Case):
    """
    Functional test for L{ARS} sampling from a L{Gauss} density.
    """

    def testNormal(self):
        mu = 5.
        sigma = 1.
        ars = ARS(Gauss(mu, sigma))
        ars.initialize([mu - 1., mu + 1.1], z0=-10., zmax=30)
        samples = numpy.array([ars.sample() for _ in range(10000)])

        # sample statistics must reproduce the parameters of the density
        self.assertAlmostEqual(mu, numpy.mean(samples), delta=0.5)
        self.assertAlmostEqual(sigma, numpy.std(samples), delta=0.5)


# --- csb/test/cases/statistics/maxent.py ---

@test.functional
class TestMaxent(test.Case):
    """
    Functional test for L{MaxentModel} / L{MaxentPosterior}, using the
    data stored in C{maxent.pickle}.
    """

    def setUp(self):
        super(TestMaxent, self).setUp()
        self.data_fn = self.config.getTestFile('maxent.pickle')

    @test.skip("slow")
    def testMaxent(self):
        k = 2
        data = csb.io.load(self.data_fn)
        model = MaxentModel(k)
        model.sample_weights()
        # the data is scaled by pi / 180 before it is handed over
        posterior = MaxentPosterior(model, data[:100000] / 180. * numpy.pi)

        # NOTE(review): the result of this expression is discarded; it
        # looks like a leftover -- confirm model.get() has no side effect
        # before removing it
        model.get() * 1.

        x0 = posterior.model.get().flatten()

        def target(w):
            # fmin_powell minimizes, hence the negated posterior
            return -posterior(w, n=50)

        x = fmin_powell(target, x0, disp=False)

        # "x != None" compares element-wise on arrays; test identity instead
        self.assertIsNotNone(x)
        self.assertEqual(len(x), k * k * 4)

        posterior.model.set(x)
        posterior.model.normalize(True)

        xx = numpy.linspace(0, 2 * numpy.pi, 500)
        fx = posterior.model.log_prob(xx, xx)

        # both integration schemes must agree on the log partition function
        self.assertAlmostEqual(posterior.model.log_z(integration='simpson'),
                               posterior.model.log_z(integration='trapezoidal'),
                               places=2)

        self.assertIsNotNone(fx)
        z = numpy.exp(log_sum_exp(numpy.ravel(fx)))
        # xx[0] is 0, so xx[1] equals the grid spacing
        self.assertAlmostEqual(z * xx[1] ** 2, 1., places=1)


# --- csb/test/cases/statistics/mixtures.py ---

@test.functional
class TestMixtures(test.Case):
    """
    Functional tests for L{mixtures.SegmentMixture} and
    L{mixtures.ConformerMixture}, run on the C{ake-xray-ensemble-ca.pdb}
    ensemble and compared with the reference assignments below.
    """

    # reference assignment, one entry per CA position (211 in total)
    w_ref_segments = array([
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2])

    # reference assignment, one entry per model of the ensemble (16)
    w_ref_conformers = array([2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1])

    def _ake_ensemble_coords(self):
        """
        Parse the test ensemble and return its CA coordinates as a
        (16, 211, 3) array.  The result is cached on this instance.
        """
        pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
        ensemble = LegacyStructureParser(pdbfile).parse_models()
        X = array([model.get_coordinates(['CA'], True) for model in ensemble])

        self.assertEqual(X.shape, (16, 211, 3))

        # shadow this method on the instance, so that further calls on
        # the same instance reuse the parsed coordinates
        self._ake_ensemble_coords = lambda: X

        return X

    def testSegmentMixture(self):

        self._testMixture(mixtures.SegmentMixture, self.w_ref_segments)

    def testConformerMixture(self):

        self._testMixture(mixtures.ConformerMixture, self.w_ref_conformers, 14./16.)

    def _testMixture(self, cls, w_ref, min_overlap=0.9, repeats=5):
        """
        Fit a mixture of type C{cls} and require its overlap with the
        reference assignment C{w_ref} to reach C{min_overlap}.

        @param cls: mixture class to test
        @param w_ref: reference assignment
        @param min_overlap: minimum acceptable overlap
        @param repeats: maximum number of annealing attempts
        """
        X = self._ake_ensemble_coords()
        K = len(set(w_ref))

        # non-randomized heuristic with BIC
        m = cls.new(X)
        self.assertGreaterEqual(m.overlap(w_ref), min_overlap,
                                'mixture not reproduced with heuristic')

        # annealing (randomized initialization)
        m = cls(X, K, False)
        for _ in range(repeats):
            m.randomize_scales()
            m.anneal(linspace(2.0, 0.1, 10))

            if m.overlap(w_ref) >= min_overlap:
                break
        else:
            # none of the attempts reached the required overlap
            self.fail('mixture not reproduced with annealing')
@test.functional
class TestRand(test.Case):
    """
    Functional tests for the samplers in csb.statistics.rand:
    truncated_gamma, truncated_normal and sample_from_histogram.
    """

    def _checkHistogramNormalization(self, x, nbins=100):
        """
        Histogram the samples in x and re-normalize the histogram.

        The normalization may emit at most one warning; if it does, the
        warning must be numpy's 'divide by zero' RuntimeWarning.

        NOTE(review): testSampleFromHistogram unpacks density() as
        (bin positions, values), whereas the order used here is the
        opposite one, so 'hy' and 'hx' may actually be swapped. Confirm
        against csb.statistics.density before asserting on these arrays.
        """
        hy, hx = density(x, nbins)
        hx = 0.5 * (hx[1:] + hx[:-1])
        hy = hy.astype('d')

        with warnings.catch_warnings(record=True) as caught:
            # record every warning, even ones already raised earlier
            warnings.simplefilter("always")

            hy /= (hx[1] - hx[0]) * hy.sum()

        self.assertLessEqual(len(caught), 1)

        if len(caught) == 1:
            self.assertEqual(caught[0].category, RuntimeWarning)
            self.assertTrue(str(caught[0].message).startswith('divide by zero encountered'))

    def testTruncatedGamma(self):
        """All truncated_gamma samples must lie inside [x_min, x_max]."""
        alpha = 2.
        beta = 1.
        x_min = 0.1
        x_max = 5.

        x = truncated_gamma(10000, alpha, beta, x_min, x_max)

        self.assertTrue((x <= x_max).all())
        self.assertTrue((x >= x_min).all())

        self._checkHistogramNormalization(x)

        # NOTE(review): the reference density below is evaluated but never
        # compared against the histogram; kept as a smoke test of
        # log / log_sum_exp / exp.
        x = numpy.linspace(x_min, x_max, 1000)
        p = (alpha - 1) * log(x) - beta * x
        p -= log_sum_exp(p)
        p = exp(p) / (x[1] - x[0])

    def testTruncatedNormal(self):
        """
        truncated_normal samples must lie inside [x_min, x_max] and
        reproduce the mean and variance of the untruncated normal within
        a tolerance of 0.1.
        """
        mu = 2.
        sigma = 1.
        x_min = -1.
        x_max = 5.

        x = truncated_normal(10000, mu, sigma, x_min, x_max)

        self.assertAlmostEqual(numpy.mean(x), mu, delta=1e-1)
        # the variance has to be compared with sigma squared, not sigma
        self.assertAlmostEqual(numpy.var(x), sigma ** 2, delta=1e-1)

        self.assertTrue((x <= x_max).all())
        self.assertTrue((x >= x_min).all())

        self._checkHistogramNormalization(x)

        # NOTE(review): reference density is evaluated but never compared
        # against the histogram (see testTruncatedGamma).
        x = numpy.linspace(mu - 5 * sigma, mu + 5 * sigma, 1000)

        p = -0.5 * (x - mu) ** 2 / sigma ** 2
        p -= log_sum_exp(p)
        p = exp(p) / (x[1] - x[0])

    def testSampleFromHistogram(self):
        """
        Resampling bin positions from the histogram of normal samples must
        reproduce the mean and standard deviation within a tolerance of 0.5.
        """
        mu = 5.
        sigma = 1.

        normal = Normal(mu, sigma)

        x = normal.random(10000)
        hx, p = density(x, 100)

        # sample_from_histogram yields bin indices, mapped here to positions
        samples = hx[sample_from_histogram(p, n_samples=10000)]

        self.assertAlmostEqual(mu, numpy.mean(samples), delta=0.5)
        self.assertAlmostEqual(sigma, numpy.std(samples), delta=0.5)
class FunnyGaussian(object):
    """
    Toy system with the quadratic energy 0.5 * k * |x|^2 in d dimensions,
    used to generate reference data for the WHAM tests.
    """

    def __init__(self, d, k=100.):
        """Store the dimension (coerced to int) and force constant (coerced to float)."""
        self.k = float(k)
        self.d = int(d)

    def sample(self, n_samples, inv_T=1):
        """
        Draw n_samples points and return them as an (n_samples, d) array.

        Directions are standard normal vectors scaled to unit length; the
        radial part comes from truncated_gamma(n_samples, d / 2, k * inv_T,
        0, 0.5), mapped to a radius via sqrt(2 * r).
        """
        from csb.statistics.rand import truncated_gamma

        # random unit vectors, one per column
        directions = numpy.random.standard_normal((self.d, n_samples))
        lengths = numpy.sqrt(numpy.sum(directions ** 2, 0))
        directions /= lengths

        shape = 0.5 * self.d
        rate = self.k * inv_T
        radii = truncated_gamma(n_samples, shape, rate, 0., 0.5)
        radii = (2 * radii) ** 0.5

        scaled = directions * radii
        return scaled.T

    def energy(self, x):
        """Return 0.5 * k * sum(x ** 2) taken over the last axis of x."""
        coords = numpy.array(x)
        squared_norm = numpy.sum(coords ** 2, -1)
        return 0.5 * self.k * squared_norm

    def log_Z(self, beta=1.):
        """
        Return the log partition function of the system.

        NOTE(review): beta is accepted but not used by the expression
        below; confirm whether that is intended.
        """
        from scipy.special import gammainc, gammaln
        from csb.numeric import log

        half_d = 0.5 * self.d

        prefactor = log(half_d)
        truncation = log(gammainc(half_d, 0.5 * self.k))
        gamma_term = gammaln(half_d)
        scale_term = half_d * (log(2) - log(self.k))

        return prefactor + truncation + gamma_term + scale_term

    def log_g(self, energies):
        """Return (d / 2 - 1) * log(2 * E / k) + log(d / k) for the given energies E."""
        from csb.numeric import log

        exponent = 0.5 * self.d - 1
        energy_term = exponent * log(2 * energies / self.k)
        return energy_term + log(self.d / self.k)
-------------------------------------------------------------------------------- 1 | position: 1 neighbors: 1 2 | 3 | 1nz9 A 1 A L 0.000 112.130 179.977 0.000 4 | 1nz9 A 2 Q L -134.597 155.738 179.916 0.000 5 | 1nz9 A 3 V L -143.771 148.819 -179.986 0.000 6 | 1nz9 A 4 A L -86.179 -44.448 -179.977 0.000 7 | 1nz9 A 5 F L -151.432 157.481 179.946 0.000 8 | 1nz9 A 6 R L -150.513 134.103 179.981 0.000 9 | 1nz9 A 7 E L -58.233 147.798 -179.882 0.000 10 | 1nz9 A 8 G L 79.541 6.522 -179.989 0.000 11 | 1nz9 A 9 D L -86.869 174.534 179.959 0.000 12 | 1nz9 A 10 Q L -113.960 132.607 -179.950 0.000 13 | 14 | position: 6 neighbors: 1 15 | 16 | 1nz9 A 6 R L -150.513 134.103 179.981 0.000 17 | 1nz9 A 7 E L -58.233 147.798 -179.882 0.000 18 | 1nz9 A 8 G L 79.541 6.522 -179.989 0.000 19 | 1nz9 A 9 D L -86.869 174.534 179.959 0.000 20 | 1nz9 A 10 Q L -113.960 132.607 -179.950 0.000 21 | 1nz9 A 11 V L -131.901 136.198 179.915 0.000 22 | 1nz9 A 12 R L -80.030 166.265 -179.985 0.000 23 | 1nz9 A 13 V L -79.855 167.337 -179.201 0.000 24 | 1nz9 A 14 V L -79.313 -60.298 -179.542 0.000 25 | 1nz9 A 15 S L -160.425 -164.178 -179.951 0.000 26 | 27 | position: 11 neighbors: 1 28 | 29 | 1nz9 A 11 V L -131.901 136.198 179.915 0.000 30 | 1nz9 A 12 R L -80.030 166.265 -179.985 0.000 31 | 1nz9 A 13 V L -79.855 167.337 -179.201 0.000 32 | 1nz9 A 14 V L -79.313 -60.298 -179.542 0.000 33 | 1nz9 A 15 S L -160.425 -164.178 -179.951 0.000 34 | 1nz9 A 16 G L 77.366 -168.013 179.593 0.000 35 | 1nz9 A 17 P L -60.374 -19.016 179.933 0.000 36 | 1nz9 A 18 F L -95.855 13.091 -179.361 0.000 37 | 1nz9 A 19 A L -79.136 39.692 179.336 0.000 38 | 1nz9 A 20 D L -160.702 34.873 -179.986 0.000 39 | 40 | position: 16 neighbors: 1 41 | 42 | 1nz9 A 16 G L 77.366 -168.013 179.593 0.000 43 | 1nz9 A 17 P L -60.374 -19.016 179.933 0.000 44 | 1nz9 A 18 F L -95.855 13.091 -179.361 0.000 45 | 1nz9 A 19 A L -79.136 39.692 179.336 0.000 46 | 1nz9 A 20 D L -160.702 34.873 -179.986 0.000 47 | 1nz9 A 21 F L -148.003 167.131 179.888 
0.000 48 | 1nz9 A 22 T L -127.158 -173.979 -179.963 0.000 49 | 1nz9 A 23 G L -150.426 164.811 -179.999 0.000 50 | 1nz9 A 24 T L -103.992 108.474 -179.996 0.000 51 | 1nz9 A 25 V L -57.418 113.884 179.985 0.000 52 | 53 | position: 21 neighbors: 1 54 | 55 | 1nz9 A 21 F L -148.003 167.131 179.888 0.000 56 | 1nz9 A 22 T L -127.158 -173.979 -179.963 0.000 57 | 1nz9 A 23 G L -150.426 164.811 -179.999 0.000 58 | 1nz9 A 24 T L -103.992 108.474 -179.996 0.000 59 | 1nz9 A 25 V L -57.418 113.884 179.985 0.000 60 | 1nz9 A 26 T L -98.876 -33.916 179.895 0.000 61 | 1nz9 A 27 E L -153.483 158.479 179.975 0.000 62 | 1nz9 A 28 I L -129.880 158.690 -179.997 0.000 63 | 1nz9 A 29 N L -135.295 86.706 179.938 0.000 64 | 1nz9 A 30 P L -71.694 -34.126 -179.928 0.000 65 | 66 | position: 26 neighbors: 1 67 | 68 | 1nz9 A 26 T L -98.876 -33.916 179.895 0.000 69 | 1nz9 A 27 E L -153.483 158.479 179.975 0.000 70 | 1nz9 A 28 I L -129.880 158.690 -179.997 0.000 71 | 1nz9 A 29 N L -135.295 86.706 179.938 0.000 72 | 1nz9 A 30 P L -71.694 -34.126 -179.928 0.000 73 | 1nz9 A 31 E L -58.984 -33.552 -179.959 0.000 74 | 1nz9 A 32 R L -110.233 -61.226 -179.937 0.000 75 | 1nz9 A 33 G L 127.040 42.732 -179.996 0.000 76 | 1nz9 A 34 K L -115.588 170.344 179.961 0.000 77 | 1nz9 A 35 V L -155.367 137.079 179.998 0.000 78 | 79 | position: 31 neighbors: 1 80 | 81 | 1nz9 A 31 E L -58.984 -33.552 -179.959 0.000 82 | 1nz9 A 32 R L -110.233 -61.226 -179.937 0.000 83 | 1nz9 A 33 G L 127.040 42.732 -179.996 0.000 84 | 1nz9 A 34 K L -115.588 170.344 179.961 0.000 85 | 1nz9 A 35 V L -155.367 137.079 179.998 0.000 86 | 1nz9 A 36 K L -114.688 128.253 -179.986 0.000 87 | 1nz9 A 37 V L -139.876 142.506 179.883 0.000 88 | 1nz9 A 38 M L -89.925 104.020 -179.791 0.000 89 | 1nz9 A 39 V L -91.762 171.108 179.934 0.000 90 | 1nz9 A 40 T L -132.415 115.344 -179.976 0.000 91 | 92 | position: 36 neighbors: 1 93 | 94 | 1nz9 A 36 K L -114.688 128.253 -179.986 0.000 95 | 1nz9 A 37 V L -139.876 142.506 179.883 0.000 96 | 1nz9 A 38 M L 
-89.925 104.020 -179.791 0.000 97 | 1nz9 A 39 V L -91.762 171.108 179.934 0.000 98 | 1nz9 A 40 T L -132.415 115.344 -179.976 0.000 99 | 1nz9 A 41 I L -120.233 133.825 179.963 0.000 100 | 1nz9 A 42 F L 61.736 30.418 179.965 0.000 101 | 1nz9 A 43 G L 70.276 45.256 -179.931 0.000 102 | 1nz9 A 44 R L -159.979 138.860 179.979 0.000 103 | 1nz9 A 45 E L -60.803 151.942 -179.994 0.000 104 | 105 | position: 41 neighbors: 1 106 | 107 | 1nz9 A 41 I L -120.233 133.825 179.963 0.000 108 | 1nz9 A 42 F L 61.736 30.418 179.965 0.000 109 | 1nz9 A 43 G L 70.276 45.256 -179.931 0.000 110 | 1nz9 A 44 R L -159.979 138.860 179.979 0.000 111 | 1nz9 A 45 E L -60.803 151.942 -179.994 0.000 112 | 1nz9 A 46 T L -160.015 91.165 179.949 0.000 113 | 1nz9 A 47 P L -68.372 117.453 -179.998 0.000 114 | 1nz9 A 48 V L -132.835 154.984 179.741 0.000 115 | 1nz9 A 49 E L -98.797 138.100 -179.796 0.000 116 | 1nz9 A 50 L L -144.335 -162.203 178.424 0.000 117 | 118 | position: 46 neighbors: 1 119 | 120 | 1nz9 A 46 T L -160.015 91.165 179.949 0.000 121 | 1nz9 A 47 P L -68.372 117.453 -179.998 0.000 122 | 1nz9 A 48 V L -132.835 154.984 179.741 0.000 123 | 1nz9 A 49 E L -98.797 138.100 -179.796 0.000 124 | 1nz9 A 50 L L -144.335 -162.203 178.424 0.000 125 | 1nz9 A 51 D L -153.110 156.707 -179.714 0.000 126 | 1nz9 A 52 F L -45.858 -29.377 -179.493 0.000 127 | 1nz9 A 53 S L -76.012 -3.453 -179.846 0.000 128 | 1nz9 A 54 Q L -131.768 15.076 179.869 0.000 129 | 1nz9 A 55 V L -142.001 170.027 -179.926 0.000 130 | 131 | position: 51 neighbors: 1 132 | 133 | 1nz9 A 51 D L -153.110 156.707 -179.714 0.000 134 | 1nz9 A 52 F L -45.858 -29.377 -179.493 0.000 135 | 1nz9 A 53 S L -76.012 -3.453 -179.846 0.000 136 | 1nz9 A 54 Q L -131.768 15.076 179.869 0.000 137 | 1nz9 A 55 V L -142.001 170.027 -179.926 0.000 138 | 1nz9 A 56 V L -142.412 146.709 -179.953 0.000 139 | 1nz9 A 57 K L -83.362 131.334 -179.997 0.000 140 | 1nz9 A 58 A L -125.576 0.000 0.000 0.000 141 | 142 | position: 56 neighbors: 1 143 | 144 | 1nz9 A 56 V L 
-142.412 146.709 -179.953 0.000 145 | 1nz9 A 57 K L -83.362 131.334 -179.997 0.000 146 | 1nz9 A 58 A L -125.576 0.000 0.000 0.000 147 | 148 | -------------------------------------------------------------------------------- /csb/test/data/2JZC.sum: -------------------------------------------------------------------------------- 1 | 2 | +----------<<< P R O C H E C K S U M M A R Y >>>----------+ 3 | | | 4 | | 2JZC.pdb 1.5 201 residues | 5 | | | 6 | *| Ramachandran plot: 69.5% core 22.6% allow 5.6% gener 2.3% disall | 7 | | | 8 | *| All Ramachandrans: 37 labelled residues (out of 199) | 9 | *| Chi1-chi2 plots: 9 labelled residues (out of 123) | 10 | +| Main-chain params: 4 better 0 inside 2 worse | 11 | | Side-chain params: 5 better 0 inside 0 worse | 12 | | | 13 | *| Residue properties: Max.deviation: 42.0 Bad contacts: 5581 | 14 | +| Bond len/angle: 3.3 Morris et al class: 2 2 2 | 15 | | | 16 | | G-factors Dihedrals: -0.10 Covalent: 0.51 Overall: 0.14 | 17 | | | 18 | | M/c bond lengths: 99.1% within limits 0.9% highlighted | 19 | | M/c bond angles: 99.9% within limits 0.1% highlighted | 20 | | Planar groups: 100.0% within limits 0.0% highlighted | 21 | | | 22 | +----------------------------------------------------------------------------+ 23 | + May be worth investigating further. * Worth investigating further. 
24 | 25 | -------------------------------------------------------------------------------- /csb/test/data/2l01.v2.str: -------------------------------------------------------------------------------- 1 | save_assigned_chem_shift_list 2 | save_ 3 | 4 | save_assigned_chem_shift_list_1 5 | 6 | loop_ 7 | _Atom_shift_assign_ID 8 | _Residue_author_seq_code 9 | _Residue_seq_code 10 | _Residue_label 11 | _Atom_name 12 | _Atom_type 13 | _Chem_shift_value 14 | _Chem_shift_value_error 15 | _Chem_shift_ambiguity_code 16 | 17 | 1 1 1 MET HA H 3.977 0.020 1 18 | 2 1 1 MET HB2 H 2.092 0.020 1 19 | 3 1 1 MET HB3 H 2.092 0.020 1 20 | 4 1 1 MET HE H 2.111 0.020 1 21 | 5 1 1 MET HG2 H 2.580 0.020 1 22 | 6 1 1 MET HG3 H 2.580 0.020 1 23 | 7 1 1 MET CA C 55.300 0.200 1 24 | 8 1 1 MET CB C 33.840 0.200 1 25 | 9 1 1 MET CE C 16.841 0.200 1 26 | 10 1 1 MET CG C 30.975 0.200 1 27 | 11 2 2 LYS HA H 4.423 0.020 1 28 | 29 | stop_ 30 | 31 | save_ 32 | -------------------------------------------------------------------------------- /csb/test/data/2l01.v3.str: -------------------------------------------------------------------------------- 1 | save_assigned_chem_shift_list 2 | save_ 3 | 4 | save_assigned_chem_shift_list_1 5 | 6 | loop_ 7 | _Atom_chem_shift.ID 8 | _Atom_chem_shift.Assembly_atom_ID 9 | _Atom_chem_shift.Entity_assembly_ID 10 | _Atom_chem_shift.Entity_ID 11 | _Atom_chem_shift.Comp_index_ID 12 | _Atom_chem_shift.Seq_ID 13 | _Atom_chem_shift.Comp_ID 14 | _Atom_chem_shift.Atom_ID 15 | _Atom_chem_shift.Atom_type 16 | _Atom_chem_shift.Atom_isotope_number 17 | _Atom_chem_shift.Val 18 | _Atom_chem_shift.Val_err 19 | _Atom_chem_shift.Assign_fig_of_merit 20 | _Atom_chem_shift.Ambiguity_code 21 | _Atom_chem_shift.Occupancy 22 | _Atom_chem_shift.Resonance_ID 23 | _Atom_chem_shift.Auth_entity_assembly_ID 24 | _Atom_chem_shift.Auth_seq_ID 25 | _Atom_chem_shift.Auth_comp_ID 26 | _Atom_chem_shift.Auth_atom_ID 27 | _Atom_chem_shift.Details 28 | _Atom_chem_shift.Entry_ID 29 | 
_Atom_chem_shift.Assigned_chem_shift_list_ID 30 | 31 | 1 . 1 1 1 1 MET HA H 1 3.977 0.020 . 1 . . . 1 MET HA . 17025 1 32 | 2 . 1 1 1 1 MET HB2 H 1 2.092 0.020 . 1 . . . 1 MET HB2 . 17025 1 33 | 3 . 1 1 1 1 MET HB3 H 1 2.092 0.020 . 1 . . . 1 MET HB3 . 17025 1 34 | 4 . 1 1 1 1 MET HE1 H 1 2.111 0.020 . 1 . . . 1 MET HE . 17025 1 35 | 5 . 1 1 1 1 MET HG2 H 1 2.580 0.020 . 1 . . . 1 MET HG2 . 17025 1 36 | 6 . 1 1 1 1 MET HG3 H 1 2.580 0.020 . 1 . . . 1 MET HG3 . 17025 1 37 | 7 . 1 1 1 1 MET CA C 13 55.300 0.200 . 1 . . . 1 MET CA . 17025 1 38 | 8 . 1 1 1 1 MET CB C 13 33.840 0.200 . 1 . . . 1 MET CB . 17025 1 39 | 9 . 1 1 1 1 MET CE C 13 16.841 0.200 . 1 . . . 1 MET CE . 17025 1 40 | 10 . 1 1 1 1 MET CG C 13 30.975 0.200 . 1 . . . 1 MET CG . 17025 1 41 | 12 . 1 1 2 2 LYS HA H 1 4.423 0.020 . 1 . . . 2 LYS HA . 17025 1 42 | 43 | stop_ 44 | 45 | save_ 46 | -------------------------------------------------------------------------------- /csb/test/data/Sparky.peaks: -------------------------------------------------------------------------------- 1 | Assignment w1 w2 w3 Data Height Note 2 | 3 | ?-?-? 3.418 114.437 7.440 157921 4 | ?-?-? 0.972 114.476 7.443 204746 5 | ?-?-? 
1.147 114.481 7.445 147454 6 | -------------------------------------------------------------------------------- /csb/test/data/Xeasy1.peaks: -------------------------------------------------------------------------------- 1 | # Number of dimensions 3 2 | #INAME 1 H1 3 | #INAME 2 C2 4 | #INAME 3 H3 5 | #CYANAFORMAT hCH 6 | 1 7.050 10.374 0.889 2 U 1.565890e+05 0.00e+00 m 0 0 0 0 0 7 | 2 8.921 10.397 0.892 2 U 1.291120e+05 0.00e+00 m 0 0 0 0 0 8 | 3 2.307 10.430 0.891 2 U 4.243830e+05 0.00e+00 m 0 0 0 0 0 9 | 10 | -------------------------------------------------------------------------------- /csb/test/data/Xeasy2.peaks: -------------------------------------------------------------------------------- 1 | # Number of dimensions 3 2 | #INAME 1 H1 3 | #INAME 2 2C 4 | #INAME 3 3H 5 | 1 7.050 10.374 0.889 2 U 1.565890e+05 0.00e+00 m 0 0 0 0 0 6 | 2 8.921 10.397 0.892 2 U 1.291120e+05 0.00e+00 m 0 0 0 0 0 7 | 3 2.307 10.430 0.891 2 U 4.243830e+05 0.00e+00 m 0 0 0 0 0 8 | 9 | -------------------------------------------------------------------------------- /csb/test/data/csb.tsv: -------------------------------------------------------------------------------- 1 | # @TSV ID:int A:float B:str 2 | 11 11.1 Row eleven 3 | 12 12.2 Row twelve 4 | 13 Row thirteen 5 | -------------------------------------------------------------------------------- /csb/test/data/d1nz0a_.a3m: -------------------------------------------------------------------------------- 1 | >d1nz0a_ d.14.1.2 (A:) RNase P protein {Thermotoga maritima [TaxId: 2336]} 2 | ERLRLRRDFLLIFKEGKSLQNEYFVVLFRKNGMDYSRLGIVVKRKFGKATRRNKLKRWVREIFRRNKGVIPKGFDIVVIPRKKLSEEFERVDFWTVREKLLNLLKRIEG 3 | >gi|108802371|ref|YP_642568.1|(7-116:118) ribonuclease P [Mycobacterium sp. MCS] gi|119866064|ref|YP_936016.1| ribonuclease P [Mycobacterium sp. KMS] gi|126438351|ref|YP_001074042.1| ribonuclease P [Mycobacterium sp. 
JLS] gi|123177783|sp|Q1B0S2.1|RNPA_MYCSS RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|166226724|sp|A3Q8S4.1|RNPA_MYCSJ RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|166226725|sp|A1U8R8.1|RNPA_MYCSK RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|108772790|gb|ABG11512.1| ribonuclease P protein component [Mycobacterium sp. MCS] gi|119692153|gb|ABL89226.1| ribonuclease P protein component [Mycobacterium sp. KMS] gi|126238151|gb|ABO01552.1| ribonuclease P protein component [Mycobacterium sp. JLS] E=3e-08 s/c=0.55 id=17% cov=100% 4 | -RMTRSTEFSTTVSKGVRSAQPDLVLHMANvlDDPSGPRVGLVVAKSVGNAVVRHRVSRRLRHSVHPMLDELQPGHRLVIRALPGAASATSARLHQELSAALRRARPRVEA 5 | >gi|227373914|ref|ZP_03857386.1|(6-99:111) ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798] gi|227062537|gb|EEI01571.1| ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798] E=3e-06 s/c=0.57 id=23% cov=87% 6 | -RLTSSKDWKEVRTRGRCSRSSFATICVLFEGESE-KFGFAAAKSIGSVAKRNRAKRRLREAFRQTYKFGSKPCLVIAIA----GPECLTMDFQELKSKL--------- 7 | >gi|124010240|ref|ZP_01694895.1|(9-122:122) ribonuclease P protein component [Microscilla marina ATCC 23134] gi|123983732|gb|EAY24164.1| ribonuclease P protein component [Microscilla marina ATCC 23134] E=8e-05 s/c=0.43 id=24% cov=99% 8 | ERLKSKKIIQSLFPKGKDAFVYPIKvkyILHPTPSNTPPQVLFTVPKRtFKRAVDRNAIKRLLKEAYRLNKHLLhdeAGSYKIAYIAFVYIAK--EKLPFDTIERKTISVFERLKG 9 | >gi|139352214|gb|ECE59672.1|(37-150:150) hypothetical protein GOS_6065400 [marine metagenome] gi|142774203|gb|EDA48250.1| hypothetical protein GOS_1993299 [marine metagenome] gi|139024765|gb|ECC88500.1| hypothetical protein GOS_5642689 [marine metagenome] gi|139647524|gb|ECG49761.1| hypothetical protein GOS_5517516 [marine metagenome] E=0.0002 s/c=0.42 id=21% cov=96% 10 | 
ESLKKSSHFGTVLKN-RVINNDFYTIYRKKNfikkasNEKKLYISFVMKKKVGNAVKRNRIKRKLKgvvQKMLKINNSINLNYTYVIFGKEKIYSEHSNSLFKNMEKSFNKINK---- 11 | >gi|137813163|gb|EBW14305.1|(5-114:118) hypothetical protein GOS_6793674 [marine metagenome] gi|143750626|gb|EDG59861.1| hypothetical protein GOS_754256 [marine metagenome] E=2e-12 s/c=0.68 id=24% cov=99% 12 | KRMTKRGDFLRAQQGNIKYITSSVVIQLIPNDIQgkfSTRVGFTASKKIGNAVKRNYAKRLMRSLVYRQSNELASSFDYVFIARQAILNKKFYLIESEIMRVLKHFNKNI-- 13 | >gi|148654187|ref|YP_001281280.1|(10-116:130) ribonuclease P protein component [Psychrobacter sp. PRwf-1] gi|229470482|sp|A5WI39.1|RNPA_PSYWF RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|148573271|gb|ABQ95330.1| ribonuclease P protein component [Psychrobacter sp. PRwf-1] E=1e-11 s/c=0.67 id=24% cov=97% 14 | KRLLKPAEFKPVFNQPlFKVHQTHFMAFAYDSDHLQARLGMAItKKKIPTAVARNTIKRIIREQFRHTHAQLPA-LDVVFILKKSTKALSNEQMRQEISDILSKVISK--- 15 | >gi|142801636|gb|EDA68688.1|(13-118:120) hypothetical protein GOS_1956086 [marine metagenome] E=4e-05 s/c=0.48 id=23% cov=95% 16 | --LKVNSSTIKILNNKPVYNSKILKLYTIPNSEDGPRLAIQITKRaIRLAVTRNLVRRKIKEDFRANYAEIAKHDCLLVISSKisSAKHEISDILMQEWKQSLKSLEK---- 17 | >gi|143373151|gb|EDE62902.1|(90-193:197) hypothetical protein GOS_1097530 [marine metagenome] E=0.0003 s/c=0.46 id=21% cov=95% 18 | -RLSRSHEFQRLRREGTRVRSGYLwCVMLQDPSLPGPAVAFAIGRPFGSAVRRNRLRRQLRSILSDRESAMGGG--MFLIGVNNPHRDLPMPSFAQLTHDIDEILNK--- -------------------------------------------------------------------------------- /csb/test/data/d1nz0a_.mfasta: -------------------------------------------------------------------------------- 1 | >d1nz0a_ d.14.1.2 (A:) RNase P protein {Thermotoga maritima [TaxId: 2336]} 2 | ERLRLRRDFLLIFKEG-KSLQNEYF-V---VLFRK--N------GMD---YSRLGIVV-KRK-FGKATRRNKLKRWVR---EIFRRNKGVI---PKGFDIVVIPRK--KLSEEFERVDFWTVREKLLNLLKRIEG 3 | >gi|108802371|ref|YP_642568.1|(7-116:118) ribonuclease P [Mycobacterium sp. 
MCS] gi|119866064|ref|YP_936016.1| ribonuclease P [Mycobacterium sp. KMS] gi|126438351|ref|YP_001074042.1| ribonuclease P [Mycobacterium sp. JLS] gi|123177783|sp|Q1B0S2.1|RNPA_MYCSS RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|166226724|sp|A3Q8S4.1|RNPA_MYCSJ RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|166226725|sp|A1U8R8.1|RNPA_MYCSK RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|108772790|gb|ABG11512.1| ribonuclease P protein component [Mycobacterium sp. MCS] gi|119692153|gb|ABL89226.1| ribonuclease P protein component [Mycobacterium sp. KMS] gi|126238151|gb|ABO01552.1| ribonuclease P protein component [Mycobacterium sp. JLS] E=3e-08 s/c=0.55 id=17% cov=100% 4 | -RMTRSTEFSTTVSKG-VRSAQPDL-V---LHMANVLD------DPS---GPRVGLVV-AKS-VGNAVVRHRVSRRLR---HSVHPMLDEL---QPGHRLVIRALP--GAASATSARLHQELSAALRRARPRVEA 5 | >gi|227373914|ref|ZP_03857386.1|(6-99:111) ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798] gi|227062537|gb|EEI01571.1| ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798] E=3e-06 s/c=0.57 id=23% cov=87% 6 | -RLTSSKDWKEVRTRG-RCSRSSFA-T---ICVLF--E------GES---E-KFGFAA-AKS-IGSVAKRNRAKRRLR---EAFRQTYKFG---SKPCLVIAIA------GPECLTMDFQELKSKL--------- 7 | >gi|124010240|ref|ZP_01694895.1|(9-122:122) ribonuclease P protein component [Microscilla marina ATCC 23134] gi|123983732|gb|EAY24164.1| ribonuclease P protein component [Microscilla marina ATCC 23134] E=8e-05 s/c=0.43 id=24% cov=99% 8 | ERLKSKKIIQSLFPKG-KDAFVYPI-KVKYILHPT--P------SNT---PPQVLFTV-PKRTFKRAVDRNAIKRLLK---EAYRLNKHLLHDEAGSYKIAYIAFV--YIAK--EKLPFDTIERKTISVFERLKG 9 | >gi|139352214|gb|ECE59672.1|(37-150:150) hypothetical protein GOS_6065400 [marine metagenome] gi|142774203|gb|EDA48250.1| hypothetical protein GOS_1993299 [marine metagenome] 
gi|139024765|gb|ECC88500.1| hypothetical protein GOS_5642689 [marine metagenome] gi|139647524|gb|ECG49761.1| hypothetical protein GOS_5517516 [marine metagenome] E=0.0002 s/c=0.42 id=21% cov=96% 10 | ESLKKSSHFGTVLKN--RVINNDFY-T---IYRKK--NFIKKASNEK---KLYISFVM-KKK-VGNAVKRNRIKRKLKGVVQKMLKINNSI---NLNYTYVIFGKE--KIYSEHSNSLFKNMEKSFNKINK---- 11 | >gi|137813163|gb|EBW14305.1|(5-114:118) hypothetical protein GOS_6793674 [marine metagenome] gi|143750626|gb|EDG59861.1| hypothetical protein GOS_754256 [marine metagenome] E=2e-12 s/c=0.68 id=24% cov=99% 12 | KRMTKRGDFLRAQQGN-IKYITSSV-V---IQLIP--N------DIQGKFSTRVGFTA-SKK-IGNAVKRNYAKRLMR---SLVYRQSNEL---ASSFDYVFIARQ--AILNKKFYLIESEIMRVLKHFNKNI-- 13 | >gi|148654187|ref|YP_001281280.1|(10-116:130) ribonuclease P protein component [Psychrobacter sp. PRwf-1] gi|229470482|sp|A5WI39.1|RNPA_PSYWF RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5 gi|148573271|gb|ABQ95330.1| ribonuclease P protein component [Psychrobacter sp. 
PRwf-1] E=1e-11 s/c=0.67 id=24% cov=97% 14 | KRLLKPAEFKPVFNQPLFKVHQTHF-M---AFAYD--S------DHL---QARLGMAITKKK-IPTAVARNTIKRIIR---EQFRHTHAQL---PA-LDVVFILKK--STKALSNEQMRQEISDILSKVISK--- 15 | >gi|142801636|gb|EDA68688.1|(13-118:120) hypothetical protein GOS_1956086 [marine metagenome] E=4e-05 s/c=0.48 id=23% cov=95% 16 | --LKVNSSTIKILNNK-PVYNSKIL-K---LYTIP--N------SED---GPRLAIQI-TKRAIRLAVTRNLVRRKIK---EDFRANYAEI---AKHDCLLVISSKISSAKHEISDILMQEWKQSLKSLEK---- 17 | >gi|143373151|gb|EDE62902.1|(90-193:197) hypothetical protein GOS_1097530 [marine metagenome] E=0.0003 s/c=0.46 id=21% cov=95% 18 | -RLSRSHEFQRLRREG-TRVRSGYLWC---VMLQD--P------SLP---GPAVAFAI-GRP-FGSAVRRNRLRRQLR---SILSDRESAM---GGG--MFLIGVN--NPHRDLPMPSFAQLTHDIDEILNK--- -------------------------------------------------------------------------------- /csb/test/data/mapping.pdb: -------------------------------------------------------------------------------- 1 | HEADER RIBOSOME 30-MAR-01 1GIY 2 | COMPND MOL_ID: 1; 3 | COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3; 4 | COMPND 3 CHAIN: E; 5 | SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET 6 | SEQRES 2 E 338 GLU GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO 7 | ATOM 3430 CA MET E 65 -35.315 183.547 344.254 1.00 0.00 C 8 | ATOM 3431 CA GLU E 66 -31.330 184.145 343.173 1.00 0.00 C 9 | ATOM 3432 CA THR E 67 -27.574 184.326 344.054 1.00 0.00 C 10 | ATOM 3433 CA VAL E 68 -25.637 187.772 343.919 1.00 0.00 C 11 | TER 3634 VAL E 68 12 | END 13 | -------------------------------------------------------------------------------- /csb/test/data/mapping2.pdb: -------------------------------------------------------------------------------- 1 | HEADER RIBOSOME 30-MAR-01 1GIY 2 | COMPND MOL_ID: 1; 3 | COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3; 4 | COMPND 3 CHAIN: E; 5 | SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET 6 | SEQRES 2 E 338 GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO 7 | ATOM 3430 CA MET E 65 -35.315 183.547 344.254 1.00 0.00 C 8 | 
ATOM 3433 CA VAL E 68 -25.637 187.772 343.919 1.00 0.00 C 9 | TER 3634 VAL E 68 10 | END 11 | -------------------------------------------------------------------------------- /csb/test/data/mapping3.pdb: -------------------------------------------------------------------------------- 1 | HEADER RIBOSOME 30-MAR-01 1GIY 2 | COMPND MOL_ID: 1; 3 | COMPND 2 MOLECULE: 50S RIBOSOMAL PROTEIN L3; 4 | COMPND 3 CHAIN: E; 5 | SEQRES 1 E 338 LEU VAL ASN ASP GLU PRO ASN 6 | ATOM 3430 CA SER E 65 -35.315 183.547 344.254 1.00 0.00 C 7 | ATOM 3433 CA GLY E 68 -25.637 187.772 343.919 1.00 0.00 C 8 | TER 3634 GLY E 68 9 | END 10 | -------------------------------------------------------------------------------- /csb/test/data/maxent.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csb-toolbox/CSB/1a858c9a8bbb5e528b06dc0ffb67cf151489413b/csb/test/data/maxent.pickle -------------------------------------------------------------------------------- /csb/test/data/modified.pdb: -------------------------------------------------------------------------------- 1 | HEADER . 
12-Mar-13 TEST 2 | COMPND 1 MOL_ID: 1; 3 | COMPND 2 MOLECULE: HYPOTHETICAL PROTEIN RV0983; 4 | COMPND 3 CHAIN: A; 5 | SEQRES 1 A 20 MET PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL 6 | SEQRES 2 A 20 VAL PRO SER VAL VAL MSE 7 | ATOM 95 N MSE A 21 55.075 23.677 19.139 1.00 33.35 N 8 | ATOM 96 CA MSE A 21 54.672 23.803 17.741 1.00 37.62 C 9 | ATOM 97 C MSE A 21 54.539 22.403 17.111 1.00 35.23 C 10 | ATOM 98 O MSE A 21 55.344 21.485 17.419 1.00 34.50 O 11 | ATOM 99 CB MSE A 21 55.662 24.626 16.915 1.00 34.96 C 12 | ATOM 100 CG MSE A 21 55.211 24.834 15.458 1.00 34.65 C 13 | ATOM 101 SE MSE A 21 56.402 26.367 14.841 1.00 52.51 Se 14 | ATOM 102 CE MSE A 21 56.143 26.398 12.634 1.00 50.46 C 15 | TER 16 | END 17 | -------------------------------------------------------------------------------- /csb/test/data/modified2.pdb: -------------------------------------------------------------------------------- 1 | HEADER . 12-Mar-13 TEST 2 | COMPND 1 MOL_ID: 1; 3 | COMPND 2 MOLECULE: HYPOTHETICAL PROTEIN RV0983; 4 | COMPND 3 CHAIN: A; 5 | SEQRES 1 A 20 MSE PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL 6 | SEQRES 2 A 20 VAL PRO SER VAL VAL MET 7 | ATOM 95 N MSE A 21 55.075 23.677 19.139 1.00 33.35 N 8 | ATOM 96 CA MSE A 21 54.672 23.803 17.741 1.00 37.62 C 9 | ATOM 97 C MSE A 21 54.539 22.403 17.111 1.00 35.23 C 10 | ATOM 98 O MSE A 21 55.344 21.485 17.419 1.00 34.50 O 11 | ATOM 99 CB MSE A 21 55.662 24.626 16.915 1.00 34.96 C 12 | ATOM 100 CG MSE A 21 55.211 24.834 15.458 1.00 34.65 C 13 | ATOM 101 SE MSE A 21 56.402 26.367 14.841 1.00 52.51 Se 14 | ATOM 102 CE MSE A 21 56.143 26.398 12.634 1.00 50.46 C 15 | TER 16 | END 17 | -------------------------------------------------------------------------------- /csb/test/data/standard.tsv: -------------------------------------------------------------------------------- 1 | 11 11.1 Row eleven 2 | 12 12.2 Row twelve 3 | 13 13.3 Row thirteen 4 | -------------------------------------------------------------------------------- 
/csb/test/data/struct.ali.mfasta: -------------------------------------------------------------------------------- 1 | >3p1uB 2 | ENPDKPTDDV-NYNMNEPRLASTLR--G 3 | >1d3zA 4 | EV-EPS-DTIENVK------AKIQDKEG 5 | -------------------------------------------------------------------------------- /csb/test/data/test.fa: -------------------------------------------------------------------------------- 1 | >gi|148654187 ribonuclease P protein component 2 | KRLLKPAEFKPVFNQPlFKVHQTHFMAFAYDSDHLQARLGMAItKKKIPTAVARNTIKRIIREQFRHTHAQLPALDVVF 3 | ILKKSTKALSNEQMRQEISDILSKVISK 4 | 5 | >gi|142801636|gb|EDA68688.1 hypothetical protein GOS_1956086 6 | LKVNSSTIKILNNKPVYNSKILKLYTIPNSEDGPRLAIQITKRaIRLAVTRNLVRRKIKEDFRANYAEIAKHDCLLVIS 7 | SKisSAKHEISDILMQEWKQSLKSLEK 8 | 9 | >gi|143373151 10 | RLSRSHEFQRLRREGTRVRSGYLwCVMLQDPSLPGPAVAFAIGRPFGSAVRRNRLRRQLRSILSDRESAMGGGMFLIGV 11 | NNPHRDLPMPSFAQLTHDIDEILNK -------------------------------------------------------------------------------- /csb/test/data/test.hhm: -------------------------------------------------------------------------------- 1 | HHsearch 1.5 2 | NAME name 3 | FAM fam 4 | LENG 2 match states, 2 columns in multiple alignment 5 | NEFF 10 6 | PCT False 7 | SEQ 8 | # 9 | NULL 5346 8263 5863 10 | HMM A C E 11 | M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D 12 | 321 3321 3321 * * * * * * * 13 | A 1 0 1000 1584 1 14 | 321 3321 3321 1000 1000 1000 1000 * * * 15 | 16 | C 2 0 1000 1584 2 17 | 152 3321 * 1000 1000 0 * * * * 18 | 19 | // -------------------------------------------------------------------------------- /csb/test/data/test2.hhm: -------------------------------------------------------------------------------- 1 | HHsearch 1.5 2 | NAME name 3 | FAM fam 4 | LENG 2 match states, 2 columns in multiple alignment 5 | NEFF 0.0 6 | PCT False 7 | SEQ 8 | # 9 | NULL 5346 8263 5863 10 | HMM A C E 11 | M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D 12 | 0 * 0 * * * * * * * 13 | A 1 0 1000 1584 1 14 | 321 3321 3321 1000 1000 1000 
# Setuptools build script: gathers the package metadata, the dependency
# list and the package version, then hands them to setuptools.setup().

import sys

from setuptools import setup, find_packages
from csb.build import ROOT
from io import open


def _read_text(path):
    """
    Return the UTF-8 decoded content of the file at `path`.

    The file is opened in a context manager so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(path, encoding="utf-8") as stream:
        return stream.read()


def _read_requirements(path):
    """
    Return the requirement specifiers listed in the file at `path`.

    Surrounding whitespace (including the trailing newline) is stripped from
    every entry; blank lines and '#' comment lines are skipped.
    """
    entries = (line.strip() for line in _read_text(path).splitlines())
    return [entry for entry in entries if entry and not entry.startswith("#")]


# The README doubles as the long description; fall back to an empty
# description when the file cannot be read.
try:
    __doc__ = _read_text('README.rst')
except IOError:
    __doc__ = ""


NAME = ROOT
AUTHOR = "Michael Habeck et al."
EMAIL = "ivan.kalev@gmail.com"
URL = "http://github.com/csb-toolbox"
SUMMARY = "Computational Structural Biology Toolbox"
DESCRIPTION = __doc__
LICENSE = 'MIT'

REQUIREMENTS = _read_requirements("requirements.txt")
DEV_REQUIREMENTS = ["setuptools"]

# Execute the package's __init__.py in a scratch namespace and instantiate
# the Version class it defines.
v = {}
exec(_read_text(ROOT + "/__init__.py"), v)
VERSION = v["Version"]()


def build():
    """
    Call setuptools.setup() with the metadata defined in this module.

    @return: whatever setuptools.setup() returns
    """
    return setup(
        name=NAME,
        packages=find_packages(),
        include_package_data=True,
        version=VERSION.short,
        author=AUTHOR,
        author_email=EMAIL,
        url=URL,
        description=SUMMARY,
        long_description=DESCRIPTION,
        license=LICENSE,
        install_requires=REQUIREMENTS,
        tests_require=DEV_REQUIREMENTS,
        extras_require={
            'dev': DEV_REQUIREMENTS
        },
        test_suite="csb.test.cases",
        entry_points={
            'console_scripts': [
                'csb-test = csb.test.app:main',
                'csb-bfit = csb.apps.bfit:main',
                'csb-bfite = csb.apps.bfite:main',
                'csb-csfrag = csb.apps.csfrag:main',
                'csb-hhfrag = csb.apps.hhfrag:main',
                'csb-buildhmm = csb.apps.buildhmm:main',
                'csb-hhsearch = csb.apps.hhsearch:main',
                'csb-precision = csb.apps.precision:main',
                'csb-promix = csb.apps.promix:main',
                'csb-embd = csb.apps.embd:main'
            ]
        },
        # A list, not a tuple: setuptools/distutils expects list-valued
        # metadata here and warns when given another sequence type.
        classifiers=[
            'Development Status :: 5 - Production/Stable',
            'Intended Audience :: Developers',
            'Intended Audience :: Science/Research',
            'License :: OSI Approved :: MIT License',
            'Operating System :: OS Independent',
            'Programming Language :: Python',
            'Programming Language :: Python :: 3.6',
            'Programming Language :: Python :: 3.7',
            'Programming Language :: Python :: 3.8',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: 3.10',
            'Programming Language :: Python :: 3.11',
            'Programming Language :: Python :: 3.12',
            'Topic :: Scientific/Engineering',
            'Topic :: Scientific/Engineering :: Bio-Informatics',
            'Topic :: Scientific/Engineering :: Mathematics',
            'Topic :: Scientific/Engineering :: Physics',
            'Topic :: Software Development :: Libraries'
        ]
    )


if __name__ == '__main__':
    build()