├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .travis.yml
├── INSTALL.rst
├── LICENSE.rst
├── MANIFEST.in
├── README.rst
├── csb
    ├── __init__.py
    ├── apps
    │   ├── __init__.py
    │   ├── bfit.py
    │   ├── bfite.py
    │   ├── buildhmm.py
    │   ├── csfrag.py
    │   ├── embd.py
    │   ├── helloworld.py
    │   ├── hhfrag.py
    │   ├── hhsearch.py
    │   ├── precision.py
    │   └── promix.py
    ├── bio
    │   ├── __init__.py
    │   ├── fragments
    │   │   ├── __init__.py
    │   │   └── rosetta.py
    │   ├── hmm
    │   │   ├── __init__.py
    │   │   └── pseudocounts.py
    │   ├── io
    │   │   ├── __init__.py
    │   │   ├── clans.py
    │   │   ├── cs.py
    │   │   ├── dssp.py
    │   │   ├── fasta.py
    │   │   ├── hhpred.py
    │   │   ├── mrc.py
    │   │   ├── noe.py
    │   │   ├── procheck.py
    │   │   ├── psipred.py
    │   │   ├── svg.py
    │   │   ├── vasco.py
    │   │   ├── whatif.py
    │   │   └── wwpdb.py
    │   ├── nmr
    │   │   ├── __init__.py
    │   │   └── resources
    │   │   │   ├── AtomConnectivity.xml
    │   │   │   ├── RandomCoil.Corrections.tsv
    │   │   │   └── RandomCoil.Reference.tsv
    │   ├── sequence
    │   │   ├── __init__.py
    │   │   └── alignment.py
    │   ├── structure
    │   │   └── __init__.py
    │   └── utils
    │   │   └── __init__.py
    ├── build.py
    ├── core
    │   └── __init__.py
    ├── io
    │   ├── __init__.py
    │   ├── plots.py
    │   └── tsv.py
    ├── numeric
    │   ├── __init__.py
    │   └── integrators.py
    ├── statistics
    │   ├── __init__.py
    │   ├── ars.py
    │   ├── maxent.py
    │   ├── mixtures.py
    │   ├── pdf
    │   │   ├── __init__.py
    │   │   └── parameterized.py
    │   ├── rand.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   └── mc
    │   │   │   ├── __init__.py
    │   │   │   ├── multichain.py
    │   │   │   ├── neqsteppropagator.py
    │   │   │   ├── propagators.py
    │   │   │   └── singlechain.py
    │   └── scalemixture.py
    ├── statmech
    │   ├── __init__.py
    │   ├── ensembles.py
    │   └── wham.py
    └── test
    │   ├── __init__.py
    │   ├── app.py
    │   ├── cases
    │       ├── __init__.py
    │       ├── bio
    │       │   ├── __init__.py
    │       │   ├── fragments
    │       │   │   └── __init__.py
    │       │   ├── hmm
    │       │   │   └── __init__.py
    │       │   ├── io
    │       │   │   ├── __init__.py
    │       │   │   ├── clans
    │       │   │   │   └── __init__.py
    │       │   │   ├── cs
    │       │   │   │   └── __init__.py
    │       │   │   ├── fasta
    │       │   │   │   └── __init__.py
    │       │   │   ├── hhpred
    │       │   │   │   └── __init__.py
    │       │   │   ├── mrc
    │       │   │   │   └── __init__.py
    │       │   │   ├── noe
    │       │   │   │   └── __init__.py
    │       │   │   ├── procheck
    │       │   │   │   └── __init__.py
    │       │   │   ├── whatif
    │       │   │   │   └── __init__.py
    │       │   │   └── wwpdb
    │       │   │   │   └── __init__.py
    │       │   ├── nmr
    │       │   │   └── __init__.py
    │       │   ├── sequence
    │       │   │   ├── __init__.py
    │       │   │   └── alignment
    │       │   │   │   └── __init__.py
    │       │   ├── structure
    │       │   │   └── __init__.py
    │       │   └── utils
    │       │   │   └── __init__.py
    │       ├── core
    │       │   └── __init__.py
    │       ├── io
    │       │   └── __init__.py
    │       ├── numeric
    │       │   ├── __init__.py
    │       │   └── integrators.py
    │       ├── statistics
    │       │   ├── __init__.py
    │       │   ├── ars.py
    │       │   ├── maxent.py
    │       │   ├── mixtures.py
    │       │   ├── pdf
    │       │   │   ├── __init__.py
    │       │   │   └── parameterized.py
    │       │   ├── rand.py
    │       │   ├── samplers
    │       │   │   └── __init__.py
    │       │   └── scalemixture.py
    │       └── statmech
    │       │   ├── __init__.py
    │       │   ├── ensembles.py
    │       │   └── wham.py
    │   └── data
    │       ├── 1C3W_10.mrc
    │       ├── 1d3z.legacy.pdb
    │       ├── 1d3z.regular.pdb
    │       ├── 1nz9.full.pickle
    │       ├── 1nz9.model1.pickle
    │       ├── 1nz9.pdb
    │       ├── 1nz9A.frags
    │       ├── 2JZC.sum
    │       ├── 2l01.v2.str
    │       ├── 2l01.v3.str
    │       ├── 3p1u.pdb
    │       ├── 3shm_ca.pdb
    │       ├── ISL5.1.isl
    │       ├── Sparky.peaks
    │       ├── Xeasy1.peaks
    │       ├── Xeasy2.peaks
    │       ├── ake-xray-ensemble-ca.pdb
    │       ├── csb.tsv
    │       ├── d1b24a2.hhm
    │       ├── d1ea0a1.hhr
    │       ├── d1nz0a_.a3m
    │       ├── d1nz0a_.hhm
    │       ├── d1nz0a_.mfasta
    │       ├── d1nz0a_.pdb
    │       ├── mapping.pdb
    │       ├── mapping2.pdb
    │       ├── mapping3.pdb
    │       ├── maxent.pickle
    │       ├── modified.pdb
    │       ├── modified2.pdb
    │       ├── out.clans
    │       ├── pdbout.txt
    │       ├── standard.tsv
    │       ├── struct.ali.mfasta
    │       ├── test.fa
    │       ├── test.hhm
    │       └── test2.hhm
├── epydoc.css
├── requirements.txt
└── setup.py


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ${{ matrix.os }}
 8 |     strategy:
 9 |       matrix:
10 |         include:
11 |           - os: ubuntu-20.04
12 |             python-version: "3.6"
13 |           - os: ubuntu-22.04
14 |             python-version: "3.12"
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     - uses: actions/setup-python@v5
19 |       with:
20 |         python-version: ${{ matrix.python-version }}
21 |     - run: pip install --editable .[dev]
22 |     - run: python csb/build.py -o .
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # PyCharm
  2 | .idea/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | 
 91 | # Spyder project settings
 92 | .spyderproject
 93 | .spyproject
 94 | 
 95 | # Rope project settings
 96 | .ropeproject
 97 | 
 98 | # mkdocs documentation
 99 | /site
100 | 
101 | # mypy
102 | .mypy_cache/
103 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | dist: trusty
 3 | python:
 4 | - '2.7'
 5 | - '3.6'
 6 | install:
 7 | - pip install --upgrade pip
 8 | - pip install --editable .[dev]
 9 | script:
10 | - python csb/build.py -o .
11 | deploy:
12 |   provider: releases
13 |   skip_cleanup: true
14 |   api_key:
15 |     secure: QE+OlJEfZm/u2SnFU40uRwRFrQrbevCRn1PyFGvKUze5JmUVBCJpWG5QJRIvCvkW5A34YF9kK802lWAHOnSk5luTh8N84gGEI+/vgX/cEE5DyNf3lCkoXJXCrcVIUcVCWnZ3S17gyWQsdFmUniAyrHGkOsU/HbkwwolCIAahmEKlP3vR2HGzHtnW2Q3XAoDNY/6BhxFlKdcmrstiWyzrh8mVFyF94/hBdGD0FEsUPye4vTN5z1iaLCtjiO2jzBsB3g3wvvPeRu/R/dxMVyBX3CG7hwKIVNH8vL/O8RE0wHHG0M3f77qw7I4GTg9xd6ODI3sjPgeEP3l4l2CK88XEAXamNo8xxfvUOGCTqxxUID3dU1ueut28vC/3iRCOqTJSzClFPfFAkDhZ1ReaTSxRuGpM86dQkQ9AVMWFc8lTzI1DdZlrCyStFySwYRGdZmPeq8o6tJDHel7OWObVh2UXBhQHjORgDskpxDtWuohS18A4NoWPC6j3Ct3PUPbCnSOzwXQCpHGE2ufGhHEAWNUgJHPK9yNCGRsaGcUnTlA/uU/Q02X7dEq1u2SdNs4f8FMEkOS/4DlCuHWvnmUvs6Zu4d7zk2+wpKb4cY5JM5eWTMH82r576Id8HH33nrOwZThJw5CJSrijx8flWMDfX0qPsbHCP91OXH2zDvD42/IamCA=
16 |   file_glob: true
17 |   file: "csb-*.*"
18 |   on:
19 |     tags: true
20 |     python: "2.7"
21 | 


--------------------------------------------------------------------------------
/INSTALL.rst:
--------------------------------------------------------------------------------
1 | Detailed installation instructions can be found here:
2 | 
3 | http://csb-toolbox.github.io/installation
4 | 


--------------------------------------------------------------------------------
/LICENSE.rst:
--------------------------------------------------------------------------------
1 | MIT
2 | Copyright (c) 2012 Michael Habeck
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 | 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 | 
8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include csb/bio/nmr/resources *.*
2 | recursive-include csb/test/data *.*
3 | recursive-include docs *.*
4 | include README.rst INSTALL.rst LICENSE.rst
5 | include requirements.txt
6 | 
7 | recursive-exclude csb/test/data *.pickle


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Computational Structural Biology Toolbox
  2 | ========================================
  3 | 
  4 | CSB is a python library and application framework, which can be used
  5 | to solve problems in the field of structural bioinformatics. If
  6 | you are a bioinformatician, software engineer or a researcher working
  7 | in this field, chances are you may find something useful here. Our
  8 | package consists of a few major components:
  9 | 
 10 | 1. Core class library - object-oriented, granular, with an emphasis
 11 |    on design and clean interfaces.
 12 | 
 13 | 2. Application framework - console applications ("protocols"),
 14 |    which consume objects from the core library in order to build
 15 |    something executable (and hopefully useful).
 16 | 
 17 | 3. Test framework - ensures that the library *actually* works.
 18 | 
 19 | 
 20 | Installation 
 21 | ------------
 22 | CSB is being developed on Linux. However, compatibility
 23 | is a design goal and the package works on any platform, on any modern Python
 24 | interpreter. If you find any issues on a platform/interpreter different from
 25 | our development environment, please let us know.
 26 | 
 27 | CSB and all of its dependencies can be installed with pip::
 28 | 
 29 |     $ pip install csb
 30 | 
 31 | See http://csb-toolbox.github.io/installation for more details.
 32 | 
 33 | 
 34 | Running CSB Applications
 35 | ------------------------
 36 | 
 37 | CSB is bundled with a number of executable console applications. Each app
 38 | provides a standard command line interface. To run any app, try::
 39 | 
 40 |     $ csb-app --help
 41 |     
 42 | where *csb-app* is the name of the application, such as ``csb-hhfrag``.
 43 | For more details on our app framework, including guidelines for writing new
 44 | applications, please refer to the API documentation, package "csb.apps".
 45 | 
 46 | 
 47 | Documentation
 48 | -------------
 49 | 
 50 | The project's web site at `GitHub <http://github.com/csb-toolbox>`_ contains
 51 | online documentation and samples. Visit us at:
 52 |     
 53 | http://csb-toolbox.github.io
 54 | 
 55 | Detailed API documentation can be found in the "docs/api" directory in the
 56 | distribution package (docs/api/index.html). This documentation is also hosted
 57 | on our web site:
 58 | 
 59 | https://csb-toolbox.github.io/api-docs/
 60 | 
 61 | Many packages contain introductory module level documentation and samples/tutorials.
 62 | These are also available in the HTML docs, but a quick way to access them is by using
 63 | the built-in python help system. For example, for a general introduction
 64 | see the module documentation of the root package::
 65 | 
 66 |     $ python -c "import csb; help(csb)"
 67 | 
 68 | If you are interested in a specific package, such as csb.bio.sequence,
 69 | try::    
 70 |     
 71 |     $ python -c "import csb.bio.sequence; help(csb.bio.sequence)"
 72 | 
 73 | 
 74 | Contact
 75 | -------
 76 | 
 77 | CSB is developed by Michael Habeck's Computational Structural Biology
 78 | `research group <http://www.stochastik.math.uni-goettingen.de/index.php?id=172>`_.
 79 |     
 80 | For complete source code, contributions, support or bug reports please visit
 81 | us on GitHub:
 82 |   
 83 | http://github.com/csb-toolbox/
 84 |     
 85 | 
 86 | License
 87 | -------
 88 | 
 89 | CSB is open source and distributed under the OSI-approved MIT license.
 90 | ::
 91 | 
 92 |     Copyright (c) 2012 Michael Habeck
 93 |     
 94 |     Permission is hereby granted, free of charge, to any person obtaining
 95 |     a copy of this software and associated documentation files (the
 96 |     "Software"), to deal in the Software without restriction, including
 97 |     without limitation the rights to use, copy, modify, merge, publish,
 98 |     distribute, sublicense, and/or sell copies of the Software, and to
 99 |     permit persons to whom the Software is furnished to do so, subject to
100 |     the following conditions:
101 |     
102 |     The above copyright notice and this permission notice shall be
103 |     included in all copies or substantial portions of the Software.
104 |     
105 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
106 |     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
107 |     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
108 |     IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
109 |     CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
110 |     TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
111 |     SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
112 |     
113 | ------------
114 | 
115 | .. image:: https://github.com/csb-toolbox/CSB/workflows/CI/badge.svg
116 |    :target: https://github.com/csb-toolbox/CSB/actions
117 | 
118 | 


--------------------------------------------------------------------------------
/csb/apps/bfit.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Python application for robust structure superposition of two structures.
  3 | bfit models non-rigid displacements in protein ensembles with outlier-tolerant
  4 | probability distributions.
  5 | """
  6 | import numpy
  7 | 
  8 | import csb.apps
  9 | import csb.bio.utils
 10 | 
 11 | from csb.bio.io.wwpdb import LegacyStructureParser
 12 | from csb.bio.sequence import SequenceAlignment
 13 | 
 14 | 
 15 | class ExitCodes(csb.apps.ExitCodes):
 16 |     IO_ERROR = 2
 17 |     INPUT_ERROR = 3
 18 | 
 19 | class AppRunner(csb.apps.AppRunner):
 20 | 
 21 |     @property
 22 |     def target(self):
 23 |         return BFitApp
 24 | 
 25 |     def command_line(self):
 26 |         
 27 |         cmd = csb.apps.ArgHandler(self.program, __doc__)
 28 | 
 29 |         # Input structures
 30 |         cmd.add_positional_argument('pdb1', str,
 31 |                                     'full path to the first structure')
 32 | 
 33 |         cmd.add_positional_argument('pdb2', str,
 34 |                                     'full path to the second structure')
 35 | 
 36 |         # Optional arguments 
 37 |         cmd.add_scalar_option('chain1', 'c', str,
 38 |                               'Chain of the first structure',
 39 |                               default='A')
 40 | 
 41 |         cmd.add_scalar_option('chain2', 'd', str,
 42 |                               'Chain of the second structure',
 43 |                               default='A')
 44 |         
 45 |         cmd.add_scalar_option('scalemixture', 's', str,
 46 |                               'Scale mixture distribution',
 47 |                               default='student',
 48 |                               choices=['student', 'k'])
 49 | 
 50 |         
 51 |         cmd.add_scalar_option('alignment', 'a', str,
 52 |                               'Alignment in fasta format defining equivalent positions\n'
 53 |                               + 'Assumes that chain1 is the first sequence of '
 54 |                               + 'the alignment and chain2 the second sequence')
 55 | 
 56 |         cmd.add_scalar_option('outfile', 'o', str,
 57 |                               'file to which the rotated second ' + 
 58 |                               'structure will be written',
 59 |                               default='bfit.pdb')
 60 | 
 61 |         cmd.add_scalar_option('niter', 'n', int,
 62 |                               'Number of optimization steps',
 63 |                               default=200)
 64 | 
 65 |         cmd.add_boolean_option('em', None,
 66 |                                'Use the EM algorithm for optimsation',
 67 |                                default = False)
 68 | 
 69 |         return cmd
 70 | 
 71 | 
 72 | 
 73 | class BFitApp(csb.apps.Application):
 74 |     """
 75 |     Python application for robust structure superposition of two protein structures
 76 |     """
 77 | 
 78 |     def main(self):
 79 |         try:
 80 |             parser = LegacyStructureParser(self.args.pdb1)
 81 |             r = parser.parse()
 82 | 
 83 |             parser = LegacyStructureParser(self.args.pdb2)
 84 |             m = parser.parse()
 85 |         except IOError as e:
 86 |             self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR)
 87 | 
 88 |         X = numpy.array(r[self.args.chain1].get_coordinates(['CA'], True))
 89 |         Y = numpy.array(m[self.args.chain2].get_coordinates(['CA'], True))
 90 | 
 91 |         if self.args.alignment is not None:
 92 |             align = SequenceAlignment.parse(file(self.args.alignment).read())
 93 |             align = align[:2, :]
 94 |             
 95 |             matches = []
 96 |             for i in range(1, align.length + 1):
 97 |                 if not align.gap_at(i):
 98 |                     matches.append([align.columns[i][0].rank - 1,
 99 |                                     align.columns[i][1].rank - 1])
100 |             matches = numpy.array(matches)
101 |             X = X[matches[:, 0], :]
102 |             Y = Y[matches[:, 1], :]
103 | 
104 |         
105 |         if len(X) != len(Y):
106 |             self.exit('Structures are of different lengths,' + 
107 |                       ' please specify an alignment',
108 |                       ExitCodes.INPUT_ERROR)
109 | 
110 |         R, t = csb.bio.utils.bfit(X, Y, self.args.niter,
111 |                 self.args.scalemixture, self.args.em)
112 | 
113 |         m.transform(R, t)
114 |         m.to_pdb(self.args.outfile)
115 |         
116 | 
def main():
    """
    Console entry point: create the bfit runner and start it.
    """
    runner = AppRunner()
    runner.run()


# allow running this module directly as a script
if __name__ == '__main__':
    main()
123 |     
124 | 


--------------------------------------------------------------------------------
/csb/apps/bfite.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Python application for robust structure superposition of an ensemble of structures.
  3 | bfite models non-rigid displacements in protein ensembles with outlier-tolerant
  4 | probability distributions.
  5 | """
  6 | import numpy
  7 | 
  8 | import csb.apps
  9 | import csb.bio.structure
 10 | 
 11 | from csb.bio.io.wwpdb import LegacyStructureParser
 12 | from csb.bio.utils import average_structure, fit, wfit
 13 | from csb.statistics.scalemixture import ScaleMixture, GammaPrior
 14 | 
 15 | 
 16 | class ExitCodes(csb.apps.ExitCodes):
 17 |     IO_ERROR = 2
 18 | 
 19 | class AppRunner(csb.apps.AppRunner):
 20 | 
 21 |     @property
 22 |     def target(self):
 23 |         return BFitApp
 24 | 
 25 |     def command_line(self):
 26 |         
 27 |         cmd = csb.apps.ArgHandler(self.program, __doc__)
 28 | 
 29 |         # Input structures
 30 |         cmd.add_positional_argument('pdb', str,
 31 |                                     'full path to the ensemble')
 32 | 
 33 |         # Optional arguments 
 34 |         cmd.add_scalar_option('chain', 'c', str,
 35 |                               'Chain',
 36 |                               default='A')
 37 |         
 38 |         cmd.add_scalar_option('scalemixture', 's', str,
 39 |                               'Scale mixture distribution',
 40 |                               default='student',
 41 |                               choices=['student', 'k'])
 42 |         
 43 |         cmd.add_scalar_option('alignment', 'a', str,
 44 |                               'Alignment in fasta format defining equivalent positions\n'
 45 |                               + 'Assumes that chain1 is the first sequence of '
 46 |                               + 'the alignment and chain2 the second sequence')
 47 | 
 48 |         cmd.add_scalar_option('outfile', 'o', str,
 49 |                               'file to which the rotated second ' + 
 50 |                               'structure will be written',
 51 |                               default='bfit.pdb')
 52 | 
 53 |         cmd.add_scalar_option('niter', 'n', int,
 54 |                               'Number of optimization steps',
 55 |                               default=200)
 56 |         
 57 |         return cmd
 58 | 
 59 | 
 60 | 
 61 | class BFitApp(csb.apps.Application):
 62 |     """
 63 |     Python application for robust structure superposition of two protein structures
 64 |     """
 65 | 
 66 |     def main(self):
 67 |         try:
 68 |             parser = LegacyStructureParser(self.args.pdb)
 69 |             models = parser.models()
 70 | 
 71 |         except IOError as e:
 72 |             self.exit('PDB file parsing failed\n' + str(e.value), ExitCodes.IO_ERROR)
 73 | 
 74 |         if len(models) < 2:
 75 |             self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)
 76 | 
 77 |         ensemble = parser.parse_models(models)
 78 |         X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble])
 79 |         x_mu = average_structure(X)
 80 |         #n = X.shape[1]
 81 |         m = X.shape[0]
 82 |         R = numpy.zeros((m, 3, 3))
 83 |         t = numpy.ones((m, 3))
 84 | 
 85 | 
 86 |         prior = GammaPrior()
 87 |         mixture = ScaleMixture(scales=X.shape[1],
 88 |                                prior=prior, d=3)
 89 | 
 90 |         for i in range(m):
 91 |             R[i, :, :], t[i, :] = fit(x_mu, X[i])
 92 |         
 93 |         # gibbs sampling cycle
 94 |         for j in range(self.args.niter):
 95 |             # apply rotation
 96 |             data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
 97 |                                 for i in range(m)]).T
 98 |             # sample scales
 99 |             mixture.estimate(data)
100 |             # sample rotations
101 |             for i in range(m):
102 |                 R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)
103 | 
104 | 
105 |         out_ensemble = csb.bio.structure.Ensemble()
106 | 
107 |         for i, model in enumerate(ensemble):
108 |             model.transform(R[i], t[i])
109 |             out_ensemble.models.append(model)
110 | 
111 |         out_ensemble.to_pdb(self.args.outfile)
112 | 
113 | 
def main():
    """
    Console entry point: create the bfite runner and start it.
    """
    runner = AppRunner()
    runner.run()


# allow running this module directly as a script
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/csb/apps/embd.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Sharpening of EM maps by non-negative blind deconvolution.
  3 | For details see:
  4 | 
  5 | Hirsch M, Schoelkopf B and Habeck M (2010)
  6 | A New Algorithm for Improving the Resolution of Cryo-EM Density Maps.
  7 | """
  8 | 
  9 | import os
 10 | import numpy
 11 | import csb.apps
 12 | 
 13 | from numpy import sum, sqrt
 14 | 
 15 | from csb.numeric import convolve, correlate, trim
 16 | from csb.bio.io.mrc import DensityMapReader, DensityMapWriter, DensityInfo, DensityMapFormatError
 17 | 
 18 | 
 19 | class ExitCodes(csb.apps.ExitCodes):
 20 |     
 21 |     IO_ERROR = 2
 22 |     INVALID_DATA = 3
 23 |     ARGUMENT_ERROR = 4
 24 | 
 25 | 
 26 | class AppRunner(csb.apps.AppRunner):
 27 |     
 28 |     @property
 29 |     def target(self):
 30 |         return DeconvolutionApp
 31 |      
 32 |     def command_line(self):
 33 |         
 34 |         
 35 |         cmd = csb.apps.ArgHandler(self.program, __doc__)
 36 |         
 37 |         cmd.add_scalar_option('psf-size', 's', int, 'size of the point spread function', default=15)
 38 |         cmd.add_scalar_option('output', 'o', str, 'output directory of the sharpened maps', default='.')
 39 |         cmd.add_scalar_option('iterations', 'i', int, 'number of iterations', default=1000)
 40 |         cmd.add_scalar_option('output-frequency', 'f', int, 'create a map file each f iterations', default=50)
 41 |         cmd.add_boolean_option('verbose', 'v', 'verbose mode')
 42 |                         
 43 |         cmd.add_positional_argument('mapfile', str, 'Input Cryo EM file in CCP4 MRC format')
 44 |                         
 45 |         return cmd
 46 |     
 47 | 
 48 | class DeconvolutionApp(csb.apps.Application):
 49 |     
 50 |     def main(self):
 51 |         
 52 |         if not os.path.isfile(self.args.mapfile):
 53 |             DeconvolutionApp.exit('Input file not found.', code=ExitCodes.IO_ERROR)
 54 |                     
 55 |         if not os.path.isdir(self.args.output):
 56 |             DeconvolutionApp.exit('Output directory does not exist.', code=ExitCodes.IO_ERROR)
 57 |     
 58 |         if self.args.psf_size < 1:
 59 |             DeconvolutionApp.exit('PSF size must be a positive number.', code=ExitCodes.ARGUMENT_ERROR)
 60 |                          
 61 |         if self.args.iterations < 1:
 62 |             DeconvolutionApp.exit('Invalid number of iterations.', code=ExitCodes.ARGUMENT_ERROR)
 63 |             
 64 |         if self.args.output_frequency < 1:
 65 |             DeconvolutionApp.exit('Output frequency must be a positive number.', code=ExitCodes.ARGUMENT_ERROR)
 66 |             
 67 |         if self.args.iterations < self.args.output_frequency:
 68 |             DeconvolutionApp.exit('Output frequency is too low.', code=ExitCodes.ARGUMENT_ERROR)            
 69 |             
 70 |         self.args.output = os.path.abspath(self.args.output)
 71 |                                 
 72 |         self.run()
 73 |     
 74 |     def run(self):
 75 | 
 76 |         writer = DensityMapWriter()
 77 | 
 78 |         self.log('Reading input density map...')
 79 |         try:
 80 |             input = DensityMapReader(self.args.mapfile).read()
 81 |             embd = Deconvolution(input.data, self.args.psf_size)
 82 |             
 83 |         except DensityMapFormatError as e:
 84 |             msg = 'Error reading input MRC file: {0}'.format(e)  
 85 |             DeconvolutionApp.exit(msg, code=ExitCodes.INVALID_DATA)
 86 | 
 87 |         self.log('Running {0} iterations...'.format(self.args.iterations))
 88 |         self.log(' Iteration             Loss Correlation  Output')
 89 |                 
 90 |         for i in range(1, self.args.iterations + 1):
 91 |             embd.run_once()
 92 | 
 93 |             if i % self.args.output_frequency == 0:
 94 |                 output = OutputPathBuilder(self.args, i)
 95 |                 
 96 |                 density = DensityInfo(embd.data, None, None, header=input.header)
 97 |                 writer.write_file(output.fullpath, density)
 98 |                 
 99 |                 self.log('{0:>9}.  {1:15.2f}  {2:10.4f}  {3}'.format(
100 |                                     i, embd.loss, embd.correlation, output.filename))
101 | 
102 |         self.log('Done: {0}.'.format(output.fullpath))
103 |                             
104 |     def log(self, *a, **k):
105 |         
106 |         if self.args.verbose:
107 |             super(DeconvolutionApp, self).log(*a, **k)
108 |       
109 | 
class OutputPathBuilder(object):
    """
    Builds the name and full path of the map file written at a given
    iteration: the iteration number is inserted between the base name and
    the extension of the input map file.

    @param args: object providing C{mapfile} and C{output} attributes
    @param i: iteration number
    """

    def __init__(self, args, i):

        stem, suffix = os.path.splitext(os.path.basename(args.mapfile))

        self._newfile = '{0}.{1}{2}'.format(stem, i, suffix)
        self._path = os.path.join(args.output, self._newfile)

    @property
    def fullpath(self):
        """
        New file name joined with the output directory.
        """
        return self._path

    @property
    def filename(self):
        """
        New file name without any directory part.
        """
        return os.path.basename(self._newfile)
127 |       
class Util(object):
    """
    Numeric helper functions.
    """

    @staticmethod
    def corr(x, y, center=False):
        """
        Normalized correlation of two arrays: the sum of element-wise
        products divided by the norms of both arrays.

        @param x: first array
        @param y: second array; combined element-wise with C{x}
        @param center: if True, subtract the mean of each array first
        @type center: bool

        @return: correlation of C{x} and C{y}
        """
        if center:
            x = x - x.mean()
            y = y - y.mean()

        # normalise by the norm of each array; previously the norm of x
        # was applied twice and y was never normalised
        return sum(x * y) / sqrt(sum(x * x)) / sqrt(sum(y * y))
138 | 
class Deconvolution(object):
    """
    Blind deconvolution for n-dimensional images.
    
    @param data: EM density map data (data field of L{csb.bio.io.mrc.DensityInfo})
    @type data: array
    @param psf_size: point spread function size (edge length; the PSF is
                     allocated with shape (psf_size, psf_size, psf_size))
    @type psf_size: int
    @param beta_x: hyperparameters of sparseness constraints
    @type beta_x: float
    @param beta_f: hyperparameters of sparseness constraints
    @type beta_f: float
    @param cache: if True, keep the most recently calculated image so that
                  it can be reused until the map or the PSF changes
    @type cache: bool
    """
    
    def __init__(self, data, psf_size, beta_x=1e-10, beta_f=1e-10, cache=True):

        self._f = []
        self._x = []
        self._y = numpy.array(data)
        self._loss = []
        self._corr = []
        
        self._ycache = None
        self._cache = bool(cache)

        self._beta_x = float(beta_x)
        self._beta_f = float(beta_f)
                
        shape_psf = (psf_size, psf_size, psf_size)
        self._initialize(shape_psf)
        
    @property
    def beta_x(self):
        return self._beta_x

    @property
    def beta_f(self):
        return self._beta_f
    
    @property
    def loss(self):
        """
        Current loss value (None before the first iteration).
        """        
        if len(self._loss) > 0:
            return float(self._loss[-1])
        else:
            return None
        
    @property
    def correlation(self):
        """
        Current correlation value (None before the first iteration).
        """
        if len(self._corr) > 0:
            return float(self._corr[-1])
        else:
            return None
        
    @property
    def data(self):
        """
        Current map estimate, passed through C{trim} with the PSF shape.
        """
        return trim(self._x, self._f.shape)
            
    def _initialize(self, shape_psf):
        """
        Initialize with flat image and psf.
        """
        self._f = numpy.ones(shape_psf)
        self._x = numpy.ones(numpy.array(self._y.shape) + numpy.array(shape_psf) - 1)

        self._normalize_psf()
        self._invalidate_cache()
                
    def _normalize_psf(self):
        self._f /= self._f.sum()

    def _invalidate_cache(self):
        """
        Drop the cached image. Must be called whenever the map or the PSF
        is modified, otherwise cached reads would return a stale image.
        """
        self._ycache = None
        
    def _calculate_image(self):
        return convolve(self._f, self._x)

    def calculate_image(self, cache=False):
        """
        Calculate the image from the current PSF and map.

        @param cache: if True, return the cached image when one is available
        @type cache: bool
        """

        if cache and self._ycache is not None:
            return self._ycache
        else:
            y = self._calculate_image()
            if self._cache:
                self._ycache = y
            return y

    def _update_map(self):
        """
        Multiplicative update of the map.
        """

        # The cache is dropped on every modification, so a cached image
        # (if any) always corresponds to the current map and PSF.
        y = self.calculate_image(cache=True)

        N = correlate(self._f, self._y) - self.beta_x
        D = correlate(self._f, y)

        self._x *= numpy.clip(N, 1e-300, 1e300) / numpy.clip(D, 1e-300, 1e300)
        self._invalidate_cache()

    def _update_psf(self):
        """
        Multiplicative update of the PSF, followed by re-normalization.
        """

        y = self.calculate_image(cache=True)

        N = correlate(self._x, self._y) - self.beta_f
        D = correlate(self._x, y)

        self._f *= numpy.clip(N, 1e-300, 1e300) / numpy.clip(D, 1e-300, 1e300)
        self._normalize_psf()
        self._invalidate_cache()

    def eval_loss(self, cache=False):
        """
        Evaluate the loss: half the squared residual between the data and
        the calculated image, plus the two weighted sparseness terms.

        @param cache: allow reuse of the cached image
        @type cache: bool
        """

        y = self.calculate_image(cache=cache)
        
        residual = 0.5 * ((self._y - y) ** 2).sum()
        return residual + self.beta_f * self._f.sum() + self.beta_x * self._x.sum()

    def eval_corr(self, cache=False):
        """
        Evaluate the correlation between the data and the calculated image.

        @param cache: allow reuse of the cached image
        @type cache: bool
        """
        
        y = self.calculate_image(cache=cache)
        return Util.corr(self._y, y)

    def run_once(self):
        """
        Run a single iteration: record the loss and correlation of the
        current state, then update the map and the PSF.
        """

        self._loss.append(self.eval_loss(cache=True))
        self._corr.append(self.eval_corr(cache=True))

        self._update_map()                
        self._update_psf()
            
    def run(self, iterations):
        """
        Run multiple iterations.
        
        @param iterations: number of iterations to run
        @type iterations: int 
        """
        for i in range(iterations):
            self.run_once()
278 | 
279 | 
def main():
    """
    Console entry point: create the application runner and run it.
    """
    runner = AppRunner()
    runner.run()
    
    
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/csb/apps/helloworld.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is a CSB HelloWorld dummy application.
 3 | """
 4 | 
 5 | import sys
 6 | import csb.apps
 7 | 
 8 | 
class ExitCodes(csb.apps.ExitCodes):
    """
    Exit codes of the HelloWorld application.
    """
    
    # used by AppRunner.initapp when the supplied text is too short
    BAD_TEXT = 4
12 |     
13 |     
class AppRunner(csb.apps.AppRunner):
    """
    Runner for L{HelloWorldApp}: defines the command line and performs a
    basic check of the parsed arguments.
    """

    @property
    def target(self):
        return HelloWorldApp

    def command_line(self):
        """
        Build the command line definition: a C{text} option and an
        C{upper} switch.
        """
        default_text = "Hello World"

        handler = csb.apps.ArgHandler(self.program, 'This program prints "Hello World".')
        handler.add_scalar_option('text', 't', str, 'The text to print', default=default_text)
        handler.add_boolean_option('upper', 'u', 'Print in upper case', default=False)

        return handler

    def initapp(self, args):
        """
        Create the application instance. Texts shorter than 3 characters
        are reported through the application's C{exit} with
        L{ExitCodes.BAD_TEXT}.
        """
        app_class = self.target

        if len(args.text) < 3:
            app_class.exit("Enter at least a few words", code=ExitCodes.BAD_TEXT, usage=True)

        return app_class(args)
38 |     
39 |     
class HelloWorldApp(csb.apps.Application):
    """
    Logs the configured text, optionally converted to upper case.
    """

    def main(self):

        message = self.args.text.upper() if self.args.upper else self.args.text

        self.log(message)
        self.log('HW: done.')
51 | 
52 | 
def main():
    """
    Console entry point: run the application with the process arguments.
    """
    runner = AppRunner(sys.argv)
    runner.run()
    
    
if __name__ == '__main__':
    main()
59 | 


--------------------------------------------------------------------------------
/csb/apps/hhsearch.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Python bindings for the HHsearch program. Capable of executing multiple
  3 | HHsearch jobs in parallel.
  4 | """
  5 | 
  6 | import multiprocessing as mp
  7 | 
  8 | import csb.apps
  9 | import csb.io
 10 | import csb.bio.io
 11 | 
 12 | 
class ExitCodes(csb.apps.ExitCodes):
    """
    Exit codes of the HHsearch application.
    """
    
    # used by HHsearchApp.main for IOError and for invalid commands
    IO_ERROR = 2
    INVALID_DATA = 3
    # used by HHsearchApp.main when the HHsearch process fails
    EXT_TOOL_FAILURE = 4
 18 |     
 19 | 
 20 | class AppRunner(csb.apps.AppRunner):
 21 |     
 22 |     @property
 23 |     def target(self):
 24 |         return HHsearchApp
 25 |     
 26 |     def command_line(self):
 27 |         
 28 |         cpu = mp.cpu_count()
 29 |         cmd = csb.apps.ArgHandler(self.program, __doc__)
 30 |           
 31 |         cmd.add_scalar_option('binary', 'b', str, 'full path to the HHsearch binary ', default='hhsearch')
 32 |         cmd.add_scalar_option('cpu', 'c', int, 'maximum degree of parallelism', default=cpu)            
 33 |         cmd.add_scalar_option('database', 'd', str, 'the subject (database) HMM file', required=True)     
 34 |         cmd.add_array_argument('query', str, 'query HMM file(s)')      
 35 |         
 36 |         return cmd
 37 |     
 38 | 
 39 | class HHsearchApp(csb.apps.Application):
 40 |     
 41 |     def main(self):
 42 |         
 43 |         queries = list(self.args.query)
 44 |         exe = HHsearch(self.args.binary, self.args.database)
 45 |         
 46 |         try:
 47 |             if len(queries) == 1:
 48 |                 exe.cpu = self.args.cpu
 49 |                 context = HHTask(queries[0])
 50 |                 results = [ exe.run(context) ]
 51 |             else:
 52 |                 context = [ HHTask(q) for q in queries ]
 53 |                 results = exe.runmany(context, workers=self.args.cpu)
 54 | 
 55 |         except IOError as io:
 56 |             HHsearchApp.exit(str(io), ExitCodes.IO_ERROR)
 57 | 
 58 |         except csb.io.InvalidCommandError as ose:
 59 |             msg = '{0!s}: {0.program}'.format(ose)
 60 |             HHsearchApp.exit(msg, ExitCodes.IO_ERROR)   
 61 |                               
 62 |         except csb.io.ProcessError as pe:
 63 |             message = 'Bad exit code from HHsearch: #{0.code}.\nSTDERR: {0.stderr}\nSTDOUT: {0.stdout}'.format(pe.context)
 64 |             HHsearchApp.exit(message, ExitCodes.EXT_TOOL_FAILURE)
 65 | 
 66 |         self.log('\nRank Hit   Prob  St  End Qst Qend')
 67 |         self.log('-------------------------------------')
 68 |                             
 69 |         for c in results:
 70 |             self.log('\n\n# QUERY:{0}\n'.format(c.queryfile))
 71 |             if c.result:
 72 |                 for hit in c.result:
 73 |                     self.log('{0.rank:3}. {0.id:5} {0.probability:5.3f} {0.start:3} {0.end:3} {0.qstart:3} {0.qend:3}'.format(hit))
 74 | 
 75 | 
 76 | class Context(object):
 77 |     
 78 |     def __init__(self, query):
 79 |         
 80 |         self.__query = query
 81 |         self.__result = None
 82 |     
 83 |     @property
 84 |     def query(self):
 85 |         return self.__query
 86 |     
 87 |     @property
 88 |     def result(self):
 89 |         return self.__result
 90 |     @result.setter
 91 |     def result(self, result):
 92 |         self.__result = result
 93 |     
 94 | 
 95 | class HHTask(Context):
 96 |     
 97 |     def __init__(self, queryfile):
 98 |         
 99 |         self.queryfile = queryfile
100 |         query = open(queryfile).read()
101 |         
102 |         super(HHTask, self).__init__(query)
103 |     
104 |         
def _task(args):
    """
    Worker function used by L{HHsearch.runmany}: run a single search.

    @param args: tuple of (binary, db, cpu, context)
    @type args: tuple

    @return: the value returned by L{HHsearch.run}, or None when the worker
             is interrupted (KeyboardInterrupt or SystemExit)
    """
    try:
        binary, db, cpu, context = args
        searcher = HHsearch(binary, db, cpu=cpu)
        return searcher.run(context)
    except (KeyboardInterrupt, SystemExit):
        return None
112 | 
class SecStructureScoring(object):
    """
    Integer constants naming the secondary structure scoring modes.
    """

    # consecutive values starting at 0, in declaration order
    OFF, AFTER, DURING, AFTER_PREDICTED, DURING_PREDICTED = range(5)
120 |     
class HHsearch(object):
    """
    Thin wrapper around the HHsearch executable.

    @param binary: name or path of the HHsearch binary
    @type binary: str
    @param db: the subject (database) HMM file
    @type db: str
    @param cpu: value of the C{-cpu} option; None leaves the option out
    @type cpu: int
    """
    
    class Options(object):
        """
        Names of the supported command line options (without the dash).
        """
        
        CPU = 'cpu'
        SS = 'ssm'
        MACT = 'mact'
        MAX_HITS = 'Z'
        MAX_ALI = 'B'
        MAX_E = 'E'
        MIN_P = 'p'
    
    def __init__(self, binary, db, cpu=None):
        
        self._program = binary
        self._db = db
        self._opt = {}
        self._parser = csb.bio.io.HHOutputParser()
        
        self.cpu = cpu
        self.ss = None
        self.mac_threshold = None
        self.max_hits = None
        self.max_alignments = None
        self.max_evalue = None
        self.min_probability = None

    @property
    def program(self):
        return self._program
    @program.setter
    def program(self, value):
        self._program = value
      
    @property
    def db(self):
        return self._db
    @db.setter
    def db(self, value):
        self._db = value
    
    @property
    def parser(self):
        return self._parser
    @parser.setter
    def parser(self, value):
        self._parser = value
    
    @property
    def cpu(self):
        return self._get(HHsearch.Options.CPU)
    @cpu.setter
    def cpu(self, value):
        self._opt[HHsearch.Options.CPU] = value
                        
    @property
    def ss(self):
        return self._get(HHsearch.Options.SS)
    @ss.setter
    def ss(self, value):
        self._opt[HHsearch.Options.SS] = value
    
    @property
    def mac_threshold(self):
        return self._get(HHsearch.Options.MACT)
    @mac_threshold.setter
    def mac_threshold(self, value):
        self._opt[HHsearch.Options.MACT] = value
    
    @property
    def max_hits(self):
        return self._get(HHsearch.Options.MAX_HITS)
    @max_hits.setter
    def max_hits(self, value):
        self._opt[HHsearch.Options.MAX_HITS] = value
    
    @property
    def max_alignments(self):
        return self._get(HHsearch.Options.MAX_ALI)
    @max_alignments.setter
    def max_alignments(self, value):
        self._opt[HHsearch.Options.MAX_ALI] = value
    
    @property
    def max_evalue(self):
        return self._get(HHsearch.Options.MAX_E)
    @max_evalue.setter
    def max_evalue(self, value):
        self._opt[HHsearch.Options.MAX_E] = value
    
    @property
    def min_probability(self):
        return self._get(HHsearch.Options.MIN_P)
    @min_probability.setter
    def min_probability(self, value):
        self._opt[HHsearch.Options.MIN_P] = value
    
    def _get(self, option):
        """
        Return the stored value of an option, or None if it was never set.
        """
        return self._opt.get(option)
        
    def _options(self):
        """
        Render the stored options as a command line fragment. Options set
        to None, to an empty string or to False are left out; True produces
        a bare flag; any other value produces C{-option value}.

        @rtype: str
        """
        
        options = []
        
        for option, value in self._opt.items():
            
            # a boolean option is a switch: False means "do not pass it"
            if value is None or value == '' or value is False:
                continue
            
            if value is True:
                options.append('-{0}'.format(option))
            else:    
                options.append('-{0} {1}'.format(option, value))         
        
        return ' '.join(options)
                        
    def run(self, context):
        """
        Run a single search: write the query to a temporary file, execute
        the binary and parse the output file.

        @param context: search context; its C{query} is read and its
                        C{result} is assigned
        @type context: L{Context}

        @return: the same context, with C{result} set
        @rtype: L{Context}
        """
        
        with csb.io.TempFile() as q:
            
            q.write(context.query)
            q.flush()
            
            with csb.io.TempFile() as o:
                
                cmd = '{0.program} -i {1} -d {0.db} -o {2} {3}'.format(self, q.name, o.name, self._options())                    
                csb.io.Shell.runstrict(cmd)
                
                context.result = self.parser.parse_file(o.name)
                return context
            
    def runmany(self, contexts, workers=mp.cpu_count(), cpu=1):
        """
        Run several searches in a pool of worker processes.

        @param contexts: search contexts, one per query
        @type contexts: list of L{Context}
        @param workers: maximum number of worker processes; never more
                        than the number of contexts
        @type workers: int
        @param cpu: value of the C{-cpu} option used by every worker
        @type cpu: int

        @return: the contexts returned by the workers; empty when there is
                 nothing to run or the run was interrupted from the keyboard
        @rtype: list
        """
        
        # a pool cannot be created with zero workers
        if len(contexts) == 0:
            return []
        
        if workers > len(contexts):
            workers = len(contexts)

        results = []
        taskargs = [(self.program, self.db, cpu, c) for c in contexts]

        pool = mp.Pool(workers)
        
        try:
            results.extend(pool.map(_task, taskargs))
        except KeyboardInterrupt:
            pass
        finally:
            pool.terminate()
        
        return results
274 | 
275 | 
def main():
    """
    Console entry point: create the application runner and run it.
    """
    runner = AppRunner()
    runner.run()
    
    
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/csb/apps/promix.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ProMix: Take a protein structure ensemble and find a mixture of rigid
  3 | segments or a mixture of conformers. Writes K copies of the ensemble
  4 | (for segments) or K subsets of the ensemble (for conformers) as PDB
  5 | files, each superposed on different components.
  6 | 
  7 | Reference: Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92
  8 | """
  9 | 
 10 | import numpy
 11 | 
 12 | import csb.apps
 13 | import csb.bio.structure
 14 | 
 15 | from csb.bio.io.wwpdb import LegacyStructureParser
 16 | from csb.statistics import mixtures
 17 | 
 18 | 
class ExitCodes(csb.apps.ExitCodes):
    """
    Exit codes of the ProMix application.
    """
    # used by ProMixApp.main when the input PDB file cannot be parsed
    IO_ERROR = 2
 21 | 
 22 | class AppRunner(csb.apps.AppRunner):
 23 | 
 24 |     @property
 25 |     def target(self):
 26 |         return ProMixApp
 27 | 
 28 |     def command_line(self):
 29 |         cmd = csb.apps.ArgHandler(self.program, __doc__)
 30 | 
 31 |         cmd.add_scalar_option('components', 'K', int, 'Number of components', -1)
 32 |         cmd.add_scalar_option('type', 't', str, 'Type of mixture', 'segments', ('segments', 'conformers'))
 33 |         cmd.add_positional_argument('infile', str, 'input PDB file')
 34 | 
 35 |         return cmd
 36 | 
 37 |     def initapp(self, args):
 38 |         app = self.target
 39 |         return app(args)
 40 | 
 41 | class ProMixApp(csb.apps.Application):
 42 | 
 43 |     def main(self):
 44 |         try:
 45 |             parser = LegacyStructureParser(self.args.infile)
 46 |             models = parser.models()
 47 |         except:
 48 |             self.exit('PDB file parsing failed', ExitCodes.IO_ERROR)
 49 | 
 50 |         if len(models) < 2:
 51 |             self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)
 52 | 
 53 |         ensemble = parser.parse_models(models)
 54 |         X = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble])
 55 | 
 56 |         if self.args.type == 'segments':
 57 |             self.main_segments(ensemble, X)
 58 |         elif self.args.type == 'conformers':
 59 |             self.main_conformers(ensemble, X)
 60 |         else:
 61 |             raise ValueError('type must be "segments" or "conformers"')
 62 | 
 63 |     def main_segments(self, ensemble, X):
 64 | 
 65 |         mixture = mixtures.SegmentMixture.new(X, self.args.components)
 66 |         self.log('Number of segments: {0}'.format(mixture.K))
 67 | 
 68 |         for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
 69 |             outfile = 'promix_segment_{0}.pdb'.format(k+1)
 70 |             self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))
 71 | 
 72 |             for model, R, t in zip(ensemble, mixture.R, mixture.t):
 73 |                 if k > 0:
 74 |                     model.transform(R[k-1], t[k-1])
 75 |                 R = R[k].T
 76 |                 t = -numpy.dot(R, t[k])
 77 |                 model.transform(R, t)
 78 | 
 79 |             ensemble.to_pdb(outfile)
 80 | 
 81 |     def main_conformers(self, ensemble, X):
 82 | 
 83 |         mixture = mixtures.ConformerMixture.new(X, self.args.components)
 84 |         self.log('Number of conformers: {0}'.format(mixture.K))
 85 | 
 86 |         membership = mixture.membership
 87 | 
 88 |         for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
 89 |             outfile = 'promix_conformer_{0}.pdb'.format(k+1)
 90 |             self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))
 91 | 
 92 |             ek = csb.bio.structure.Ensemble()
 93 | 
 94 |             for model, R, t, mk in zip(ensemble, mixture.R, mixture.t, membership):
 95 |                 if mk != k:
 96 |                     continue
 97 |                 R = R[k].T
 98 |                 t = -numpy.dot(R, t[k])
 99 |                 model.transform(R, t)
100 |                 ek.models.append(model)
101 | 
102 |             ek.to_pdb(outfile)
103 | 
104 | 
def main():
    """
    Console entry point: create the application runner and run it.
    """
    runner = AppRunner()
    runner.run()
    
    
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/csb/bio/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Core bioinformatics abstractions and I/O.
3 | """


--------------------------------------------------------------------------------
/csb/bio/io/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | I/O for various biological file formats.
 3 | """
 4 | 
 5 | from csb.bio.io.hhpred import HHOutputParser, HHProfileParser
 6 | from csb.bio.io.hhpred import HHpredOutputParser, HHpredProfileParser
 7 | from csb.bio.io.clans import ClansParser, ClansFileWriter
 8 | from csb.bio.io.wwpdb import StructureParser, AsyncStructureParser, PDBHeaderParser
 9 | from csb.bio.io.fasta import SequenceParser, PDBSequenceParser
10 | from csb.bio.io.dssp import DSSPParser, StrideParser
11 | 
12 | __all__ = ['HHOutputParser', 'HHProfileParser', 'ClansParser', 
13 |            'HHpredOutputParser', 'HHpredProfileParser', 'ISitesParser',
14 |            'StructureParser', 'AsyncStructureParser', 'PDBHeaderParser',
15 |            'SequenceParser', 'PDBSequenceParser', 'DSSPParser', 'StrideParser']
16 | 


--------------------------------------------------------------------------------
/csb/bio/io/cs.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple NMR STAR chemical shift readers.
  3 | """
  4 | 
  5 | from csb.bio.nmr import ChemShiftInfo
  6 | 
  7 | 
  8 | class ChemShiftFormatError(ValueError):
  9 |     pass
 10 | 
 11 | 
 12 | class ChemShiftReader(object):
 13 |     """
 14 |     Simple NMR STAR v2 chemical shift reader. 
 15 |     
 16 |     @note: This is not a full-fledged, semantic NMR STAR parser. It handles
 17 |            only the chemical shift table. 
 18 |     """
 19 |     
 20 |     FRAME = 'save_assigned_chemical_shifts'
 21 |     
 22 |     RANK = '_Residue_seq_code'
 23 |     RESIDUE = '_Residue_label'
 24 |     ATOM = '_Atom_name'
 25 |     ELEMENT = '_Atom_type'
 26 |     SHIFT = '_Chem_shift_value'
 27 |     
 28 |     @staticmethod
 29 |     def create(frame=FRAME, version=2):
 30 |         """
 31 |         Parser factory: create a new parser, given a saveframe name 
 32 |         and format verison.
 33 |         
 34 |         @param frame: name of the saveframe to read
 35 |         @type frame: str
 36 |         @param version: NMR STAR format version
 37 |         @type version: int
 38 |         
 39 |         @return: an instance of any L{ChemShiftReader} class
 40 |         @rtype: L{ChemShiftReader}
 41 |         """
 42 |         
 43 |         if version == 3:
 44 |             return ChemShift3Reader(frame=frame)
 45 |         elif version == 2:
 46 |             return ChemShiftReader(frame=frame)
 47 |         else:
 48 |             raise ValueError('Unknown NMR-STAR version')
 49 |         
 50 |     @staticmethod
 51 |     def guess(file, frame=FRAME):
 52 |         """
 53 |         Parser factory: try to guess the correct NMR STAR version from a given
 54 |         file and create an appropriate parser.
 55 |         
 56 |         @param file: NMR STAR path and file name
 57 |         @type file: str 
 58 |         @param frame: name of the saveframe to read
 59 |         @type frame: str
 60 |         
 61 |         @return: an instance of any L{ChemShiftReader} class
 62 |         @rtype: L{ChemShiftReader}
 63 |         
 64 |         @raise ChemShiftFormatError: on failure to determine the NMR STAR version
 65 |         """
 66 |                 
 67 |         with open(file) as cs:
 68 |             content = cs.read()
 69 |             
 70 |             if not content.strip():
 71 |                 return ChemShiftReader.create()
 72 |             elif ChemShift3Reader.SHIFT3 in content:
 73 |                 return ChemShiftReader.create(frame, version=3)
 74 |             elif ChemShiftReader.SHIFT in content:
 75 |                 return ChemShiftReader.create(frame, version=2)
 76 |             else:
 77 |                 raise ChemShiftFormatError("Can't guess NMR-STAR version")
 78 |             
 79 |     def __init__(self, frame=FRAME):
 80 |         self._frame = frame
 81 |     
 82 |     def read_file(self, filename):
 83 |         """
 84 |         Parse the specified file.
 85 |         
 86 |         @param filename: file path and name
 87 |         @type filename: str
 88 |         
 89 |         @rtype: tuple of L{ChemShiftInfo}     
 90 |         """
 91 |         with open(filename) as input:
 92 |             return self.read_shifts(input.read())
 93 |     
 94 |     def read_shifts(self, star_table):
 95 |         """
 96 |         Parse a given NMR STAR chemical shift table.
 97 |         
 98 |         @param star_table: NMR STAR chemical shift table
 99 |         @type star_table: str
100 |         
101 |         @rtype: tuple of L{ChemShiftInfo} 
102 |         @raise ChemShiftFormatError: on parse error    
103 |         """
104 |                 
105 |         shifts = []
106 |         
107 |         init = False
108 |         in_shifts = False
109 |         fields = []
110 |         lines = iter(star_table.splitlines())
111 |         
112 |         if self._frame in star_table:
113 |             self._scroll(lines, self._frame)
114 | 
115 |         
116 |         for l in lines:
117 |             ls = l.strip()
118 |             
119 |             if not in_shifts:
120 | 
121 |                 if ls == 'loop_':
122 |                     assert in_shifts is False and not fields and init is False
123 |                     init = True
124 |                     continue
125 | 
126 |                 elif init and ls.startswith('_'):
127 |                     assert in_shifts is False
128 |                     fields.append(l.strip())
129 |                     continue
130 |                 
131 |                 elif init and not ls:
132 |                     if len(fields) < 1:
133 |                         raise ChemShiftFormatError("No fields found in the CS table")             
134 |                     in_shifts = True
135 |                     continue
136 |                     
137 |             else:
138 |                 
139 |                 if ls == 'stop_':
140 |                     break
141 |                 
142 |                 elif ls.startswith('#'):
143 |                     continue
144 |                 
145 |                 elif ls:
146 |                     values = l.split()
147 |                     if len(values) < len(fields):
148 |                         raise ChemShiftFormatError("Insufficient number of values: {0}".format(l))
149 |                     data = dict(zip(fields, values))
150 |                                         
151 |                     shifts.append(self._create_shift(data))
152 |                     
153 |         return tuple(shifts)
154 |     
155 |     def _scroll(self, iterator, field):
156 |         
157 |         for line in iterator:
158 |             if line.lstrip().startswith(field):
159 |                 break
160 |             
161 |     def _create_shift(self, data):
162 |         
163 |         try:
164 |             position = int(data[ChemShiftReader.RANK])
165 |             residue = data[ChemShiftReader.RESIDUE]
166 |             name = data[ChemShiftReader.ATOM]
167 |             element = data[ChemShiftReader.ELEMENT]
168 |             shift = float(data[ChemShiftReader.SHIFT])
169 |             
170 |         except KeyError as ke:
171 |             raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
172 |         except ValueError as ve:
173 |             raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))
174 |         
175 |         return ChemShiftInfo(position, residue, name, element, shift)
176 | 
177 | 
class ChemShift3Reader(ChemShiftReader):
    """
    Simple NMR STAR v3 chemical shift reader. 
    
    @note: This is not a full-fledged, semantic NMR STAR parser. It handles
           only the chemical shift table. 
    """    
    
    RANK3 = '_Atom_chem_shift.Seq_ID'
    RESIDUE3 = '_Atom_chem_shift.Comp_ID'
    ATOM3 = '_Atom_chem_shift.Atom_ID'
    ELEMENT3 = '_Atom_chem_shift.Atom_type'
    SHIFT3 = '_Atom_chem_shift.Val'
    
    def _create_shift(self, data):
        """
        Build a L{ChemShiftInfo} from one data row of a v3 table.

        @param data: field name to value mapping of the row
        @type data: dict

        @raise ChemShiftFormatError: on a missing field or unparsable value
        """

        try:        
            # convert here, as the v2 reader does, so that malformed values
            # are reported as ChemShiftFormatError
            position = int(data[ChemShift3Reader.RANK3])
            residue = data[ChemShift3Reader.RESIDUE3]
            name = data[ChemShift3Reader.ATOM3]
            element = data[ChemShift3Reader.ELEMENT3]
            shift = float(data[ChemShift3Reader.SHIFT3])
            
        except KeyError as ke:
            raise ChemShiftFormatError("Required field {0} not found".format(str(ke)))
        except ValueError as ve:
            raise ChemShiftFormatError("Can't parse value: {0}".format(str(ve)))
                
        return ChemShiftInfo(position, residue, name, element, shift)
207 | 


--------------------------------------------------------------------------------
/csb/bio/io/dssp.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DSSP Parser
  3 | """
  4 | 
  5 | import csb.core
  6 | import csb.io
  7 | 
  8 | from csb.bio.structure import SecStructures, UnknownSecStructureError
  9 | 
 10 | 
 11 | class DSSPParseError(ValueError):
 12 |     pass
 13 | 
 14 | 
 15 | class ResidueAssignmentInfo(object):
 16 |     
 17 |     def __init__(self, residue_id, accession, chain, secondary_structure, phi, psi):
 18 |         
 19 |         self.residue_id = residue_id
 20 |         self.accession = accession
 21 |         self.chain = chain
 22 |         self.secondary_structure = secondary_structure
 23 |         self.phi = phi
 24 |         self.psi = psi
 25 |         
 26 | 
 27 | class DSSPParser(object):
 28 |     """
 29 |     Simple DSSP Secondary Structure Parser.
 30 |     """
 31 |     
 32 |     def parse(self, dssp_file):
 33 |         """
 34 |         @param dssp_file: source DSSP file to parse
 35 |         @type dssp_file: str
 36 |         @return: a dictionary of L{ResidueAssignmentInfo} objects
 37 |         @rtype: dict
 38 |         """
 39 |         
 40 |         data = {}
 41 |         start = False
 42 |         offset = 0                  # assume old DSSP format
 43 |         accession = None
 44 |         
 45 |         for line in open(dssp_file):
 46 |             
 47 |             if not start:
 48 |                 
 49 |                 if line.startswith('HEADER'):
 50 |                     accession = line[62:66].strip().lower()
 51 |                      
 52 |                 elif line.startswith('  #  RESIDUE'):
 53 |                     if len(line) >= 140:
 54 |                         offset = 4  # the new DSSP format           
 55 |                     start = True
 56 |             else:
 57 |                 if line[13] == '!':
 58 |                     continue                
 59 |                 
 60 |                 residue_id = line[6:11].strip()
 61 |                 chain = line[11]         
 62 |                 try:
 63 |                     ss = line[16].strip()
 64 |                     if ss == '':
 65 |                         ss = SecStructures.Gap
 66 |                     else:
 67 |                         ss = csb.core.Enum.parse(SecStructures, ss)  
 68 |                 except csb.core.EnumValueError as e:
 69 |                     raise UnknownSecStructureError(str(e)) 
 70 |                 phi = float(line[104 + offset : 109 + offset])
 71 |                 psi = float(line[110 + offset : 115 + offset])
 72 |                 
 73 |                 
 74 |                 if chain not in data:
 75 |                     data[chain] = {}
 76 |                 
 77 |                 data[chain][residue_id] = ResidueAssignmentInfo(residue_id, accession, chain, ss, phi, psi)
 78 |                 
 79 |         return data
 80 | 
 81 | class StrideParser(object):
 82 |     """
 83 |     Simple STRIDE Secondary Structure Parser.
 84 |     """
 85 |     
 86 |     def parse(self, stride_file):
 87 |         """
 88 |         @param stride_file: source STRIDE file to parse
 89 |         @type stride_file: str
 90 |         @return: a dictionary of L{ResidueAssignmentInfo} objects
 91 |         @rtype: dict
 92 |         """
 93 |         
 94 |         data = {}
 95 |         
 96 |         for line in open(stride_file):
 97 |             if line.startswith('ASG '):
 98 |                 
 99 |                 fields = line.split()
100 |                 
101 |                 residue_id = fields[3]
102 |                 chain = fields[2]
103 |                 accession = fields[-1].lower()
104 |                 try:
105 |                     ss = csb.core.Enum.parse(SecStructures, fields[5])  
106 |                 except csb.core.EnumValueError as e:
107 |                     raise UnknownSecStructureError(str(e)) 
108 |                 phi = float(fields[7])
109 |                 psi = float(fields[8])
110 |                 
111 |                 if chain not in data:
112 |                     data[chain] = {}
113 |                 
114 |                 data[chain][residue_id] = ResidueAssignmentInfo(residue_id, accession, chain, ss, phi, psi)
115 |                 
116 |         return data
117 | 
118 |     
def get(accession, prefix='http://www.pdb.org/pdb/files/'):
    """
    Download and parse a DSSP entry.

    The entry is fetched from C{prefix + accession.lower() + '.dssp'},
    written to a temporary file and then parsed with L{DSSPParser}.

    @param accession: accession number of the entry
    @type accession: str
    @param prefix: download URL prefix
    @type prefix: str

    @return: see L{DSSPParser.parse}
    @rtype: dict
    """
    dssp = csb.io.TempFile()

    browser = csb.io.urllib.urlopen(prefix + accession.lower() + '.dssp')
    try:
        content = browser.read().decode('utf-8')
    finally:
        # release the connection, no matter whether the download succeeded
        browser.close()

    dssp.write(content)
    # make sure the content is on disk before the parser re-opens the file by name
    dssp.flush()

    return DSSPParser().parse(dssp.name)
138 | 
139 | 


--------------------------------------------------------------------------------
/csb/bio/io/noe.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple XEASY and Sparky peak list parsers.
  3 | """
  4 | 
  5 | from abc import ABCMeta, abstractmethod
  6 | from csb.bio.nmr import NOESpectrum
  7 | 
  8 | 
  9 | class PeakListFormatError(ValueError):
 10 |     pass
 11 | 
 12 | class BasePeakListReader(object):
 13 |     
 14 |     __metaclass__ = ABCMeta
 15 |     
 16 |     @abstractmethod
 17 |     def read(self, table):
 18 |         """
 19 |         Parse a peak list table.
 20 |         
 21 |         @param table: input peak list table
 22 |         @type table: str
 23 |         @rtype: L{NOESpectrum}
 24 |         """
 25 |         pass
 26 |     
 27 |     def read_file(self, filename):
 28 |         """
 29 |         Parse a peak list file.
 30 |         
 31 |         @param filename: input file name
 32 |         @type filename: str
 33 |         @rtype: L{NOESpectrum}
 34 |         """
 35 |         with open(filename) as input:
 36 |             return self.read(input.read())
 37 |         
 38 |     def read_all(self, filenames):
 39 |         """
 40 |         Parse a list of peak list files and merge the resulting spectra.
 41 |         All spectra must have identical dimensions.
 42 |         
 43 |         @param filenames: input file names
 44 |         @type filenames: iterable of str
 45 |         
 46 |         @return: joint spectrum
 47 |         @rtype: L{NOESpectrum}
 48 |         """
 49 |         spectra = [self.read_file(f) for f in filenames]
 50 |         return NOESpectrum.join(*spectra)
 51 |         
 52 | class SparkyPeakListReader(BasePeakListReader):
 53 |     """
 54 |     Sparky NOE peak list parser.
 55 |     
 56 |     @param elements: list of element names for each dimension
 57 |     @type elements: list of (str or L{EnumItem})
 58 |     @param connected: list of covalently connected dimension indices in the
 59 |                       format: [(i1,i2),...]
 60 |     @type connected: list of (int,int) tuples
 61 |     """
 62 |     
 63 |     def __init__(self, elements, connected):
 64 |         
 65 |         self._elements = list(elements)
 66 |         self._connected = [(d1, d2) for d1, d2 in connected]
 67 |         
 68 |         if len(self._elements) < 1:
 69 |             raise ValueError("Can't parse a 0-dimensional peak list")
 70 |     
 71 |     def read(self, table):
 72 |         """
 73 |         Parse a Sparky peak list table.
 74 |         
 75 |         @param table: input peak list
 76 |         @type table: str
 77 |         @rtype: L{NOESpectrum}
 78 |         """
 79 |         offset = 0                
 80 |         spectrum = NOESpectrum(self._elements)
 81 |         
 82 |         for d1, d2 in self._connected:
 83 |             spectrum.connect(d1, d2)
 84 |         
 85 |         for l in table.splitlines():
 86 |             if not l.strip() or ('w1' in l and 'w2' in l):
 87 |                 if l.lstrip().lower().startswith('assignment'):
 88 |                     offset = 1
 89 |                 continue
 90 |             
 91 |             line = l.split()[offset:]
 92 |             try:
 93 |                 float(line[-1])             # last item may or may not be a comment
 94 |             except ValueError:
 95 |                 if len(line) > 0:
 96 |                     line.pop()
 97 |             
 98 |             items = list(map(float, line))
 99 |             intensity = items[-1]
100 |             dimensions = items[:-1]
101 |             
102 |             if len(dimensions) != len(self._elements):
103 |                 raise PeakListFormatError("Expected {0} dimensional spectrum, got {1}".format(
104 |                                                                     len(self._elements), len(dimensions)))
105 |             
106 |             spectrum.add(intensity, dimensions)
107 |         
108 |         return spectrum       
109 |                     
class XeasyPeakListReader(BasePeakListReader):
    """
    XEASY NOE peak list parser.
    """
        
    def __init__(self):
        pass
            
    def read(self, table):
        """
        Parse an XEASY peak list table.

        The header (lines starting with C{#}) defines the dimensions of the
        spectrum, see L{_read_header}. In every other non-blank line the first
        column is dropped, the next C{num_dimensions} columns are taken as the
        peak dimensions and the column two positions after the last dimension
        is taken as the peak intensity.
        
        @param table: input peak list
        @type table: str
        @rtype: L{NOESpectrum}

        @raise PeakListFormatError: on invalid header, truncated peak entries
                                    or non-numeric values
        """      
        lines = table.splitlines()  
        spectrum = self._read_header(lines)
        num_dimensions = spectrum.num_dimensions
        
        for l in lines:
            if not l.strip() or l.startswith('#'):
                continue
            
            # drop the first column
            parts = l.split()[1:]
            
            if len(parts) < num_dimensions + 3:
                # previously surfaced as a bare IndexError
                raise PeakListFormatError("Truncated XEASY peak entry: {0!r}".format(l))
            
            try:
                intensity = float(parts[num_dimensions + 2])
                # materialize the values: a lazy map object can be consumed only once
                dimensions = list(map(float, parts[:num_dimensions]))
            except ValueError:
                raise PeakListFormatError("Non-numeric value in XEASY peak entry: {0!r}".format(l))
            
            spectrum.add(intensity, dimensions)
        
        return spectrum           
    
    
    def _read_header(self, lines):
        """
        Build an empty spectrum from the XEASY header lines.

        Recognized header records: C{# Number of dimensions N},
        C{#INAME index label} and the optional C{#CYANAFORMAT pattern}.

        @param lines: all lines of the peak list
        @type lines: list of str
        @return: a spectrum with defined elements and connected dimensions,
                 but without any peaks
        @rtype: L{NOESpectrum}

        @raise PeakListFormatError: on invalid or inconsistent header
        """
        
        num = 0
        dim = {}            # dimension index => element name (letters of the label)
        el = {}             # full dimension label => dimension index
        connectivity = None
        
        for l in lines:
            if l.startswith('#'):
                if l[1:].lstrip().lower().startswith('number of dimensions'):
                    num = int(l.split()[-1])
                
                if l.startswith('#INAME'):
                    parts = l.split()[1:]
                    if len(parts) != 2:
                        raise PeakListFormatError("Invalid Xeasy header")                    
                    
                    # header indices are 1-based
                    index = int(parts[0]) - 1
                    if index < 0:
                        raise PeakListFormatError("Invalid Xeasy header: dimension index < 1")
                    
                    element = ''.join(i for i in parts[1] if i.isalpha())
                    el[parts[1]] = index
                     
                    dim[index] = element
                    
                if l.startswith('#CYANAFORMAT'):
                    fields = l.split()
                    if len(fields) < 2:
                        raise PeakListFormatError("Invalid XEASY/CYANA header")
                    connectivity = fields[1]
      
        if len(dim) != num or num == 0:
            raise PeakListFormatError("Invalid Xeasy header")
        
        elements = tuple(dim[i] for i in sorted(dim))
        spectrum = NOESpectrum(elements)
        
        if connectivity:
            # cyanaformat - explicitly defines connected dimensions:
            # upper case dimensions are connected, e.g. "#CYANAFORMAT hCH" => 2-3 
            if connectivity.upper() != ''.join(elements).upper():
                # PeakListFormatError is a ValueError, so existing handlers keep working
                raise PeakListFormatError("Invalid XEASY/CYANA header") 
            for i1 in range(len(connectivity)):
                for i2 in range(len(connectivity)):
                    e1, e2 = connectivity[i1], connectivity[i2] 
                    if i1 != i2 and e1.isupper() and e2.isupper():
                        spectrum.connect(i1, i2)                        
        else:
            # dimension labels starting with a number are connected, e.g. "1A B2 3C" => 1-3
            if len(el) != num:
                raise PeakListFormatError("Invalid XEASY header")                
            for e1 in el:
                for e2 in el:
                    if e1 != e2:
                        element1 = dim[el[e1]]
                        element2 = dim[el[e2]]
                        
                        # what remains of the label after removing the element name
                        num1 = e1.replace(element1, '')
                        num2 = e2.replace(element2, '')
                        
                        # NOTE(review): for a label without any digits the remainder
                        # is '' and startswith('') is always True, so such dimensions
                        # are also treated as connected - confirm this is intended
                        if e1.startswith(num1) and e2.startswith(num2):
                            spectrum.connect(el[e1], el[e2])
                        
        return spectrum
209 | 
210 | 
class XeasyFileBuilder(object):
    """
    Serializes NOE spectra in XEASY format.
    
    @param stream: destination stream, where the output is written
    @type stream: file
    """
    
    def __init__(self, stream):
        self._out = stream
        
    def add_spectrum(self, spectrum):
        """
        Write the XEASY header of C{spectrum}, followed by all of its peaks.
        
        @param spectrum: NOE spectrum
        @type spectrum: L{NOESpectrum} 
        """
        self.add_header(spectrum)
        self.add_peaks(spectrum)
        
    def add_header(self, spectrum):
        """
        Write the XEASY header: number of dimensions, one C{#INAME} record
        per dimension and a C{#CYANAFORMAT} record, in which connected
        dimensions are upper case and all others are lower case.
        
        @param spectrum: NOE spectrum
        @type spectrum: L{NOESpectrum} 
        """
        emit = self._out.write
        
        emit('# Number of dimensions {0}\n'.format(spectrum.num_dimensions))
        
        pattern = []
        
        for index, dimension in enumerate(spectrum.dimensions):
            label = repr(dimension).upper()
            # records are numbered from 1, dimensions are indexed from 0
            emit('#INAME {0} {1}{0}\n'.format(index + 1, label))
            
            connected = spectrum.has_connected_dimensions(index)
            pattern.append(label if connected else label.lower())
                
        emit('#CYANAFORMAT {0}\n'.format(''.join(pattern)))
    
    def add_peaks(self, spectrum):
        """
        Write one line for every peak in C{spectrum}: peak number, the value
        of each dimension, the intensity and a fixed trailer.
        
        @param spectrum: NOE spectrum
        @type spectrum: L{NOESpectrum} 
        """
        emit = self._out.write
        
        for number, peak in enumerate(spectrum, start=1):
            emit("{0:5} ".format(number))
            
            for axis in range(spectrum.num_dimensions):
                emit("{0:7.3f} ".format(peak.get(axis)))
                            
            emit("2 U ")
            emit("{0:18e} ".format(peak.intensity))
            emit("0.00e+00 m   0    0    0    0 0\n")
270 | 


--------------------------------------------------------------------------------
/csb/bio/io/procheck.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Procheck parser
 3 | """
 4 | import os
 5 | import re
 6 | import shutil
 7 | 
 8 | from csb.io import Shell
 9 | from csb.io import TempFolder
10 | 
class ProcheckParser():
    """
    Simple Procheck Summary parser
    """
    def __init__(self):
        self.binary = 'procheck.scr'
        # passed to the procheck binary as its second argument, see L{run}
        self.acc = 2.0
        
    def parse(self, fn):
        """
        Extract the quality indicators from a Procheck summary file.

        @param fn: source file to parse
        @type fn: str

        @return: dictionary of parsed quality indicators with keys:
                 C{input_file}, C{#residues}, C{rama_core}, C{rama_allow},
                 C{rama_gener}, C{rama_disall}, C{g_dihedrals}, C{g_bond},
                 C{g_overall}, C{badContacts}, C{labelledAll} and
                 C{labelledChi} (the last two are fractions: labelled
                 residues divided by the "out of" total)
        @rtype: dict
        """
        info = dict()
        
        # read everything up front; the file is closed even if parsing fails
        with open(os.path.expanduser(fn)) as f_handler:
            text = f_handler.read()
        
        input_file_name = re.compile('>>>-----.*?\n.*?\n' r'\s*\|\s*(\S+)\s+')
        residues = re.compile(r'(\d+)\s*residues\s\|')
        ramachandran_plot = re.compile(r'Ramachandran\splot:\s*(\d+\.\d+)' + 
                                      r'%\s*core\s*(\d+\.\d+)%\s*allow\s*(\d+\.\d+)' + 
                                      r'%\s*gener\s*(\d+\.\d+)%\s*disall')
        labelled_all = re.compile(r'Ramachandrans:\s*(\d+)\s*.*?out\sof\s*(\d+)')
        labelled_chi = re.compile(r'Chi1-chi2\splots:\s*(\d+)\s*.*?out\sof\s*(\d+)')
        bad_contacts = re.compile(r'Bad\scontacts:\s*(\d+)')
        g_factors = re.compile(r'G-factors\s*Dihedrals:\s*([0-9-+.]+)' + 
                              r'\s*Covalent:\s*([0-9-+.]+)\s*Overall:\s*([0-9-+.]+)')

        info['input_file'] = input_file_name.search(text).groups()[0]
        info['#residues'] = int(residues.search(text).groups()[0])
        info['rama_core'], info['rama_allow'], info['rama_gener'], info['rama_disall'] = \
                           [float(g) for g in ramachandran_plot.search(text).groups()]
        info['g_dihedrals'], info['g_bond'], info['g_overall'] = \
                             [float(g) for g in g_factors.search(text).groups()]
        info['badContacts'] = int(bad_contacts.search(text).groups()[0])
        
        labelled, total = labelled_all.search(text).groups()
        info['labelledAll'] = float(labelled) / float(total)
        
        # fraction of labelled residues: group 1 divided by group 2
        # (the count used to be divided by itself, which always gave 1.0)
        labelled, total = labelled_chi.search(text).groups()
        info['labelledChi'] = float(labelled) / float(total)
        
        return info


    def run(self, pdb_file):
        """
        Runs procheck for the given pdbfile and parses the output.
        Will fail if the procheck binary is not in the path.

        The PDB file is copied to a temporary folder, which is also used as
        the working directory while the binary runs. The original working
        directory is always restored.
        
        @param pdb_file: file to parse
        @type pdb_file: str
        @return: dict of parsed values, see L{parse}
        @rtype: dict
        """
        wd = os.getcwd()
        base = os.path.basename(pdb_file)

        with TempFolder() as tmp:
            shutil.copy(os.path.expanduser(pdb_file), tmp.name)
            os.chdir(tmp.name)
            try:
                Shell.run('{0} {1} {2}'.format(self.binary,
                                               os.path.join(tmp.name, base),
                                               self.acc))
                # the summary is named after the input file: <basename>.sum
                summary = '.'.join([os.path.splitext(base)[0], 'sum'])
                out = self.parse(os.path.join(tmp.name, summary))
            finally:
                # do not leave the process inside the temp folder on failure
                os.chdir(wd)

        return out
81 | 


--------------------------------------------------------------------------------
/csb/bio/io/psipred.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PSIPRED Parser
 3 | """
 4 | 
 5 | import csb.core
 6 | 
 7 | from csb.bio.structure import SecondaryStructure, SecStructures, UnknownSecStructureError
 8 | 
 9 | 
class PSIPredParseError(ValueError):
    """
    Raised when a PSI-PRED output file can not be parsed.
    """
12 | 
13 | 
class PSIPredResidueInfo(object):
    """
    Plain container for the values parsed from a single residue line of a
    PSI-PRED scores file: the residue's rank and name, its secondary
    structure and the helix, strand and coil scores.
    """
    
    def __init__(self, rank, residue, sec_structure, helix, strand, coil):
        
        self.rank, self.residue = rank, residue
        self.sec_structure = sec_structure
        
        # per-state scores
        self.helix = helix
        self.strand = strand
        self.coil = coil
24 | 
25 | 
class PSIPredParser(object):
    """
    Simple PSI-PRED Secondary Structure Parser.
    """
    
    def parse(self, psipred_file):
        """
        Collect the C{Pred:} (secondary structure) and C{Conf:} (confidence)
        rows of a PSI-PRED horizontal output file and join them into a
        single secondary structure object.

        @param psipred_file: source PSI-PRED *.horiz file to parse
        @type psipred_file: str
        @rtype: L{SecondaryStructure}

        @raise PSIPredParseError: if the joint prediction and confidence
                                  strings differ in length
        """
        
        ss = []
        conf = []
        
        # the context manager closes the file even if reading fails
        with open(psipred_file) as stream:
            for line in stream:
                
                # the payload of both row types starts at column 6
                if line.startswith('Conf:'):
                    conf.extend(line[6:].strip())
                    
                elif line.startswith('Pred:'):
                    ss.append(line[6:].strip())
        
        ss = ''.join(ss)
        conf = ''.join(conf)
        
        if len(ss) != len(conf):
            raise PSIPredParseError('Invalid PSI-PRED output file')
        
        if ss:
            return SecondaryStructure(ss, conf)
        else:
            return SecondaryStructure(None)

    def parse_scores(self, scores_file):
        """
        Parse the per-residue scores. Blank lines and lines starting with
        C{#} are skipped; every other line is split on whitespace into:
        rank, residue, secondary structure, coil, helix and strand score.

        @param scores_file: source PSI-PRED *.ss2 file to parse
        @type scores_file: str
        @rtype: tuple of L{PSIPredResidueInfo}

        @raise UnknownSecStructureError: when a secondary structure code can
                                         not be parsed as a L{SecStructures}
                                         member
        """
        residues = [] 
        
        # the context manager closes the file even if a line fails to parse
        with open(scores_file) as stream:
            for line in stream:
                
                if line.startswith('#') or not line.strip():
                    continue
                
                fields = line.split()
                
                rank = int(fields[0])
                residue = fields[1]
                
                try:
                    ss = csb.core.Enum.parse(SecStructures, fields[2])
                except csb.core.EnumValueError as e:
                    raise UnknownSecStructureError(str(e))
                
                # note the column order in the file: coil, helix, strand
                coil, helix, strand = map(float, fields[3:6])
                
                residues.append(PSIPredResidueInfo(rank, residue, ss, helix, strand, coil))
        
        return tuple(residues)
88 |                 


--------------------------------------------------------------------------------
/csb/bio/io/svg.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Build SVG diagrams from various csb objects.
  3 | """
  4 | 
  5 | import math
  6 | import csb.core
  7 | 
  8 | from csb.bio.structure import SecondaryStructure, SecStructures
  9 | 
 10 | 
 11 | class SSCartoonBuilder(object):
 12 |     """
 13 |     Creates 2D vector diagrams from L{SecondaryStructure} objects.
 14 |     
 15 |     @param ss: source secondary structure (either a SS string or a SS object)
 16 |     @type ss: str or L{SecondaryStructure}
 17 |     @param width: output width of the diagram in pixels
 18 |     @type width: int
 19 |     @param height: output height of the diagram in pixels
 20 |     @type height: int
 21 |     
 22 |     @param thickness: stroke-width (2px by default)
 23 |     @param helix: SVG color for helicies (red by default)
 24 |     @param strand: SVG color for strands (blue by default)
 25 |     @param coil: SVG color for coils (orange by default)
 26 |     @param gap: SVG color for gaps (grey by default)
 27 |     @param cap: stroke-linecap (round by default)
 28 |     """
 29 |     
 30 |     def __init__(self, ss, width, height, thickness='2px', 
 31 |                  helix='#C24641', strand='#6698FF', coil='#FF8C00', gap='#E0E0E0', 
 32 |                  cap='round'):
 33 |          
 34 |         if ss:
 35 |             if isinstance(ss, csb.core.string):
 36 |                 self._ss = SecondaryStructure(ss)
 37 |             else:
 38 |                 self._ss = ss.clone()
 39 |             self._ss.to_three_state()
 40 |             self._residues = sum(e.length for e in self._ss)
 41 |             if self._residues == 0:
 42 |                 raise ValueError('Zero-length secondary structure')
 43 |         else:
 44 |             raise ValueError('Invalid secondary structure')
 45 |         
 46 |         self.thickness = thickness
 47 |         self.helixcolor = helix
 48 |         self.strandcolor = strand
 49 |         self.coilcolor = coil
 50 |         self.gapcolor = gap
 51 |         self.cap = cap
 52 |         
 53 |         self._realwidth = float(width)
 54 |         self._width = self._realwidth - 2 # this is to compensate for antialiasing and rounded caps
 55 |         self._height = float(height)
 56 |         self._x = 0
 57 |         self._y = 0        
 58 |         
 59 |         self._svg = ''
 60 |         
 61 |     def build(self):
 62 |         """
 63 |         Build a SVG image using the current size and color settings.
 64 |         
 65 |         @return: SVG diagram
 66 |         @rtype: str (SVG document) 
 67 |         """
 68 |         
 69 |         self._x = 0
 70 |         self._y = 0
 71 |         self._svg = [r'''<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
 72 |     width="{0._realwidth}" height="{0._height}">
 73 |     
 74 |     <g transform="translate(0, {1})">'''.format(self, self._height / 2.0)]
 75 |         
 76 |         for e in self._ss:
 77 |             
 78 |             if e.type == SecStructures.Helix:
 79 |                 cartoon = self._helix(e.length)
 80 |                 color = self.helixcolor
 81 |                 
 82 |             elif e.type == SecStructures.Strand:
 83 |                 cartoon = self._strand(e.length)
 84 |                 color = self.strandcolor
 85 |                 
 86 |             elif e.type == SecStructures.Coil:
 87 |                 cartoon = self._coil(e.length)
 88 |                 color = self.coilcolor
 89 |                 
 90 |             elif e.type == SecStructures.Gap:
 91 |                 cartoon = self._gap(e.length)
 92 |                 color = self.gapcolor
 93 |                 
 94 |             else:
 95 |                 assert False, "Unhandled SS Type: {0!r}".format(e.type)
 96 |         
 97 |             path = r'''        <path fill="none" stroke="{0}" stroke-width="{1.thickness}" stroke-linecap="{1.cap}"
 98 |             d="{2}" />'''.format(color, self, cartoon)
 99 |         
100 |             self._svg.append(path)
101 | 
102 |         self._svg.append('    </g>')        
103 |         self._svg.append('</svg>')        
104 |         return '\n'.join(self._svg)
105 |     
106 |     def _format(self, path):
107 |         
108 |         formatted = []
109 |         
110 |         for i in path:
111 |             
112 |             if i == -0:
113 |                 i = 0
114 |             
115 |             if isinstance(i, float):
116 |                 i = round(i, ndigits=7)
117 |                 if i == -0:
118 |                     i = 0
119 |                 formatted.append('{0:.7f}'.format(i))
120 |             else:
121 |                 formatted.append(str(i))
122 |         
123 |         return ' '.join(formatted)
124 |     
125 |     def _helix(self, length, arc_width=3.0):
126 |         
127 |         if length < 1:
128 |             return ''
129 |         
130 |         helix_width = float(length) * self._width / self._residues
131 |         helix_end = self._x + helix_width
132 |         path = ['M', self._x, self._y, 'Q']
133 |         
134 |         arcs = int(helix_width / arc_width)
135 |         for i in range(1, arcs + 1):
136 | 
137 |             # quadratic bezier control points: sine curve's min, max and inflection points (0, 1, 0, -1, 0, 1 ...)
138 |             # one arc is the curve from 0 to pi/2                                    
139 |             if i < arcs:
140 |                 # inner arc
141 |                 self._x += arc_width            
142 |                 self._y = math.sin(math.pi * i / 2) * (self._height / 2.0)
143 |                 path.append(self._x)
144 |                 path.append(self._y)                 
145 |             else:
146 |                 # last arc; stretch it to make the helix pixel-precise, ending also at y=0
147 |                 # also the number of arcs/controlpoints must be even, otherwise the path is broken
148 |                 
149 |                 # remaining pixels on x
150 |                 remainder = helix_end - self._x 
151 |                 
152 |                 if i % 2 == 0:
153 |                     # even number of arcs, just extend the last arc with the remainder
154 |                     self._x += remainder
155 |                     self._y = 0                
156 |                     path.append(self._x)
157 |                     path.append(self._y)
158 |                 else:
159 |                     # odd number of arcs
160 |                     
161 |                     #  1) keep this arc at the expected y, but stretch it half of the x remainder
162 |                     self._x += remainder / 2.0     
163 |                     self._y = math.sin(math.pi * i / 2) * (self._height / 2.0)
164 |                     path.append(self._x)
165 |                     path.append(self._y)
166 |                     
167 |                     #  2) append a final arc, ending at [helix_end, 0]
168 |                     self._x += remainder / 2.0
169 |                     self._y = 0
170 |                     path.append(self._x)
171 |                     path.append(self._y)  
172 |                     
173 |         return self._format(path)
174 |                 
175 |     def _strand(self, length, arrow_width=3.0):
176 |         
177 |         offset = 1.0
178 |         strand_width = float(length) * self._width / self._residues
179 |         path = ['M', self._x, self._y, 'H']
180 | 
181 |         self._x += strand_width         
182 |         path.append(self._x)
183 |         
184 |         if offset < arrow_width < strand_width:
185 |             arrow_start = self._x - offset - arrow_width
186 |             path.extend(['M', self._x - offset, self._y])
187 |             path.extend(['L', arrow_start, self._y + self._height / 9])
188 |             path.extend(['L', arrow_start, self._y - self._height / 9])
189 |             path.extend(['L', self._x - offset, self._y])
190 |                 
191 |         return self._format(path)
192 |         
193 |     def _coil(self, length):
194 |         
195 |         coil_width = float(length) * self._width / self._residues
196 |         path = ['M', self._x, self._y, 'Q']
197 |     
198 |         # first control point    
199 |         self._x += coil_width / 2.0 
200 |         self._y = self._height / -2.0
201 |         path.append(self._x)
202 |         path.append(self._y)
203 |         
204 |         # second
205 |         self._x += coil_width / 2.0 
206 |         self._y = 0
207 |         path.append(self._x)
208 |         path.append(self._y)
209 |         
210 |         return self._format(path)               
211 |     
212 |     def _gap(self, length):
213 |         
214 |         return self._strand(length, arrow_width=0)
215 | 


--------------------------------------------------------------------------------
/csb/bio/io/vasco.py:
--------------------------------------------------------------------------------
  1 | """
  2 | VASCO Chemical Shift format parser.
  3 | """
  4 | 
  5 | import csb.core
  6 | 
  7 | 
  8 | class ShiftInfo(object):
  9 | 
 10 |     def __init__(self, residue_id, amino_acid, nucleus,
 11 |                  shift, element, secondary_structure):
 12 | 
 13 |         self.residue_id = residue_id
 14 |         self.nucleus = nucleus
 15 |         self.element = element
 16 |         self.amino_acid = amino_acid
 17 |         self.shift = shift
 18 |         self.secondary_structure = secondary_structure
 19 | 
 20 |     def __str__(self):
 21 |         return '{0.amino_acid} {0.nucleus} {0.shift}'.format(self)
 22 | 
 23 |     __repr__ = __str__
 24 | 
 25 |         
 26 | class ChemicalShiftContainer(csb.core.DictionaryContainer):
 27 | 
 28 |     def __init__(self, bmrb_id='', pdb_id='', sequence='',
 29 |                  chain='', exptype=''):
 30 |         
 31 |         self.bmrb_id = bmrb_id
 32 |         self.pdb_id = pdb_id
 33 |         self.sequence = sequence
 34 |         self.chain = chain
 35 |         self.exptype = exptype
 36 |         
 37 |         super(ChemicalShiftContainer, self).__init__()
 38 | 
 39 | class VascoStructureParser(object):
 40 |     """
 41 |     Simple Vasco Parser
 42 |     """
 43 | 
 44 |     def __init__(self):
 45 |         self._stream  = None
 46 | 
 47 |     def parse(self, file_name, ignore_outliers=True):
 48 |         """
 49 |         @param file_name: source  file to parse
 50 |         @type file_name: str
 51 |         @return: a L{ChemicalShiftContainer} of L{ShiftInfo} objects
 52 |         @rtype: dict
 53 |         """
 54 |         self._stream  = open(file_name)
 55 |         shifts = self._parse_header()
 56 | 
 57 |         self._parse_shifts(shifts, ignore_outliers=ignore_outliers)
 58 |         self._stream.close()
 59 | 
 60 |         return shifts
 61 |         
 62 |     def _parse_header(self):
 63 | 
 64 |         bmrb_id = ''
 65 |         pdb_id = ''
 66 |         sequence = ''
 67 |         chain = ''
 68 |         exptype = ''
 69 |         self._stream.seek(0)
 70 |         
 71 |         while True:
 72 |             try:
 73 |                 line = next(self._stream)
 74 |             except StopIteration :
 75 |                 break
 76 | 
 77 |             if line.startswith('#'):
 78 |                 if line[2:].startswith('BMRB ORIGIN'):
 79 |                     bmrb_id = line[20:].strip()
 80 |                 elif line[2:].startswith('PDB ORIGIN'):
 81 |                     pdb_id = line[20:].strip()
 82 |                 elif line[2:].startswith('SEQUENCE PDB'):
 83 |                     sequence = line[20:].strip()
 84 |                     chain = line[17]
 85 |                 elif line[2:].startswith('PDB EXPTYPE'):
 86 |                     exptype = line[20:].strip()
 87 |             else:
 88 |                 break
 89 | 
 90 |          
 91 |         return ChemicalShiftContainer(bmrb_id, pdb_id, chain,
 92 |                                       sequence, exptype )
 93 |             
 94 |     
 95 |     def _parse_shifts(self, data, ignore_outliers=True):
 96 | 
 97 |         while True:
 98 |             try:
 99 |                 line = next(self._stream)
100 |             except StopIteration:
101 |                 break
102 | 
103 |             if ignore_outliers and "Shift outlier" in line:
104 |                 continue
105 |       
106 |             chain_id = line[7]
107 |             res_code = line[9:14].strip()
108 |             res_label = line[16:19].strip()
109 |             res_ss = line[21]
110 |             nucleus_name = line[23:28].strip()
111 |             nucleus_element = line[41]
112 |             shift = float(line[43:52])
113 | 
114 |             info = ShiftInfo(res_code, res_label,
115 |                              nucleus_name, shift,
116 |                              nucleus_element, res_ss)
117 | 
118 |             if not chain_id in data:
119 |                 data.append(chain_id, csb.core.OrderedDict())
120 | 
121 |             if not res_code in data[chain_id]:
122 |                 data[chain_id][res_code] = {}
123 | 
124 |             
125 |             data[chain_id][res_code][nucleus_name] = info
126 | 
127 |     


--------------------------------------------------------------------------------
/csb/bio/io/whatif.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Simple WhatIf/WhatCheck Summary parser
 3 | """
 4 | 
 5 | import re
 6 | import os
 7 | import shutil
 8 | 
 9 | from csb.io import Shell
10 | from csb.io import TempFolder
11 | 
class WhatCheckParser(object):
    """
    Simple WhatIf/WhatCheck Summary parser

    @param binary: name (or path) of the WhatCheck executable to invoke
    @type binary: str
    """

    # (result key, pattern capturing the numeric score) for every summary
    # line that is extracted; the order is the order in which they are parsed
    _SCORES = (
        ('rama_z_score',
         re.compile(r'Ramachandran\s*Z-score\s*:\s*([0-9.Ee-]+)')),
        ('bb_z_score',
         re.compile(r'Backbone\s*conformation\s*Z-score\s*:\s*([0-9.Ee-]+)')),
        ('1st_packing_z_score',
         re.compile(r'1st\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')),
        ('2nd_packing_z_score',
         re.compile(r'2nd\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')),
        ('rotamer_score',
         re.compile(r'chi-1\S*chi-2\s*rotamer\s*normality\s*:\s*([0-9.Ee-]+)')),
    )

    def __init__(self, binary='DO_WHATCHECK.COM'):
        self.binary = binary

    def parse_summary(self, fn):
        """
        @param fn: whatif pdbout.txt file to parse
        @type fn: str

        @return: A dict containing some of the WhatCheck results
        @rtype: a dict

        @raise AttributeError: if one of the expected scores is not present
                               in the file
        @raise ValueError: if a matched score is not a valid number
        """
        # the context manager closes the file even when parsing fails below
        with open(os.path.expanduser(fn)) as f_handler:
            text = f_handler.read()

        info = dict()
        for key, pattern in self._SCORES:
            info[key] = float(pattern.search(text).group(1))

        return info

    parse = parse_summary


    def run(self, pdb_file):
        """
        Runs WhatCheck for the given pdbfile and parses the output.
        Will fail if the WhatCheck binary is not in the path.

        The file is copied to a temporary folder, which becomes the working
        directory while the binary runs; the previous working directory is
        restored afterwards, also when the run or the parsing fails.

        @param pdb_file: file to parse
        @return: dict of parsed values
        """
        wd = os.getcwd()
        base = os.path.basename(pdb_file)

        with TempFolder() as tmp:
            shutil.copy(os.path.expanduser(pdb_file), tmp.name)
            os.chdir(tmp.name)
            try:
                Shell.run('{0} {1}'.format(self.binary,
                                           os.path.join(tmp.name, base)))
                out = self.parse_summary(os.path.join(tmp.name, 'pdbout.txt'))
            finally:
                # never leave the process inside the temporary folder
                os.chdir(wd)

        return out
71 |                     
72 |         
73 | 
74 |         
75 | 


--------------------------------------------------------------------------------
/csb/bio/nmr/resources/RandomCoil.Corrections.tsv:
--------------------------------------------------------------------------------
  1 | ALA	H	-0.01	-0.05	0.07	-0.10
  2 | ALA	HA	-0.02	-0.03	-0.03	0.00
  3 | ALA	C	-0.11	-0.77	-0.07	-0.02
  4 | ALA	CA	-0.02	-0.17	0.06	0.01
  5 | ALA	N	-0.12	-0.33	-0.57	-0.15
  6 | ASN	H	-0.01	-0.03	0.13	-0.07
  7 | ASN	HA	-0.01	-0.01	-0.02	-0.01
  8 | ASN	C	-0.09	-0.66	-0.10	-0.03
  9 | ASN	CA	-0.06	-0.03	0.23	0.01
 10 | ASN	N	-0.18	-0.26	0.87	-0.17
 11 | ASP	H	-0.02	-0.03	0.14	-0.11
 12 | ASP	HA	-0.02	-0.01	-0.02	-0.01
 13 | ASP	C	-0.08	-0.58	-0.13	-0.04
 14 | ASP	CA	-0.03	0.00	0.25	-0.01
 15 | ASP	N	-0.12	-0.20	0.86	-0.29
 16 | ARG	H	0.00	-0.02	0.15	-0.06
 17 | ARG	HA	-0.02	-0.02	-0.02	0.00
 18 | ARG	C	-0.06	-0.49	-0.19	-0.03
 19 | ARG	CA	0.00	-0.07	-0.01	0.02
 20 | ARG	N	-0.06	-0.14	1.62	-0.06
 21 | CYS	H	0.00	-0.02	0.20	-0.07
 22 | CYS	HA	-0.01	0.02	0.00	0.00
 23 | CYS	C	-0.08	-0.51	-0.28	-0.07
 24 | CYS	CA	-0.03	-0.07	0.10	-0.01
 25 | CYS	N	-0.06	-0.26	3.07	0.00
 26 | GLN	H	-0.01	-0.02	0.15	-0.06
 27 | GLN	HA	-0.01	-0.02	-0.01	0.00
 28 | GLN	C	-0.05	-0.48	-0.18	-0.03
 29 | GLN	CA	-0.02	-0.06	0.04	0.01
 30 | GLN	N	-0.06	-0.14	1.62	-0.06
 31 | GLU	H	-0.01	-0.03	0.15	-0.07
 32 | GLU	HA	-0.02	-0.02	-0.02	0.00
 33 | GLU	C	-0.09	-0.48	-0.20	-0.03
 34 | GLU	CA	-0.01	-0.08	0.05	0.01
 35 | GLU	N	-0.06	-0.20	1.51	-0.12
 36 | GLY	H	0.00	0.00	0.00	0.00
 37 | GLY	HA2	0.00	0.00	0.00	0.00
 38 | GLY	C	0.00	0.00	0.00	0.00
 39 | GLY	CA	0.00	0.00	0.00	0.00
 40 | GLY	N	0.00	0.00	0.00	0.00
 41 | HIS	H	-0.01	-0.04	0.20	0.00
 42 | HIS	HA	-0.03	-0.06	0.01	0.01
 43 | HIS	C	-0.10	-0.65	-0.22	-0.07
 44 | HIS	CA	-0.05	-0.09	0.02	0.01
 45 | HIS	N	-0.12	-0.55	1.68	0.17
 46 | ILE	H	-0.01	-0.06	0.17	-0.09
 47 | ILE	HA	-0.03	-0.02	-0.02	-0.01
 48 | ILE	C	-0.20	-0.58	-0.18	-0.02
 49 | ILE	CA	-0.07	-0.20	-0.01	0.02
 50 | ILE	N	-0.18	-0.14	4.87	0.00
 51 | LEU	H	0.00	-0.03	0.14	-0.08
 52 | LEU	HA	-0.04	-0.03	-0.05	-0.01
 53 | LEU	C	-0.13	-0.50	-0.13	-0.01
 54 | LEU	CA	-0.01	-0.10	0.03	0.02
 55 | LEU	N	-0.06	-0.14	1.05	-0.06
 56 | LYS	H	0.00	-0.03	0.14	-0.06
 57 | LYS	HA	-0.02	-0.02	-0.01	0.00
 58 | LYS	C	-0.08	-0.50	-0.18	-0.03
 59 | LYS	CA	-0.01	-0.11	-0.02	0.02
 60 | LYS	N	-0.06	-0.20	1.57	-0.06
 61 | MET	H	0.00	-0.02	0.15	-0.06
 62 | MET	HA	-0.02	-0.01	-0.01	0.00
 63 | MET	C	-0.08	-0.41	-0.18	-0.02
 64 | MET	CA	0.00	0.10	-0.06	0.01
 65 | MET	N	-0.06	-0.20	1.57	-0.06
 66 | PHE	H	-0.03	-0.12	0.10	-0.37
 67 | PHE	HA	-0.06	-0.09	-0.08	-0.04
 68 | PHE	C	-0.27	-0.83	-0.25	-0.10
 69 | PHE	CA	-0.07	-0.23	0.06	0.01
 70 | PHE	N	-0.18	-0.49	2.78	-0.46
 71 | PRO	H	-0.04	-0.18	0.19	-0.12
 72 | PRO	HA	-0.01	0.11	-0.03	-0.01
 73 | PRO	C	-0.47	-2.84	-0.09	-0.02
 74 | PRO	CA	-0.22	-2.00	0.02	0.04
 75 | PRO	N	-0.18	-0.32	0.87	-0.17
 76 | SER	H	0.00	-0.03	0.16	-0.08
 77 | SER	HA	-0.01	0.02	0.00	-0.01
 78 | SER	C	-0.08	-0.40	-0.15	-0.06
 79 | SER	CA	0.00	-0.08	0.13	0.00
 80 | SER	N	-0.06	-0.03	2.55	-0.17
 81 | THR	H	0.01	0.00	0.14	-0.06
 82 | THR	HA	-0.01	0.05	0.00	-0.01
 83 | THR	C	-0.08	-0.19	-0.13	-0.05
 84 | THR	CA	-0.01	-0.04	0.12	0.00
 85 | THR	N	-0.06	-0.03	2.78	-0.12
 86 | TRP	H	-0.08	-0.13	0.04	-0.62
 87 | TRP	HA	-0.08	-0.10	-0.15	-0.16
 88 | TRP	C	-0.26	-0.85	-0.30	-0.17
 89 | TRP	CA	-0.02	-0.17	0.03	-0.08
 90 | TRP	N	0.00	-0.26	3.19	-0.64
 91 | TYR	H	-0.04	-0.11	0.09	-0.42
 92 | TYR	HA	-0.05	-0.10	-0.08	-0.04
 93 | TYR	C	-0.28	-0.85	-0.24	-0.13
 94 | TYR	CA	-0.07	-0.22	0.06	-0.01
 95 | TYR	N	-0.24	-0.43	3.01	-0.52
 96 | VAL	H	-0.01	-0.05	0.17	-0.08
 97 | VAL	HA	-0.02	-0.01	-0.02	-0.01
 98 | VAL	C	-0.20	-0.57	-0.18	-0.03
 99 | VAL	CA	-0.07	-0.21	-0.02	0.01
100 | VAL	N	-0.24	-0.14	4.34	-0.06
101 | 


--------------------------------------------------------------------------------
/csb/bio/nmr/resources/RandomCoil.Reference.tsv:
--------------------------------------------------------------------------------
  1 | ALA	H	8.35
  2 | ALA	HA	4.35
  3 | ALA	HB	1.42
  4 | ALA	C	178.5
  5 | ALA	CA	52.82
  6 | ALA	CB	19.26
  7 | ALA	N	125
  8 | ASN	H	8.51
  9 | ASN	HA	4.79
 10 | ASN	HB2	2.88
 11 | ASN	HB3	2.81
 12 | ASN	HD21	7.59
 13 | ASN	HD22	7.01
 14 | ASN	C	176.1
 15 | ASN	CA	53.33
 16 | ASN	CB	39.09
 17 | ASN	CG	177.3
 18 | ASN	N	119
 19 | ASP	H	8.56
 20 | ASP	HA	4.82
 21 | ASP	HB2	2.98
 22 | ASP	HB3	2.91
 23 | ASP	C	175.9
 24 | ASP	CA	52.99
 25 | ASP	CB	38.33
 26 | ASP	CG	177.4
 27 | ASP	N	119.1
 28 | ARG	H	8.39
 29 | ARG	HA	4.38
 30 | ARG	HB2	1.91
 31 | ARG	HB3	1.79
 32 | ARG	HG2	1.68
 33 | ARG	HG3	1.64
 34 | ARG	HD2	3.2
 35 | ARG	HD3	3.2
 36 | ARG	HE	7.2
 37 | ARG	C	177.1
 38 | ARG	CA	56.48
 39 | ARG	CB	30.93
 40 | ARG	CG	27.33
 41 | ARG	CD	43.55
 42 | ARG	CZ	159.7
 43 | ARG	N	121.2
 44 | CYS	H	8.44
 45 | CYS	HA	4.59
 46 | CYS	HB2	2.98
 47 | CYS	HB3	2.98
 48 | CYS	C	175.3
 49 | CYS	CA	58.63
 50 | CYS	CB	28.34
 51 | CYS	N	118.8
 52 | GLN	H	8.44
 53 | GLN	HA	4.38
 54 | GLN	HB2	2.17
 55 | GLN	HB3	2.01
 56 | GLN	HG2	2.39
 57 | GLN	HG3	2.39
 58 | GLN	HE21	7.5
 59 | GLN	HE22	6.91
 60 | GLN	C	176.8
 61 | GLN	CA	56.22
 62 | GLN	CB	29.53
 63 | GLN	CG	33.96
 64 | GLN	CD	180.5
 65 | GLN	N	120.5
 66 | GLU	H	8.4
 67 | GLU	HA	4.42
 68 | GLU	HB2	2.18
 69 | GLU	HB3	2.01
 70 | GLU	HG2	2.5
 71 | GLU	HG3	2.5
 72 | GLU	C	176.8
 73 | GLU	CA	56.09
 74 | GLU	CB	28.88
 75 | GLU	CG	32.88
 76 | GLU	CD	180
 77 | GLU	N	120.2
 78 | GLY	H	8.41
 79 | GLY	HA2	4.02
 80 | GLY	HA	4.02
 81 | GLY	C	174.9
 82 | GLY	CA	45.39
 83 | GLY	N	107.5
 84 | HIS	H	8.56
 85 | HIS	HA	4.79
 86 | HIS	HB2	3.35
 87 | HIS	HB3	3.19
 88 | HIS	HE1	8.61
 89 | HIS	HD2	7.31
 90 | HIS	C	175.1
 91 | HIS	CA	55.39
 92 | HIS	CB	29.12
 93 | HIS	CE1	136.4
 94 | HIS	CD2	120.2
 95 | HIS	CG	131.4
 96 | HIS	N	118.1
 97 | ILE	H	8.17
 98 | ILE	HA	4.21
 99 | ILE	HB	1.89
100 | ILE	HG12	1.48
101 | ILE	HG13	1.19
102 | ILE	HG2	0.93
103 | ILE	HD1	0.88
104 | ILE	C	177.1
105 | ILE	CA	61.62
106 | ILE	CB	38.91
107 | ILE	CG1	27.46
108 | ILE	CG2	17.47
109 | ILE	CD1	13.16
110 | ILE	N	120.4
111 | LEU	H	8.28
112 | LEU	HA	4.38
113 | LEU	HB2	1.67
114 | LEU	HB3	1.62
115 | LEU	HG	1.62
116 | LEU	HD1	0.93
117 | LEU	HD2	0.88
118 | LEU	C	178.2
119 | LEU	CA	55.47
120 | LEU	CB	42.46
121 | LEU	CG	27.11
122 | LEU	CD1	24.99
123 | LEU	CD2	23.32
124 | LEU	N	122.4
125 | LYS	H	8.36
126 | LYS	HA	4.36
127 | LYS	HB2	1.89
128 | LYS	HB3	1.77
129 | LYS	HG2	1.47
130 | LYS	HG3	1.42
131 | LYS	HD2	1.68
132 | LYS	HD3	1.68
133 | LYS	C	177.4
134 | LYS	CA	56.71
135 | LYS	CB	33.21
136 | LYS	CG	25.01
137 | LYS	CD	29.33
138 | LYS	CE	42.35
139 | LYS	N	121.6
140 | MET	H	8.42
141 | MET	HA	4.52
142 | MET	HB2	2.15
143 | MET	HB3	2.03
144 | MET	HG2	2.63
145 | MET	HG3	2.64
146 | MET	HE	2.11
147 | MET	C	177.1
148 | MET	CA	55.77
149 | MET	CB	32.94
150 | MET	CG	32.25
151 | MET	CE	16.96
152 | MET	N	120.3
153 | PHE	H	8.31
154 | PHE	HA	4.65
155 | PHE	HB2	3.19
156 | PHE	HB3	3.04
157 | PHE	HD1	7.28
158 | PHE	HE1	7.38
159 | PHE	HZ	7.33
160 | PHE	HE2	7.38
161 | PHE	HD2	7.28
162 | PHE	C	176.6
163 | PHE	CA	58.09
164 | PHE	CB	39.75
165 | PHE	CG	139.2
166 | PHE	CD1	132
167 | PHE	CE1	131.5
168 | PHE	CZ	130
169 | PHE	CE2	131.5
170 | PHE	CD2	132
171 | PHE	N	120.7
172 | PRO	HA	4.45
173 | PRO	HB2	2.29
174 | PRO	HB3	1.99
175 | PRO	HG2	2.04
176 | PRO	HG3	2.04
177 | PRO	HD2	3.67
178 | PRO	HD3	3.61
179 | PRO	C	177.8
180 | PRO	CA	63.7
181 | PRO	CB	32.22
182 | PRO	CG	27.32
183 | PRO	CD	49.81
184 | PRO	N	135.8
185 | SER	H	8.43
186 | SER	HA	4.51
187 | SER	HB2	3.95
188 | SER	HB3	3.9
189 | SER	C	175.4
190 | SER	CA	58.67
191 | SER	CB	64.06
192 | SER	N	115.5
193 | THR	H	8.25
194 | THR	HA	4.43
195 | THR	HB	4.33
196 | THR	HG2	1.22
197 | THR	C	175.6
198 | THR	CA	62.01
199 | THR	CB	70.01
200 | THR	CG2	21.6
201 | THR	N	112
202 | TRP	H	8.22
203 | TRP	HA	4.7
204 | TRP	HB2	3.34
205 | TRP	HB3	3.25
206 | TRP	HE1	10.63
207 | TRP	HD1	7.28
208 | TRP	HE3	7.65
209 | TRP	HZ3	7.18
210 | TRP	HH2	7.26
211 | TRP	HZ2	7.51
212 | TRP	C	177.1
213 | TRP	CA	57.6
214 | TRP	CB	29.75
215 | TRP	CD1	127.4
216 | TRP	CG	111.7
217 | TRP	CE3	122.2
218 | TRP	CZ3	124.8
219 | TRP	CH2	121.1
220 | TRP	CZ2	114.8
221 | TRP	CE2	139
222 | TRP	CD2	129.6
223 | TRP	N	122.1
224 | TYR	H	8.26
225 | TYR	HA	4.58
226 | TYR	HB2	3.09
227 | TYR	HB3	2.97
228 | TYR	HD1	7.15
229 | TYR	HE1	6.86
230 | TYR	HE2	6.86
231 | TYR	HD2	7.15
232 | TYR	C	176.7
233 | TYR	CA	58.28
234 | TYR	CB	38.94
235 | TYR	CG	130.8
236 | TYR	CD1	133.3
237 | TYR	CE1	118.3
238 | TYR	CZ	157.5
239 | TYR	CE2	118.3
240 | TYR	CD2	133.3
241 | TYR	N	120.9
242 | VAL	H	8.16
243 | VAL	HA	4.16
244 | VAL	HB	2.11
245 | VAL	HG1	0.96
246 | VAL	HG2	0.96
247 | VAL	C	177
248 | VAL	CA	62.61
249 | VAL	CB	32.82
250 | VAL	CG1	21.11
251 | VAL	CG2	20.34
252 | VAL	N	119.3
253 | 


--------------------------------------------------------------------------------
/csb/numeric/integrators.py:
--------------------------------------------------------------------------------
  1 | """
  2 | provides various integration schemes and an abstract gradient class.
  3 | """
  4 | 
  5 | import numpy
  6 | 
  7 | from abc import ABCMeta, abstractmethod
  8 | from csb.statistics.samplers.mc import State, TrajectoryBuilder
  9 | from csb.numeric import InvertibleMatrix
 10 | 
 11 | 
 12 | class AbstractIntegrator(object):
 13 |     """
 14 |     Abstract integrator class. Subclasses implement different integration
 15 |     schemes for solving deterministic equations of motion.
 16 | 
 17 |     @param timestep: Integration timestep
 18 |     @type timestep: float
 19 | 
 20 |     @param gradient: Gradient of potential energy
 21 |     @type gradient: L{AbstractGradient}
 22 |     """
 23 |     
 24 |     __metaclass__ = ABCMeta
 25 | 
 26 |     def __init__(self, timestep, gradient):
 27 | 
 28 |         self._timestep = timestep
 29 |         self._gradient = gradient
 30 | 
 31 |     def integrate(self, init_state, length, mass_matrix=None, return_trajectory=False):
 32 |         """
 33 |         Integrates equations of motion starting from an initial state a certain
 34 |         number of steps.
 35 | 
 36 |         @param init_state: Initial state from which to start integration
 37 |         @type init_state: L{State}
 38 |         
 39 |         @param length: Nubmer of integration steps to be performed
 40 |         @type length: int
 41 | 
 42 |         @param mass_matrix: Mass matrix
 43 |         @type mass_matrix:  n-dimensional L{InvertibleMatrix} with n being the dimension
 44 |                                     of the configuration space, that is, the dimension of
 45 |                                     the position / momentum vectors
 46 | 
 47 |         @param return_trajectory: Return complete L{Trajectory} instead of the initial
 48 |                                   and final states only (L{PropagationResult}). This reduces
 49 |                                   performance.
 50 |         @type return_trajectory: boolean
 51 | 
 52 |         @rtype: L{AbstractPropagationResult}
 53 |         """
 54 | 
 55 |         builder = TrajectoryBuilder.create(full=return_trajectory)
 56 |             
 57 |         builder.add_initial_state(init_state)
 58 |         state = init_state.clone()
 59 |         
 60 |         for i in range(length - 1):
 61 |             state = self.integrate_once(state, i, mass_matrix=mass_matrix)
 62 |             builder.add_intermediate_state(state)
 63 | 
 64 |         state = self.integrate_once(state, length - 1, mass_matrix=mass_matrix)
 65 |         builder.add_final_state(state)
 66 | 
 67 |         return builder.product
 68 | 
 69 |     @abstractmethod
 70 |     def integrate_once(self, state, current_step, mass_matrix=None):
 71 |         """
 72 |         Integrates one step starting from an initial state and an initial time
 73 |         given by the product of the timestep and the current_step parameter.
 74 |         The input C{state} is changed in place.
 75 | 
 76 |         @param state: State which to evolve one integration step
 77 |         @type state: L{State}
 78 |         
 79 |         @param current_step: Current integration step
 80 |         @type current_step: int
 81 | 
 82 |         @param mass_matrix: mass matrix
 83 |         @type mass_matrix:  n-dimensional numpy array with n being the dimension
 84 |                             of the configuration space, that is, the dimension of
 85 |                             the position / momentum vectors
 86 |         @return: the altered state
 87 |         @rtype: L{State}
 88 |         """
 89 |         pass
 90 | 
 91 |     def _get_inverse(self, mass_matrix):
 92 | 
 93 |         inverse_mass_matrix = None
 94 |         if mass_matrix is None:
 95 |             inverse_mass_matrix = 1.0
 96 |         else:
 97 |             if mass_matrix.is_unity_multiple:
 98 |                 inverse_mass_matrix = mass_matrix.inverse[0][0]
 99 |             else:
100 |                 inverse_mass_matrix = mass_matrix.inverse
101 | 
102 |         return inverse_mass_matrix
103 | 
class LeapFrog(AbstractIntegrator):
    """
    Leap Frog integrator yielding positions and momenta at equal times.
    Slower than FastLeapFrog, but intermediate points in trajectories
    obtained using
    LeapFrog.integrate(init_state, length, return_trajectory=True)
    are physical.
    """

    def integrate_once(self, state, current_step, mass_matrix=None):
        """
        Performs one leap frog step (half momentum update, full position
        update, half momentum update) on C{state} in place.

        The gradient evaluated at the end of a step is kept on the instance
        and reused by the next step; it is only computed from scratch when
        C{current_step} is 0.

        @param state: State to evolve
        @type state: L{State}
        @param current_step: Index of the integration step
        @type current_step: int
        @param mass_matrix: Mass matrix, or None
        @return: the altered state
        @rtype: L{State}
        """

        inverse_mass_matrix = self._get_inverse(mass_matrix)
        dt = self._timestep

        if current_step == 0:
            self._oldgrad = self._gradient(state.position, 0.)

        half_dt = 0.5 * dt

        # first half kick, then drift with the half-step momentum
        momentumhalf = state.momentum - half_dt * self._oldgrad
        state.position = state.position + dt * numpy.dot(inverse_mass_matrix, momentumhalf)

        # second half kick uses the gradient at the new position
        self._oldgrad = self._gradient(state.position, (current_step + 1) * dt)
        state.momentum = momentumhalf - half_dt * self._oldgrad

        return state
127 | 
class FastLeapFrog(LeapFrog):
    """
    Leap Frog integrator that merges the momentum updates of two successive
    integration steps, so positions and momenta are computed at unequal
    times.
    WARNING: intermediate points in trajectories obtained by
    FastLeapFrog.integrate(init_state, length, return_trajectory=True)
    are NOT to be interpreted as phase-space trajectories, because
    position and momenta are not given at equal times! In the initial and the
    final state, positions and momenta are given at equal times.
    """

    def integrate(self, init_state, length, mass_matrix=None, return_trajectory=False):
        """
        Propagates a copy of the initial state by C{length} steps.

        @param init_state: Initial state from which to start integration
        @type init_state: L{State}
        @param length: Number of integration steps to be performed
        @type length: int
        @param mass_matrix: Mass matrix, or None
        @param return_trajectory: Record intermediate states as well
        @type return_trajectory: boolean
        @return: the product of the trajectory builder
        """

        inverse_mass_matrix = self._get_inverse(mass_matrix)

        builder = TrajectoryBuilder.create(full=return_trajectory)
        builder.add_initial_state(init_state)

        state = init_state.clone()
        dt = self._timestep
        half_dt = 0.5 * dt

        # opening half kick
        state.momentum = state.momentum - half_dt * self._gradient(state.position, 0.)

        # interior steps: full drift followed by a full kick
        for step in range(1, length):
            state.position = state.position + dt * numpy.dot(inverse_mass_matrix, state.momentum)
            state.momentum = state.momentum - dt * self._gradient(state.position, step * dt)
            builder.add_intermediate_state(state)

        # closing drift and half kick bring position and momentum to the same time
        state.position = state.position + dt * numpy.dot(inverse_mass_matrix, state.momentum)
        state.momentum = state.momentum - half_dt * self._gradient(state.position, length * dt)
        builder.add_final_state(state)

        return builder.product
163 | 
class VelocityVerlet(AbstractIntegrator):
    """
    Velocity Verlet integrator.
    """

    def integrate_once(self, state, current_step, mass_matrix=None):
        """
        Performs one velocity Verlet step on C{state} in place.

        The gradient evaluated at the end of a step is kept on the instance
        and reused by the next step; it is only computed from scratch when
        C{current_step} is 0.

        @param state: State to evolve
        @type state: L{State}
        @param current_step: Index of the integration step
        @type current_step: int
        @param mass_matrix: Mass matrix, or None
        @return: the altered state
        @rtype: L{State}
        """

        inverse_mass_matrix = self._get_inverse(mass_matrix)
        dt = self._timestep

        if current_step == 0:
            self._oldgrad = self._gradient(state.position, 0.)

        # position update from the current momentum and the previous gradient
        state.position = state.position + dt * numpy.dot(inverse_mass_matrix, state.momentum) \
                         - 0.5 * dt ** 2 * numpy.dot(inverse_mass_matrix, self._oldgrad)

        # momentum update averages the gradients at both ends of the step
        newgrad = self._gradient(state.position, (current_step + 1) * dt)
        state.momentum = state.momentum - 0.5 * dt * (self._oldgrad + newgrad)
        self._oldgrad = newgrad

        return state
185 | 
class AbstractGradient(object):
    """
    Base class for gradients of an energy function. Subclasses supply
    L{evaluate}; instances are callable and delegate to it.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def evaluate(self, q, t):
        """
        Computes the gradient at position q and time t.

        @param q: Position array
        @type q:  One-dimensional numpy array

        @param t: Time
        @type t: float

        @rtype: numpy array
        """
        return None

    def __call__(self, q, t):
        """
        Checks C{q} with L{State.check_flat_array} and returns the gradient
        at position q and time t.

        @param q: Position array
        @type q:  One-dimensional numpy array

        @param t: Time
        @type t: float

        @rtype: numpy array
        """
        State.check_flat_array(q)
        gradient = self.evaluate(q, t)

        return gradient
223 | 


--------------------------------------------------------------------------------
/csb/statistics/ars.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adaptive Rejection Sampling (ARS)
  3 | 
  4 | The ARS class generates a single random sample from a
  5 | univariate distribution specified by an instance of the
  6 | LogProb class, implemented by the user. An instance of
  7 | LogProb returns the log of the probability density and
  8 | its derivative. The log probability function passed must
  9 | be concave.
 10 | 
 11 | The user must also supply initial guesses.  It is not
 12 | essential that these values be very accurate, but performance
 13 | will generally depend on their accuracy.
 14 | """
 15 | 
 16 | from numpy import exp, log
 17 | 
 18 | class Envelope(object):
 19 |     """
 20 |     Envelope function for adaptive rejection sampling.
 21 |     
 22 |     The envelope defines a piecewise linear upper and lower
 23 |     bounding function of the concave log-probability.
 24 |     """
 25 |     def __init__(self, x, h, dh):
 26 | 
 27 |         from numpy import array, inf
 28 | 
 29 |         self.x = array(x)
 30 |         self.h = array(h)
 31 |         self.dh = array(dh)
 32 |         self.z0 = -inf
 33 |         self.zk = inf
 34 |         
 35 |     def z(self):
 36 |         """
 37 |         Support intervals for upper bounding function.
 38 |         """
 39 |         from numpy import concatenate
 40 | 
 41 |         h = self.h
 42 |         dh = self.dh
 43 |         x = self.x
 44 | 
 45 |         z = (h[1:] - h[:-1] + x[:-1] * dh[:-1] - x[1:] * dh[1:]) / \
 46 |             (dh[:-1] - dh[1:])
 47 | 
 48 |         return concatenate(([self.z0], z, [self.zk]))
 49 | 
 50 |     def u(self, x):
 51 |         """
 52 |         Piecewise linear upper bounding function.
 53 |         """
 54 |         z = self.z()[1:-1]
 55 |         j = (x > z).sum()
 56 | 
 57 |         return self.h[j] + self.dh[j] * (x - self.x[j])
 58 | 
 59 |     def u_max(self):
 60 | 
 61 |         z = self.z()[1:-1]
 62 | 
 63 |         return (self.h + self.dh * (z - self.x)).max()
 64 | 
 65 |     def l(self, x):
 66 |         """
 67 |         Piecewise linear lower bounding function.
 68 |         """
 69 |         from numpy import inf
 70 | 
 71 |         j = (x > self.x).sum()
 72 | 
 73 |         if j == 0 or j == len(self.x):
 74 |             return -inf
 75 |         else:
 76 |             j -= 1
 77 |             return ((self.x[j + 1] - x) * self.h[j] + (x - self.x[j]) * self.h[j + 1]) / \
 78 |                    (self.x[j + 1] - self.x[j])
 79 | 
 80 |     def insert(self, x, h, dh):
 81 |         """
 82 |         Insert new support point for lower bounding function
 83 |         (and indirectly for upper bounding function).
 84 |         """
 85 |         from numpy import concatenate
 86 | 
 87 |         j = (x > self.x).sum()
 88 | 
 89 |         self.x = concatenate((self.x[:j], [x], self.x[j:]))
 90 |         self.h = concatenate((self.h[:j], [h], self.h[j:]))
 91 |         self.dh = concatenate((self.dh[:j], [dh], self.dh[j:]))
 92 | 
 93 |     def log_masses(self):
 94 |         
 95 |         from numpy import  abs, putmask
 96 | 
 97 |         z = self.z()
 98 |         b = self.h - self.x * self.dh
 99 |         a = abs(self.dh)
100 |         m = (self.dh > 0)
101 |         q = self.x * 0.        
102 |         putmask(q, m, z[1:])
103 |         putmask(q, 1 - m, z[:-1])
104 |         
105 |         log_M = b - log(a) + log(1 - exp(-a * (z[1:] - z[:-1]))) + \
106 |                 self.dh * q
107 | 
108 |         return log_M
109 | 
110 |     def masses(self):
111 | 
112 |         z = self.z()
113 |         b = self.h - self.x * self.dh
114 |         a = self.dh
115 |         
116 |         return exp(b) * (exp(a * z[1:]) - exp(a * z[:-1])) / a
117 | 
118 |     def sample(self):
119 | 
120 |         from numpy.random import random
121 |         from numpy import add
122 |         from csb.numeric import log_sum_exp
123 |         
124 |         log_m = self.log_masses()
125 |         log_M = log_sum_exp(log_m)
126 |         c = add.accumulate(exp(log_m - log_M))
127 |         u = random()
128 |         j = (u > c).sum()
129 | 
130 |         a = self.dh[j]
131 |         z = self.z()
132 |         
133 |         xmin, xmax = z[j], z[j + 1]
134 | 
135 |         u = random()
136 | 
137 |         if a > 0:
138 |             return xmax + log(u + (1 - u) * exp(-a * (xmax - xmin))) / a
139 |         else:
140 |             return xmin + log(u + (1 - u) * exp(a * (xmax - xmin))) / a
141 | 
142 | 
class LogProb(object):
    """
    Interface of the log-probability passed to L{ARS}. Calling an
    implementation with a position returns the log of the probability
    density and its derivative.
    """

    def __call__(self, x):
        """
        @param x: position at which to evaluate
        @raise NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError()
147 | 
class Gauss(LogProb):
    """
    Log-probability of a Gaussian, omitting the normalisation constant.

    @param mu: location of the maximum
    @param sigma: width
    """

    def __init__(self, mu, sigma=1.):

        self.mu = float(mu)
        self.sigma = float(sigma)

    def __call__(self, x):
        """
        @param x: position at which to evaluate
        @return: tuple of the log-density (up to an additive constant)
                 and its derivative at x
        """
        deviation = x - self.mu
        variance = self.sigma ** 2

        return -0.5 * deviation ** 2 / variance, -deviation / variance
159 | 
160 | 
class ARS(object):
    """
    Adaptive rejection sampler for a univariate log-probability.

    @param logp: callable returning the log-probability and its derivative
                 at a given position
    @type logp: L{LogProb}
    """

    # imported at class level so that it can serve as a default argument below
    from numpy import inf

    def __init__(self, logp):

        self.logp = logp

    def initialize(self, x, z0=-inf, zmax=inf):
        """
        Builds the initial envelope from a set of support points.

        @param x: initial support points
        @param z0: lower limit of the envelope's support
        @param zmax: upper limit of the envelope's support
        """
        from numpy import array

        support = array(x)
        evaluated = self.logp(array(x))

        self.hull = Envelope(support, *evaluated)
        self.hull.z0 = z0
        self.hull.zk = zmax

    def sample(self, maxiter=100):
        """
        Draws a single sample. Requires a prior call to L{initialize}.

        @param maxiter: maximum number of proposals to try
        @type maxiter: int
        @return: the accepted sample, or None if every proposal was rejected
        """
        from numpy.random import random

        for _ in range(maxiter):

            candidate = self.hull.sample()
            lower = self.hull.l(candidate)
            upper = self.hull.u(candidate)
            threshold = random()

            # squeeze test: accept without evaluating the log-probability
            if threshold <= exp(lower - upper):
                return candidate

            log_p, slope = self.logp(candidate)

            # rejection test against the log-probability itself
            if threshold <= exp(log_p - upper):
                return candidate

            # a rejected proposal becomes a new support point of the envelope
            self.hull.insert(candidate, log_p, slope)
195 | 


--------------------------------------------------------------------------------
/csb/statistics/rand.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Random number generators
  3 | """
  4 | 
  5 | def probability_transform(shape, inv_cum, cum_min=0., cum_max=1.):
  6 |     """
  7 |     Generic sampler based on the probability transform.
  8 | 
  9 |     @param shape: shape of the random sample
 10 |     @param inv_cum: inversion of the cumulative density function from which one seeks to sample
 11 |     @param cum_min: lower value of the cumulative distribution
 12 |     @param cum_max: upper value of the cumulative distribution
 13 |     @return: random variates of the PDF implied by the inverse cumulative distribution
 14 |     """
 15 |     from numpy.random import random
 16 |     
 17 |     return inv_cum(cum_min + random(shape) * (cum_max - cum_min))
 18 | 
 19 | def truncated_gamma(shape=None, alpha=1., beta=1., x_min=None, x_max=None):
 20 |     """
 21 |     Generate random variates from a lower-and upper-bounded gamma distribution.
 22 | 
 23 |     @param shape: shape of the random sample
 24 |     @param alpha: shape parameter (alpha > 0.)
 25 |     @param beta:  scale parameter (beta >= 0.)
 26 |     @param x_min: lower bound of variate
 27 |     @param x_max: upper bound of variate    
 28 |     @return: random variates of lower-bounded gamma distribution
 29 |     """
 30 |     from scipy.special import gammainc, gammaincinv
 31 |     from numpy.random import gamma
 32 |     from numpy import inf
 33 | 
 34 |     if x_min is None and x_max is None:
 35 |         return gamma(alpha, 1 / beta, shape)
 36 |     elif x_min is None:
 37 |         x_min = 0.
 38 |     elif x_max is None:
 39 |         x_max = inf
 40 |         
 41 |     x_min = max(0., x_min)
 42 |     x_max = min(1e300, x_max)
 43 | 
 44 |     a = gammainc(alpha, beta * x_min)
 45 |     b = gammainc(alpha, beta * x_max)
 46 | 
 47 |     return probability_transform(shape,
 48 |                                  lambda x, alpha=alpha: gammaincinv(alpha, x),
 49 |                                  a, b) / beta
 50 | 
 51 | def truncated_normal(shape=None, mu=0., sigma=1., x_min=None, x_max=None):
 52 |     """
 53 |     Generates random variates from a lower-and upper-bounded normal distribution
 54 | 
 55 |     @param shape: shape of the random sample
 56 |     @param mu:    location parameter 
 57 |     @param sigma: width of the distribution (sigma >= 0.)
 58 |     @param x_min: lower bound of variate
 59 |     @param x_max: upper bound of variate    
 60 |     @return: random variates of lower-bounded normal distribution
 61 |     """
 62 |     from scipy.special import erf, erfinv
 63 |     from numpy.random import standard_normal
 64 |     from numpy import inf, sqrt
 65 | 
 66 |     if x_min is None and x_max is None:
 67 |         return standard_normal(shape) * sigma + mu
 68 |     elif x_min is None:
 69 |         x_min = -inf
 70 |     elif x_max is None:
 71 |         x_max = inf
 72 |         
 73 |     x_min = max(-1e300, x_min)
 74 |     x_max = min(+1e300, x_max)
 75 |     var = sigma ** 2 + 1e-300
 76 |     sigma = sqrt(2 * var)
 77 |     
 78 |     a = erf((x_min - mu) / sigma)
 79 |     b = erf((x_max - mu) / sigma)
 80 | 
 81 |     return probability_transform(shape, erfinv, a, b) * sigma + mu
 82 | 
 83 | def sample_dirichlet(alpha, n_samples=1):
 84 |     """
 85 |     Sample points from a dirichlet distribution with parameter alpha.
 86 | 
 87 |     @param alpha: alpha parameter of a dirichlet distribution
 88 |     @type alpha: array
 89 |     """
 90 |     from numpy import array, sum, transpose, ones
 91 |     from numpy.random import gamma
 92 | 
 93 |     alpha = array(alpha, ndmin=1)
 94 |     X = gamma(alpha,
 95 |               ones(len(alpha)),
 96 |               [n_samples, len(alpha)])
 97 |      
 98 |     return transpose(transpose(X) / sum(X, -1))
 99 | 
def sample_sphere3d(radius=1., n_samples=1):
    """
    Draws random points from the interior of a 3D sphere centred at the
    origin.

    @param radius: radius of the sphere
    @type radius: float

    @param n_samples: number of samples to return
    @type n_samples: int

    @return: n_samples times random cartesian coordinates inside the sphere
    @rtype: numpy array
    """
    from numpy.random  import random
    from numpy import arccos, transpose, cos, sin, pi, power

    # spherical coordinates: the cube root on the distance and the arccos on
    # the polar angle compensate for the volume element
    distance = radius * power(random(n_samples), 1 / 3.)
    polar = arccos(2. * (random(n_samples) - 0.5))
    azimuth = 2 * pi * random(n_samples)

    sin_polar = sin(polar)

    coordinates = [cos(azimuth) * sin_polar * distance,
                   sin(azimuth) * sin_polar * distance,
                   cos(polar) * distance]

    return transpose(coordinates)
125 | 
def sample_from_histogram(p, n_samples=1):
    """
    Draws bin indices at random, with probabilities proportional to the
    histogram p.

    @param p: histogram
    @type p: numpy.array
    @param n_samples: number of samples to generate
    @type n_samples: integer

    @return: indices of the drawn bins
    """

    from numpy import add, less, argsort, take, arange
    from numpy.random import random

    # bin indices ordered from the most to the least populated bin
    descending = take(argsort(p), arange(len(p) - 1, -1, -1))

    # normalised cumulative histogram in that order
    cumulative = add.accumulate(take(p, descending)) / add.reduce(p)

    # each draw selects the first bin whose cumulative weight is not below it
    draws = random(n_samples)
    position = add.reduce(less.outer(cumulative, draws), 0)

    return descending[position]
145 | 
def gen_inv_gaussian(a, b, p, burnin=10):
    """
    Sampler based on Gibbs sampling: alternates draws from
    L{inv_gaussian} and from a gamma distribution for C{burnin}
    iterations and returns the last inverse gaussian draw (its
    reciprocal if p is negative).

    Assumes scalar p. The result is only defined for burnin >= 1.

    @param a: first parameter; swapped with b when p < 0
    @param b: second parameter; swapped with a when p < 0
    @param p: scalar parameter
    @param burnin: number of Gibbs iterations
    @type burnin: int
    """
    from numpy import sqrt
    from numpy.random import gamma

    # auxiliary variable, initialised to ones with the shape of a
    auxiliary = a * 0. + 1.

    if p < 0:
        a, b = b, a

    gamma_shape = abs(p) + 0.5

    for _step in range(burnin):

        lam = b + 2 * auxiliary
        mean = sqrt(lam / a)

        x = inv_gaussian(mean, lam, shape=mean.shape)
        auxiliary = gamma(gamma_shape, x)

    if p >= 0:
        return x

    return 1 / x
171 | 
def inv_gaussian(mu=1., _lambda=1., shape=None):
    """
    Generate random samples from inverse gaussian.

    @param mu: first parameter of the distribution
    @param _lambda: second parameter of the distribution
    @param shape: shape of the returned sample; None yields a single value

    @return: samples clipped to the interval [1e-308, 1e308]
    """
    import numpy

    half_ratio = mu / _lambda / 2.

    # candidate value built from a squared standard normal draw
    squared = mu * numpy.random.standard_normal(shape) ** 2
    root = numpy.sqrt(4 * _lambda * squared + squared ** 2)
    candidate = mu + half_ratio * (squared - root)

    # keep the candidate where the uniform draw accepts it,
    # otherwise use mu ** 2 / candidate
    uniform = numpy.random.random(shape)
    keep = numpy.less_equal(uniform, mu / (mu + candidate))
    mixed = keep * candidate + (1 - keep) * mu ** 2 / candidate

    return numpy.clip(mixed, 1e-308, 1e308)
187 | 
def random_rotation(A, n_iter=10, initial_values=None):
    """
    Generation of three-dimensional random rotations in
    fitting and matching problems, Habeck 2009.

    Generate random rotation R from::

        exp(trace(dot(transpose(A), R)))

    The rotation is obtained by Gibbs sampling of three euler angles
    after a singular value decomposition of A.

    @param A: generating parameter
    @type A: 3 x 3 numpy array

    @param n_iter: number of gibbs sampling steps
    @type n_iter: integer

    @param initial_values: initial euler angles alpha, beta and gamma;
                           if None, sampling starts with beta = 0
    @type initial_values: tuple

    @rtype: 3 x 3 numpy array
    """
    from numpy import cos, sin, dot, pi, clip
    from numpy.linalg import svd, det    
    from random import vonmisesvariate, randint
    from csb.numeric import euler


    def sample_beta(kappa, n=1):
        """
        Draw the angle beta given kappa by transforming uniform variates.

        Returns a single value if n == 1, otherwise an array of n values.
        """
        from numpy import arccos
        from csb.numeric import log, exp
        from numpy.random import random

        u = random(n)

        # x is kept within [-1, 1] so that arccos below is defined;
        # kappa == 0 is handled separately to avoid the division by kappa
        if kappa != 0.:
            x = clip(1 + 2 * log(u + (1 - u) * exp(-kappa)) / kappa, -1., 1.)
        else:
            x = 2 * u - 1

        if n == 1:
            return arccos(x)[0]
        else:
            return arccos(x)


    U, L, V = svd(A)

    # whenever a factor has negative determinant, flip the sign of its
    # last column (U) / last row (V) together with the last singular value
    # (presumably to make both factors proper rotations)
    if det(U) < 0:
        L[2] *= -1
        U[:, 2] *= -1
    if det(V) < 0:
        L[2] *= -1
        V[2] *= -1

    # NOTE: alpha and gamma are only bound inside the loop when no initial
    # values are given, so n_iter must be at least 1 in that case
    if initial_values is None:
        beta = 0.
    else:
        alpha, beta, gamma = initial_values

    for _i in range(n_iter):

        ## sample alpha and gamma
        # phi and psi are von Mises variates; the concentration of phi is
        # clipped to the range [1e-308, 1e10]
        phi = vonmisesvariate(0., clip(cos(beta / 2) ** 2 * (L[0] + L[1]), 1e-308, 1e10))
        psi = vonmisesvariate(pi, sin(beta / 2) ** 2 * (L[0] - L[1]))
        # u is 0 or 1: both angles are shifted by either 0 or pi
        u = randint(0, 1)
        
        alpha = 0.5 * (phi + psi) + pi * u
        gamma = 0.5 * (phi - psi) + pi * u

        ## sample beta
        kappa = cos(phi) * (L[0] + L[1]) + cos(psi) * (L[0] - L[1]) + 2 * L[2]
        beta = sample_beta(kappa)

    # embed the sampled euler rotation between the two SVD factors
    return dot(U, dot(euler(alpha, beta, gamma), V))
261 | 


--------------------------------------------------------------------------------
/csb/statistics/samplers/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Defines abstract samplers.
  3 | """
  4 | 
  5 | import numpy as np
  6 | import csb.core
  7 | 
  8 | from abc import ABCMeta, abstractmethod, abstractproperty
  9 | 
 10 | 
 11 | class DimensionError(TypeError):
 12 |     pass
 13 | 
 14 | class AbstractSampler(object):
 15 |     """
 16 |     Abstract interface for sampling algorithms.
 17 |     """
 18 |     
 19 |     __metaclass__ = ABCMeta
 20 |     
 21 |     @abstractmethod
 22 |     def sample(self):
 23 |         """
 24 |         Draw a sample.
 25 |         @rtype: L{AbstractState}
 26 |         """
 27 |         pass
 28 | 
 29 | class AbstractState(object):
 30 |     """
 31 |     Represents a point in phase-space.
 32 |     """
 33 |     
 34 |     __metaclass__ = ABCMeta    
 35 |     
 36 |     @abstractproperty
 37 |     def position(self):
 38 |         pass
 39 |     
 40 |     @abstractproperty
 41 |     def momentum(self):
 42 |         pass
 43 |     
 44 | class State(AbstractState):
 45 |     """
 46 |     Represents a point in phase-space.
 47 |     """
 48 |     
 49 |     @staticmethod
 50 |     def check_flat_array(*args):
 51 |         """
 52 |         Check whether arguments are flat, one-dimensional numpy arrays.
 53 |         """
 54 |         
 55 |         for q in args:
 56 |             if not isinstance(q, np.ndarray):
 57 |                 raise TypeError(q, 'numpy.ndarray expected!')
 58 |     
 59 |             if not len(q.squeeze().shape) <= 1:
 60 |                 raise DimensionError(q, '1d numpy.ndarray expected!')
 61 |         
 62 |     @staticmethod
 63 |     def check_equal_length(q, p):
 64 |         """
 65 |         Check whether arguments have equal length.
 66 |         """
 67 |         
 68 |         if len(q) != len(p):
 69 |             raise DimensionError(p, 'momentum needs to have the same dimension as coordinates!')
 70 |     
 71 |     def __init__(self, position, momentum=None):
 72 |         
 73 |         self._position = None
 74 |         self._momentum = None
 75 | 
 76 |         self.position = position
 77 |         self.momentum = momentum
 78 | 
 79 |     def __eq__(self, other):
 80 | 
 81 |         return self.position == other.position and self.momentum == other.momentum
 82 | 
 83 |     @property
 84 |     def position(self):        
 85 |         return self._position.copy()
 86 |     @position.setter
 87 |     def position(self, value):        
 88 |         State.check_flat_array(value)        
 89 |         self._position = np.array(value)
 90 | 
 91 |     @property
 92 |     def momentum(self):
 93 |         if self._momentum is None:
 94 |             return None
 95 |         else:
 96 |             return self._momentum.copy()
 97 |     @momentum.setter
 98 |     def momentum(self, value):
 99 |         if not value is None:
100 |             State.check_flat_array(value)
101 |             State.check_equal_length(value, self.position)
102 |             self._momentum = np.array(value)
103 |         else:
104 |             self._momentum = None
105 |         
106 |     def clone(self):
107 |         if self.momentum is not None:
108 |             return self.__class__(self.position.copy(), self.momentum.copy())
109 |         else:
110 |             return self.__class__(self.position.copy())
111 |         
112 |         
class EnsembleState(csb.core.BaseCollectionContainer, AbstractState):
    """
    State of an Ensemble Monte Carlo simulation: a read-only collection
    of L{State} objects.

    @param items: initialization list of states
    @type items: list of L{State}
    """

    def __init__(self, items):
        super(EnsembleState, self).__init__(items, type=State)

    @property
    def position(self):
        """
        Positions of all member states, stacked into one array.
        """
        positions = [member.position for member in self]
        return np.array(positions)

    @property
    def momentum(self):
        """
        Momenta of all member states, stacked into one array.
        """
        momenta = [member.momentum for member in self]
        return np.array(momenta)
132 | 


--------------------------------------------------------------------------------
/csb/statistics/samplers/mc/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Abstract Monte Carlo samplers.
  3 | """
  4 | 
  5 | import numpy.random
  6 | 
  7 | import csb.numeric
  8 | import csb.core
  9 | 
 10 | from abc import ABCMeta, abstractmethod, abstractproperty
 11 | from csb.statistics.samplers import AbstractSampler, AbstractState, State, EnsembleState
 12 | 
 13 | class AbstractMC(AbstractSampler):
 14 |     """
 15 |     Abstract Monte Carlo sampler class. Subclasses implement various
 16 |     Monte carlo equilibrium sampling schemes.
 17 |     
 18 |     @param state: Initial state
 19 |     @type state: L{AbstractState}
 20 |     """
 21 |     
 22 |     __metaclass__ = ABCMeta
 23 |     
 24 |     def __init__(self, state):
 25 |         
 26 |         self._state = None
 27 |         self.state = state
 28 |          
 29 |     def _checkstate(self, state):
 30 |         
 31 |         if not isinstance(state, AbstractState):
 32 |             raise TypeError(state)
 33 |     
 34 |     @abstractproperty
 35 |     def energy(self):
 36 |         """
 37 |         Energy of the current state.
 38 |         """
 39 |         pass
 40 | 
 41 |     @property
 42 |     def state(self):
 43 |         """
 44 |         Current state.
 45 |         """
 46 |         return self._state
 47 |     @state.setter
 48 |     def state(self, value):
 49 |         self._checkstate(value)
 50 |         self._state = value
 51 | 
 52 |     @abstractmethod
 53 |     def sample(self):
 54 |         """
 55 |         Draw a sample.
 56 |         @rtype: L{AbstractState}
 57 |         """
 58 |         pass
 59 | 
 60 | class AbstractPropagationResult(object):
 61 |     """
 62 |     Abstract class providing the interface for the result
 63 |     of a deterministic or stochastic propagation of a state.
 64 |     """
 65 |     
 66 |     __metaclass__ = ABCMeta 
 67 |     
 68 |     @abstractproperty
 69 |     def initial(self):
 70 |         """
 71 |         Initial state
 72 |         """
 73 |         pass
 74 |     
 75 |     @abstractproperty
 76 |     def final(self):
 77 |         """
 78 |         Final state
 79 |         """
 80 |         pass
 81 |     
 82 |     @abstractproperty
 83 |     def heat(self):
 84 |         """
 85 |         Heat produced during propagation
 86 |         @rtype: float
 87 |         """        
 88 |         pass    
 89 | 
 90 | class PropagationResult(AbstractPropagationResult):
 91 |     """
 92 |     Describes the result of a deterministic or stochastic
 93 |     propagation of a state.
 94 | 
 95 |     @param initial: Initial state from which the
 96 |                     propagation started
 97 |     @type initial: L{State}
 98 | 
 99 |     @param final: Final state in which the propagation
100 |                   resulted
101 |     @type final: L{State}
102 | 
103 |     @param heat: Heat produced during propagation
104 |     @type heat: float
105 |     """
106 |     
107 |     
108 |     def __init__(self, initial, final, heat=0.0):
109 |         
110 |         if not isinstance(initial, AbstractState):
111 |             raise TypeError(initial)
112 |         
113 |         if not isinstance(final, AbstractState):
114 |             raise TypeError(final)        
115 |         
116 |         self._initial = initial
117 |         self._final = final
118 |         self._heat = None
119 |         
120 |         self.heat = heat
121 | 
122 |     def __iter__(self):
123 | 
124 |         return iter([self._initial, self.final])
125 |         
126 |     @property
127 |     def initial(self):
128 |         return self._initial
129 |     
130 |     @property
131 |     def final(self):
132 |         return self._final
133 |     
134 |     @property
135 |     def heat(self):
136 |         return self._heat
137 |     @heat.setter
138 |     def heat(self, value):
139 |         self._heat = float(value)
140 | 
class Trajectory(csb.core.CollectionContainer, AbstractPropagationResult):
    """
    Phase-space trajectory, stored as an ordered collection of states.

    @param items: list of states defining a phase-space trajectory
    @type items: list of L{AbstractState}
    @param heat: heat produced during the trajectory
    @type heat: float
    @param work: work produced during the trajectory
    @type work: float
    """

    def __init__(self, items, heat=0.0, work=0.0):

        super(Trajectory, self).__init__(items, type=AbstractState)

        # stored as given; only the property setters convert to float
        self._heat = heat
        self._work = work

    @property
    def initial(self):
        """
        First state of the trajectory.
        """
        return self[0]

    @property
    def final(self):
        """
        Last state of the trajectory.
        """
        return self[self.last_index]

    @property
    def heat(self):
        """
        Heat produced during the trajectory.
        """
        return self._heat
    @heat.setter
    def heat(self, value):
        self._heat = float(value)

    @property
    def work(self):
        """
        Work produced during the trajectory.
        """
        return self._work
    @work.setter
    def work(self, value):
        self._work = float(value)
181 | 
class TrajectoryBuilder(object):
    """
    Builds a L{Trajectory} object incrementally, one state at a time.

    @param heat: heat produced over the trajectory
    @type heat: float
    @param work: work produced during the trajectory
    @type work: float
    """

    def __init__(self, heat=0.0, work=0.0):
        self._states = []
        self._heat = heat
        self._work = work

    @staticmethod
    def create(full=True):
        """
        Trajectory builder factory.

        @param full: if True, a TrajectoryBuilder instance designed
                     to build a full trajectory with initial state,
                     intermediate states and a final state. If False,
                     a ShortTrajectoryBuilder instance designed to
                     hold only the initial and the final state is
                     returned
        @type full: boolean

        @return: a new builder of the requested kind
        """
        builder = TrajectoryBuilder if full else ShortTrajectoryBuilder
        return builder()

    @property
    def product(self):
        """
        The L{Trajectory} instance built by a specific instance of
        this class
        """
        return Trajectory(self._states, heat=self._heat, work=self._work)

    def add_initial_state(self, state):
        """
        Puts a clone of the state at the very beginning of the trajectory

        @param state: state to be added
        @type state: L{State}
        """
        snapshot = state.clone()
        self._states.insert(0, snapshot)

    def add_intermediate_state(self, state):
        """
        Appends a clone of the state to the end of the trajectory

        @param state: state to be added
        @type state: L{State}
        """
        snapshot = state.clone()
        self._states.append(snapshot)

    def add_final_state(self, state):
        """
        Appends a clone of the state to the end of the trajectory

        @param state: state to be added
        @type state: L{State}
        """
        snapshot = state.clone()
        self._states.append(snapshot)
250 |     
class ShortTrajectoryBuilder(TrajectoryBuilder):
    """
    Builder which keeps only the initial and the final state;
    intermediate states are ignored.
    """

    def add_intermediate_state(self, state):
        """
        Does nothing: intermediate states are not recorded.
        """
        return None

    @property
    def product(self):
        """
        The L{PropagationResult} instance built by a specific instance of
        this class

        @raise ValueError: unless exactly two states have been added
        """
        collected = self._states

        if len(collected) != 2:
            raise ValueError("Can't create a product, two states required")

        first, last = collected
        return PropagationResult(first, last, heat=self._heat)
268 | 
269 | 
class MCCollection(csb.core.BaseCollectionContainer):
    """
    Container holding single-chain samplers.

    @param items: samplers
    @type items: list of L{AbstractSingleChainMC}
    """

    def __init__(self, items):

        # imported here rather than at module level
        from csb.statistics.samplers.mc.singlechain import AbstractSingleChainMC as member_type

        super(MCCollection, self).__init__(items, type=member_type)
283 | 
284 | 
def augment_state(state, temperature=1.0, mass_matrix=None):
    """
    Equips a state holding only positions with momenta drawn from
    the Maxwell-Boltzmann distribution.

    The momentum is assigned to the given state object itself, which is
    then returned.

    @param state: State to be augmented
    @type state: L{State}

    @param temperature: Temperature of the desired Maxwell-Boltzmann
                        distribution
    @type temperature: float

    @param mass_matrix: Mass matrix to be used in the Maxwell-Boltzmann
                        distribution; None defaults to a unity matrix
    @type mass_matrix: L{InvertibleMatrix}

    @return: The initial state augmented with momenta
    @rtype: L{State}
    """

    dimension = len(state.position)

    if mass_matrix is None:
        isotropic = True
    else:
        isotropic = mass_matrix.is_unity_multiple

    if isotropic == True:
        # independent normal components with variance equal to temperature
        # NOTE(review): the mass matrix itself does not enter this branch,
        # even when it is a non-unit multiple of unity - confirm intended
        sigma = numpy.sqrt(temperature)
        state.momentum = numpy.random.normal(scale=sigma, size=dimension)
    else:
        covariance = temperature * mass_matrix
        state.momentum = numpy.random.multivariate_normal(mean=numpy.zeros(dimension),
                                                          cov=covariance)

    return state
325 | 


--------------------------------------------------------------------------------
/csb/statmech/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Methods for statistical mechanics
3 | """
4 | 


--------------------------------------------------------------------------------
/csb/statmech/ensembles.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Statistical Ensembles
  3 | """
  4 | 
  5 | from csb.numeric import log, exp
  6 | from abc import ABCMeta, abstractmethod
  7 | 
  8 | 
  9 | class StatisticalEnsemble(object):
 10 |     
 11 |     __metaclass__ = ABCMeta
 12 |   
 13 |     def __call__(self, raw_energies):
 14 |         return exp(-self.energy(raw_energies))
 15 | 
 16 |     def log_prob(self, raw_energies):
 17 |         return -self.energy(raw_energies)
 18 | 
 19 |     @abstractmethod
 20 |     def energy(self, raw_energies):
 21 |         """
 22 |         Transforms the raw energies as if they were observed
 23 |         in this statistical ensemble
 24 |         """
 25 |         pass
 26 | 
 27 |     def gradient(self, raw_energies):
 28 |         raise NotImplementedError()
 29 | 
 30 | 
 31 | class BoltzmannEnsemble(StatisticalEnsemble):
 32 | 
 33 |     def __init__(self, beta=1.):
 34 |         
 35 |         self._beta = float(beta)
 36 | 
 37 |     @property
 38 |     def beta(self):
 39 |         """
 40 |         Inverse temperature
 41 |         """
 42 |         return self._beta
 43 |     @beta.setter
 44 |     def beta(self, value):
 45 |         value = float(value)
 46 |         if value <= 0.:
 47 |             raise ValueError("Inverse temperature {0} < 0".formate(value))
 48 |         self._beta = value
 49 | 
 50 |     def energy(self, raw_energies):
 51 |         return raw_energies * self._beta
 52 |         
 53 | class FermiEnsemble(BoltzmannEnsemble):
 54 | 
 55 |     def __init__(self, beta=1., e_max=0.):
 56 | 
 57 |         super(FermiEnsemble, self).__init__(beta)
 58 |         self._e_max = float(e_max)
 59 | 
 60 |     @property
 61 |     def e_max(self):
 62 |         """
 63 |         Maximum energy
 64 |         """
 65 |         return self._e_max
 66 |     @e_max.setter
 67 |     def e_max(self, value):
 68 |         self._e_max = float(value)
 69 | 
 70 |     def energy(self, raw_energies):
 71 | 
 72 |         from numpy import isinf
 73 |         
 74 |         if isinf(self.beta):
 75 |             m = (raw_energies >= self.e_max).astype('f')
 76 |             return - m * log(0.)
 77 |         else:
 78 |             x = 1 + exp(self.beta * (raw_energies - self.e_max))
 79 |             return log(x)
 80 | 
 81 | class TsallisEnsemble(StatisticalEnsemble):
 82 | 
 83 |     def __init__(self, q=1., e_min=0.):
 84 | 
 85 |         self._q = q
 86 |         self._e_min = e_min
 87 |     
 88 |     @property
 89 |     def q(self):
 90 |         """
 91 |         q-analoge of the temperature
 92 |         """
 93 |         return self._q
 94 |     @q.setter
 95 |     def q(self, value):
 96 |         if value <= 0.:
 97 |             raise ValueError("Inverse temperature {0} < 0".formate(value))
 98 |         self._q = value
 99 | 
100 |     @property
101 |     def e_min(self):
102 |         """
103 |         lower bound of the energy
104 |         """
105 |         return self._e_min
106 |     @e_min.setter
107 |     def e_min(self, value):
108 |         self._e_min = value
109 | 
110 |     def energy(self, raw_energies):
111 |         q = self.q
112 |         e_min = self.e_min
113 |         
114 |         if (q < 1 + 1e-10):
115 |             return raw_energies * q
116 |         else:
117 |             return log(1 + (raw_energies - e_min) * (q - 1)) * q / (q - 1) + e_min
118 | 
119 | 
class CompositeEnsemble(StatisticalEnsemble):
    """
    Ensemble made of several ensembles; its energy is the sum of the
    energies of the members, where member i is evaluated on
    raw_energies[i].

    @param ensembles: member ensembles; None (default) creates an
                      empty composite
    @type ensembles: list of L{StatisticalEnsemble}
    """

    def __init__(self, ensembles=None):

        # a fresh list per instance: a literal [] default would be shared
        # between all instances created without arguments
        self._ensembles = [] if ensembles is None else ensembles

    @property
    def ensembles(self):
        """
        Collection of statistical ensembles
        """
        return self._ensembles
    @ensembles.setter
    def ensembles(self, value):
        members = list(value)

        for member in members:
            if not isinstance(member, StatisticalEnsemble):
                raise ValueError("Not a list of statistical ensembles")

        self._ensembles = members

    def energy(self, raw_energies):
        """
        Sum of the member energies; member i receives raw_energies[i].
        """
        return sum([ensemble.energy(raw_energies[i])
                    for i, ensemble in enumerate(self._ensembles)], 0)
    
    def gradient(self, raw_energies):
        """
        Sum of the member gradients; member i receives raw_energies[i].
        """
        return sum([ensemble.gradient(raw_energies[i])
                    for i, ensemble in enumerate(self._ensembles)], 0)
150 | 


--------------------------------------------------------------------------------
/csb/statmech/wham.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Estimate the free energy and density of states from tempered ensembles using
  3 | histogram re-weighting.
  4 | """
  5 | 
  6 | import numpy
  7 | 
  8 | from csb.numeric import log, log_sum_exp
  9 | from csb.statistics import histogram_nd
 10 | 
 11 | from abc import abstractmethod, ABCMeta
 12 | 
 13 | 
 14 | class AbstractWHAM(object):
 15 |     """
 16 |     Abstract base class
 17 |     """
 18 |     __metaclass__ = ABCMeta
 19 | 
 20 |     def __init__(self, ensembles, raw_energies, n):
 21 |         
 22 |         self._f = numpy.zeros(len(ensembles))
 23 |         self._e = raw_energies
 24 |         self._n = n
 25 |         self._L = []
 26 |         self._log_g = None
 27 |         self._ensembles = ensembles
 28 | 
 29 |     def log_g(self, normalize=True):
 30 |         """
 31 |         Return the Density of states (DOS).
 32 | 
 33 |         @param normalize: Ensure that the density of states sums to one
 34 |         @rtype: float
 35 |         """
 36 |         if normalize:
 37 |             return self._log_g - log_sum_exp(self._log_g)
 38 |         else:
 39 |             return self._log_g 
 40 |     
 41 |     @property
 42 |     def free_energies(self):
 43 |         """
 44 |         Free energies
 45 |         """
 46 |         return self._f
 47 | 
 48 |     def _stop_criterium(self, tol=1e-10):
 49 |         """
 50 |         general stop criterium; if the relative difference between
 51 |         sequential negative log likelihoods is less than a predefined
 52 |         tolerance
 53 |         
 54 |         @param tol: tolerance
 55 |         @type tol: float
 56 | 
 57 |         @rtype: boolean
 58 |         """
 59 |         L = self._L
 60 |         return  tol is not None and len(L) > 1 and \
 61 |                    abs((L[-2] - L[-1]) / (L[-2] + L[-1])) < tol
 62 |         
 63 |     
 64 |     @abstractmethod
 65 |     def estimate(self, *params):
 66 |         """
 67 |         Estimate the density of states
 68 |         """
 69 |         pass
 70 | 
 71 |     @abstractmethod
 72 |     def log_z(self, beta=1., ensembles=None):
 73 |         """
 74 |         Compute the partition function for an ensemble at inverse temperature
 75 |         beta or for a defined ensemble
 76 | 
 77 |         @param beta: Inverse Temperature
 78 |         @type beta: float or list
 79 | 
 80 |         @param ensembles: List of ensembles for which the partition function should be evaluated
 81 |         @type ensembles: List of ensembles
 82 | 
 83 |         @rtype: float or array
 84 |         """
 85 |         pass
 86 |  
 87 |  
class WHAM(AbstractWHAM):
    """
    Implementation of the original WHAM methods based on histograms.
    """

    def __init__(self, ensembles, raw_energies, n):
        super(WHAM, self).__init__(ensembles, raw_energies, n)

        # histogram bin positions and bin counts; filled in by estimate()
        self._ex = None
        self._h = None
        
    def estimate(self, n_bins=100, n_iter=10000, tol=1e-10):
        """
        Iteratively estimate the free energies and the log density of
        states from a histogram of the raw energies.

        @param n_bins: number of bins passed to the histogram
        @param n_iter: maximum number of iterations
        @param tol: tolerance of the stop criterium

        @return: free energies and log density of states
        @rtype: tuple
        """

        self._L = []        
        h, e = histogram_nd(self._e, nbins=n_bins, normalize=False)
        self._ex = e = numpy.array(e)
        self._h = h
        f = self._f
        
        log_h = log(h)
        # initial log density of states: constant over all bins, normalised
        log_g = h * 0.0
        log_g -= log_sum_exp(log_g)
        log_n = log(self._n)

        # negated ensemble energies evaluated at the histogram positions
        e_ij = -numpy.squeeze(numpy.array([ensemble.energy(e)
                                           for ensemble in self._ensembles])).T

        for _i in range(n_iter):

            ## update density of states
            y = log_sum_exp(numpy.reshape((e_ij - f + log_n).T,
                                          (len(f), -1)), 0)
            log_g = log_h - numpy.reshape(y, log_g.shape)
            log_g -= log_sum_exp(log_g)

            ## update free energies
            f = log_sum_exp(numpy.reshape(e_ij.T + log_g.flatten(),
                                          (len(f), -1)).T, 0)
            # value monitored by the stop criterium
            self._L.append((self._n * f).sum() - (h * log_g).sum())

            self._log_g = log_g
            self._f = f

            if self._stop_criterium(tol):
                break

        return f, log_g

    def log_z(self, beta=1., ensembles=None):
        """
        Use trapezoidal rule to evaluate the partition function.

        Requires a preceding call of estimate(). The ensembles argument
        is not used by this implementation.

        @param beta: inverse temperature(s)
        @type beta: float or array

        @return: a float if beta is a float, otherwise one value per
                 entry of beta
        """
        from numpy import array, multiply, reshape

        is_float = False

        # a plain float is wrapped into a one-element array and the
        # result is unwrapped again at the end
        if type(beta) == float:
            beta = reshape(array(beta), (-1,))
            is_float = True

        # x: bin widths, y: bin positions of the first row of self._ex
        x = self._ex[0, 1:] - self._ex[0, :-1]
        y = self._ex[0]

        # remaining rows of self._ex are combined by outer products
        for i in range(1, self._ex.shape[0]):
            x = multiply.outer(x, self._ex[i, 1:] - self._ex[i, :-1])
            y = multiply.outer(y, self._ex[i])

        y = -multiply.outer(beta, y) + self._log_g
        # pair up neighbouring values and average them in log space
        y = reshape(array([y.T[1:], y.T[:-1]]), (2, -1))
        y = log_sum_exp(y, 0) - log(2)
        y = reshape(y, (-1, len(beta))).T + log(x)

        log_z = log_sum_exp(y.T, 0)

        if is_float:
            return float(log_z)
        else:
            return log_z
166 | 
167 |     
class NonparametricWHAM(AbstractWHAM):
    """
    Implementation of the nonparametric WHAM outlined in Habeck 2012, in
    which histograms are reduced to delta peaks. This allows the use of
    energy samples of different orders of magnitude, improving the accuracy
    of the DOS estimates.
    """

    def _ensemble_energies(self, ensembles):
        """
        Energies of the stored samples in each of the given ensembles,
        one row per ensemble.
        """
        return numpy.array([ensemble.energy(self._e)
                            for ensemble in ensembles])

    def estimate(self, n_iter=10000, tol=1e-10):
        """
        Iteratively estimate the free energies and the log density of
        states.

        @param n_iter: maximum number of iterations
        @param tol: tolerance of the stop criterium

        @return: free energies and log density of states
        @rtype: tuple
        """

        e_ij = self._ensemble_energies(self._ensembles).T
        negative_e_ij = -e_ij

        f = self._f
        log_n = log(self._n)
        self._L = []

        for _step in range(n_iter):

            ## update density of states
            log_g = -log_sum_exp((negative_e_ij - f + log_n).T, 0)
            log_g -= log_sum_exp(log_g)

            ## update free energies
            f = log_sum_exp((-e_ij.T + log_g).T, 0)

            # value monitored by the stop criterium
            likelihood = (self._n * f).sum() - log_g.sum()
            self._L.append(likelihood)

            self._f = f
            self._log_g = log_g

            if self._stop_criterium(tol):
                break

        return f, log_g

    def log_g(self, normalize=True):
        """
        Log density of states, recomputed from the current free energies.

        @param normalize: subtract the log of the summed density of states
        """

        e_ij = self._ensemble_energies(self._ensembles).T

        density = -log_sum_exp((-e_ij - self._f + log(self._n)).T, 0)

        if normalize:
            density -= log_sum_exp(density)

        return density

    def log_z(self, beta=1., ensembles=None):
        """
        Log partition function, either for the given ensembles or, if
        none are given, for the inverse temperature(s) beta.

        @param beta: inverse temperature(s); ignored if ensembles are given
        @param ensembles: ensembles to evaluate the partition function for
        """

        if ensembles is None:
            e_ij_prime = numpy.multiply.outer(beta, self._e)
        else:
            e_ij_prime = self._ensemble_energies(ensembles)

        return log_sum_exp((-e_ij_prime + self.log_g()).T, 0)
227 | 
228 | 
229 | 
230 | 


--------------------------------------------------------------------------------
/csb/test/app.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CSB Test Runner app. Run with -h to see the app's documentation.
 3 | """
 4 | 
 5 | from csb.test import Console
 6 | 
 7 | 
 8 | def main():
 9 |     return Console('csb.test.cases.*')
10 | 
11 | 
# start the test console when this module is executed as a script
if __name__ == '__main__':
    main()
14 | 


--------------------------------------------------------------------------------
/csb/test/cases/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Root package, containing all test cases as sub-packages.
3 | """


--------------------------------------------------------------------------------
/csb/test/cases/bio/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csb-toolbox/CSB/1a858c9a8bbb5e528b06dc0ffb67cf151489413b/csb/test/cases/bio/__init__.py


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/cs/__init__.py:
--------------------------------------------------------------------------------
 1 | import csb.test as test
 2 | 
 3 | from csb.bio.io.cs import ChemShiftReader, ChemShift3Reader, ChemShiftFormatError
 4 | from csb.bio.structure import ChemElements
 5 | from csb.bio.sequence import ProteinAlphabet
 6 | 
 7 | 
 8 | 
 9 | @test.unit
10 | class TestChemShiftReader(test.Case):
11 |     
12 |     def setUp(self):
13 |         
14 |         super(TestChemShiftReader, self).setUp()
15 |         
16 |         self.parser = self.klass()
17 |         self.file2 = self.config.getTestFile('2l01.v2.str')
18 |         self.file3 = self.config.getTestFile('2l01.v3.str')
19 |         
20 |     @property
21 |     def file(self):
22 |         return self.file2
23 |     
24 |     @property
25 |     def klass(self):
26 |         return ChemShiftReader
27 |     
28 |     def testCreate(self):
29 |         
30 |         klass = self.klass
31 |         
32 |         self.assertTrue(isinstance(klass.create(version=2), ChemShiftReader))
33 |         self.assertTrue(isinstance(klass.create(version=3), ChemShift3Reader))
34 |         
35 |         self.assertRaises(ValueError, klass.create, version=1)
36 |         
37 |     def testGuess(self):
38 |         
39 |         klass = self.klass
40 |                 
41 |         self.assertTrue(isinstance(klass.guess(self.file2), ChemShiftReader))
42 |         self.assertTrue(isinstance(klass.guess(self.file3), ChemShift3Reader))
43 |         
44 |         dummy = self.config.getTestFile("2JZC.sum")
45 |         self.assertRaises(ChemShiftFormatError, klass.guess, dummy)
46 |         
47 |     def testReadShifts(self):
48 |         
49 |         content = open(self.file).read()
50 |         cs = self.parser.read_shifts(content)
51 |         
52 |         self.assertEqual(len(cs), 11)
53 |         
54 |         self.assertEqual(cs[0].name, "HA")
55 |         self.assertEqual(cs[0].element, ChemElements.H)
56 |         self.assertEqual(cs[0].shift, 3.977)
57 |         
58 |         self.assertEqual(cs[1].name, "HB2")
59 |         self.assertEqual(cs[1].shift, 2.092)
60 |          
61 |         self.assertEqual(cs[7].element, ChemElements.C)
62 |         self.assertEqual(cs[7].residue, ProteinAlphabet.MET)
63 |         
64 |         self.assertEqual(cs[10].residue, ProteinAlphabet.LYS)
65 |         self.assertEqual(cs[10].shift, 4.423)
66 |         
67 |     def testReadFile(self):
68 |         
69 |         cs = self.parser.read_file(self.file)   
70 |         self.assertEqual(len(cs), 11)
71 |         
72 | @test.unit
73 | class TestChemShift3Reader(TestChemShiftReader):
74 |     
75 |     @property
76 |     def file(self):
77 |         return self.file3
78 | 
79 |     @property
80 |     def klass(self):
81 |         return ChemShift3Reader        
82 |     
83 |     
84 | if __name__ == '__main__':
85 |     # start the csb.test Console when this file is executed as a script
86 |     test.Console()
87 |     


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/mrc/__init__.py:
--------------------------------------------------------------------------------
 1 | import csb.test as test
 2 | 
 3 | from csb.io import MemoryStream
 4 | from csb.bio.io.mrc import DensityMapReader, DensityMapWriter, DensityMapFormatError, HeaderInfo, ByteOrder
 5 | 
 6 |         
 7 | @test.unit
 8 | class TestDensityMapReader(test.Case):
 9 |     
10 |     def setUp(self):
11 |         
12 |         super(TestDensityMapReader, self).setUp()
13 |         
14 |         self.file = self.config.getTestFile('1C3W_10.mrc')
15 |         self.reader = DensityMapReader(self.file)
16 |         self.rawheader = None
17 |         
18 |         with open(self.file, 'rb') as stream:
19 |             self.rawheader = self.reader._rawheader(stream) 
20 |         
21 |     def testReadRawHeader(self):
22 |         self.assertEqual(len(self.rawheader), DensityMapReader.HEADER_SIZE)
23 |         
24 |     def testReadHeader(self):
25 |         
26 |         density = self.reader.read_header()
27 | 
28 |         self.assertEqual(density.data, None)
29 |         self.assertEqual(density.header, self.rawheader)
30 |         self.assertEqual(density.origin, [-36.0, -36.0, -36.0])
31 |         self.assertEqual(density.shape, (72, 72, 72))
32 |         self.assertEqual(density.spacing, (1.0, 1.0, 1.0))
33 |         
34 |     def testRead(self):
35 |         
36 |         density = self.reader.read()
37 | 
38 |         self.assertIsNotNone(density.data)
39 |         self.assertEqual(density.header, self.rawheader)
40 |         self.assertEqual(density.origin, [-36.0, -36.0, -36.0])
41 |         self.assertEqual(density.shape, (72, 72, 72))
42 |         self.assertEqual(density.spacing, (1.0, 1.0, 1.0))
43 |         
44 |         
45 | @test.unit
46 | class TestDensityMapWriter(test.Case):
47 |     
48 |     def setUp(self):
49 |         
50 |         super(TestDensityMapWriter, self).setUp()
51 |         
52 |         self.file = self.config.getTestFile('1C3W_10.mrc')
53 |         self.writer = DensityMapWriter()
54 |         self.reader = DensityMapReader(self.file)
55 |         self.density = self.reader.read()        
56 |         
57 |     def testWriteDensity(self):
58 |         
59 |         with self.config.getTempStream(mode='b') as temp:
60 |             with open(self.file, 'rb') as source:
61 |                 self.writer.write(temp, self.density)
62 |                 temp.flush()
63 |                 if temp.content() != source.read(): 
64 |                     self.fail('binary strings differ')
65 |                     
66 |     def testReconstructHeader(self):
67 |         
68 |         raw = self.density.header
69 |         self.density.header = None
70 |         
71 |         new = self.writer.reconstruct_header(self.density)
72 |         
73 |         original = self.reader._inspect(raw, ByteOrder.NATIVE)
74 |         generated = self.reader._inspect(new, ByteOrder.NATIVE)
75 |         
76 |         for o, g in zip(original, generated):
77 |             self.assertAlmostEqual(o, g, places=4)
78 | 
79 |         
80 |                         
81 | if __name__ == '__main__':
82 |     # start the csb.test Console when this file is executed as a script
83 |     test.Console()
84 | 


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/noe/__init__.py:
--------------------------------------------------------------------------------
  1 | import csb.test as test
  2 | import csb.io
  3 | 
  4 | from csb.bio.io.noe import SparkyPeakListReader, XeasyPeakListReader, XeasyFileBuilder
  5 | from csb.bio.structure import ChemElements
  6 | from csb.bio.sequence import ProteinAlphabet
  7 | 
  8 | 
  9 | @test.unit
 10 | class TestSparkyPeakListReader(test.Case):
 11 |     
 12 |     def setUp(self):
 13 |         
 14 |         super(TestSparkyPeakListReader, self).setUp()
 15 |         
 16 |         self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)
 17 |         self.parser = SparkyPeakListReader(self.elements, [(1, 2)])
 18 |         self.file = self.config.getTestFile('Sparky.peaks')
 19 |     
 20 |     def testRead(self):
 21 |         
 22 |         content = open(self.file).read()
 23 |         spectrum = self.parser.read(content)
 24 |         
 25 |         self.assertEqual(len(spectrum), 3)
 26 |         
 27 |         self.assertEqual(spectrum.min_intensity, 147454)
 28 |         self.assertEqual(spectrum.max_intensity, 204746)
 29 |         
 30 |         self.assertEqual(spectrum.element(0), self.elements[0])
 31 |         self.assertEqual(spectrum.element(1), self.elements[1])
 32 |         
 33 |         self.assertEqual(spectrum.dimensions, self.elements)
 34 |         self.assertEqual(spectrum.proton_dimensions, (0, 2))
 35 |         self.assertEqual(spectrum.num_dimensions, 3)
 36 |         self.assertEqual(spectrum.num_proton_dimensions, 2)
 37 |         
 38 |         self.assertFalse(spectrum.has_element(ChemElements.Ca))
 39 |         self.assertTrue(spectrum.has_element(ChemElements.C))
 40 | 
 41 |         self.assertFalse(spectrum.has_connected_dimensions(0))
 42 |         self.assertEqual(spectrum.connected_dimensions(0), ())
 43 |         self.assertTrue(spectrum.has_connected_dimensions(1))        
 44 |         self.assertEqual(spectrum.connected_dimensions(1), (2,))
 45 |         self.assertTrue(spectrum.has_connected_dimensions(2))        
 46 |         self.assertEqual(spectrum.connected_dimensions(2), (1,))
 47 |         
 48 |         peaks = list(spectrum)
 49 |         self.assertEqual(peaks[0].intensity, 157921)
 50 |         self.assertEqual(peaks[0].get(0), 3.418)
 51 |         self.assertEqual(peaks[0].get(1), 114.437)
 52 |         self.assertEqual(peaks[0].get(2), 7.440)
 53 |     
 54 |     def testReadFile(self):
 55 | 
 56 |         spectrum = self.parser.read_file(self.file)
 57 |         self.assertEqual(len(spectrum), 3)
 58 |     
 59 |     def testReadAll(self):
 60 |         
 61 |         spectrum = self.parser.read_all([self.file, self.file])
 62 |         self.assertEqual(len(spectrum), 6)        
 63 | 
 64 | 
 65 | @test.unit
 66 | class TestXeasyPeakListReader(test.Case):
 67 |     
 68 |     def setUp(self):
 69 |         
 70 |         super(TestXeasyPeakListReader, self).setUp()
 71 | 
 72 |         self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)        
 73 |         self.parser = XeasyPeakListReader()
 74 |         self.file = self.config.getTestFile('Xeasy1.peaks')
 75 |     
 76 |     def testRead(self):
 77 | 
 78 |         content = open(self.file).read()
 79 |         spectrum = self.parser.read(content)
 80 |         
 81 |         self.assertEqual(len(spectrum), 3)
 82 |         
 83 |         self.assertEqual(spectrum.min_intensity, 1.291120e05)
 84 |         self.assertEqual(spectrum.max_intensity, 4.243830e05)
 85 |         
 86 |         self.assertEqual(spectrum.element(0), self.elements[0])
 87 |         self.assertEqual(spectrum.element(1), self.elements[1])
 88 |         
 89 |         self.assertEqual(spectrum.dimensions, self.elements)
 90 |         self.assertEqual(spectrum.proton_dimensions, (0, 2))
 91 |         self.assertEqual(spectrum.num_dimensions, 3)
 92 |         self.assertEqual(spectrum.num_proton_dimensions, 2)
 93 |         
 94 |         self.assertFalse(spectrum.has_element(ChemElements.Ca))
 95 |         self.assertTrue(spectrum.has_element(ChemElements.C))
 96 | 
 97 |         self.assertFalse(spectrum.has_connected_dimensions(0))
 98 |         self.assertEqual(spectrum.connected_dimensions(0), ())
 99 |         self.assertTrue(spectrum.has_connected_dimensions(1))        
100 |         self.assertEqual(spectrum.connected_dimensions(1), (2,))
101 |         self.assertTrue(spectrum.has_connected_dimensions(2))        
102 |         self.assertEqual(spectrum.connected_dimensions(2), (1,))        
103 | 
104 |         peaks = list(spectrum)
105 |         self.assertEqual(peaks[0].intensity, 1.565890e05)
106 |         self.assertEqual(peaks[0].get(0), 7.050)
107 |         self.assertEqual(peaks[0].get(1), 10.374)
108 |         self.assertEqual(peaks[0].get(2), 0.889)        
109 | 
110 | @test.unit
111 | class TestXeasyPeakListReader2(TestXeasyPeakListReader):
112 |     
113 |     def setUp(self):
114 |         
115 |         super(TestXeasyPeakListReader2, self).setUp()
116 | 
117 |         self.elements = (ChemElements.H, ChemElements.C, ChemElements.H)        
118 |         self.parser = XeasyPeakListReader()
119 |         self.file = self.config.getTestFile('Xeasy2.peaks')
120 | 
121 | 
122 | @test.unit
123 | class TestXeasyFileBuilder(test.Case):
124 |     
125 |     def setUp(self):
126 |         super(TestXeasyFileBuilder, self).setUp()
127 |         
128 |     def testBuild(self):
129 |         
130 |         content = self.config.getContent("Xeasy1.peaks")
131 |         spectrum = XeasyPeakListReader().read(content)
132 |         
133 |         stream = csb.io.MemoryStream()
134 |         
135 |         builder = XeasyFileBuilder(stream)
136 |         builder.add_header(spectrum)
137 |         builder.add_peaks(spectrum)
138 |         
139 |         self.assertEqual(stream.getvalue().strip(), content.strip())
140 |         
141 |         
142 | 
143 | 
144 | if __name__ == '__main__':
145 |     # start the csb.test Console when this file is executed as a script
146 |     test.Console()
147 |     
148 |             


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/procheck/__init__.py:
--------------------------------------------------------------------------------
 1 | import csb.test as test
 2 | 
 3 | from csb.bio.io.procheck import ProcheckParser
 4 | 
 5 | @test.functional
 6 | class TestProcheckParser(test.Case):
 7 | 
 8 |         
 9 |     def setUp(self):
10 |         
11 |         super(TestProcheckParser, self).setUp()
12 |         self.file = self.config.getTestFile('2JZC.sum')
13 |         self.parser =  ProcheckParser()
14 | 
15 |     def testParse(self):
16 | 
17 |         res = self.parser.parse(self.file)
18 | 
19 |         self.assertEqual(res['#residues'], 201)
20 |         self.assertEqual(res['rama_core'], 69.5)
21 |         self.assertEqual(res['rama_allow'], 22.6)
22 |         self.assertEqual(res['rama_gener'], 5.6)
23 |         self.assertEqual(res['rama_disall'], 2.3)
24 | 
25 |         self.assertEqual(res['g_dihedrals'], -0.1)
26 |         self.assertEqual(res['g_bond'], 0.51)
27 |         self.assertEqual(res['g_overall'], 0.14)
28 | 
29 |         self.assertEqual(res['badContacts'], 5581)
30 |         
31 | 
32 | if __name__ == '__main__':
33 |     # start the csb.test Console when this file is executed as a script
34 |     test.Console()
35 | 
36 | 
37 | 
38 |         
39 | 
40 | 


--------------------------------------------------------------------------------
/csb/test/cases/bio/io/whatif/__init__.py:
--------------------------------------------------------------------------------
 1 | import csb.test as test
 2 | 
 3 | from csb.bio.io.whatif import WhatCheckParser
 4 | 
 5 | 
 6 | @test.functional
 7 | class TestWhatCheckParser(test.Case):
 8 | 
 9 | 
10 |     def setUp(self):
11 |         super(TestWhatCheckParser, self).setUp()
12 |         self.file = self.config.getTestFile('pdbout.txt')
13 |         self.parser = WhatCheckParser()
14 | 
15 | 
16 |     def testParse(self):
17 | 
18 |         res = self.parser.parse(self.file)
19 |         self.assertEqual(res['rama_z_score'], -4.617)
20 |         self.assertEqual(res['bb_z_score'], -1.421)
21 |         self.assertEqual(res['1st_packing_z_score'], -3.436)
22 |         self.assertEqual(res['2nd_packing_z_score'], -4.424)
23 |         self.assertEqual(res['rotamer_score'], -2.103)
24 |         
25 | 
26 | if __name__ == '__main__':
27 |     # start the csb.test Console when this file is executed as a script
28 |     test.Console()
29 | 
30 |         
31 | 


--------------------------------------------------------------------------------
/csb/test/cases/bio/sequence/alignment/__init__.py:
--------------------------------------------------------------------------------
  1 | import csb.test as test
  2 | 
  3 | from csb.bio.sequence import RichSequence, SequenceTypes
  4 | from csb.bio.sequence.alignment import IdentityMatrix, SimilarityMatrix
  5 | from csb.bio.sequence.alignment import GlobalAlignmentAlgorithm, LocalAlignmentAlgorithm, AlignmentResult
  6 | 
  7 | 
  8 | @test.unit
  9 | class TestIdentityMatrix(test.Case):
 10 |     
 11 |     def setUp(self):
 12 |         
 13 |         super(TestIdentityMatrix, self).setUp()
 14 |         self.matrix = IdentityMatrix(2, -3)
 15 |         
 16 |     def testScore(self):
 17 |         self.assertEqual(self.matrix.score("a", "a"), 2)
 18 |         self.assertEqual(self.matrix.score("a", "b"), -3)
 19 |         
 20 | @test.unit
 21 | class TestSimilarityMatrix(test.Case):
 22 |     
 23 |     def setUp(self):
 24 |         
 25 |         super(TestSimilarityMatrix, self).setUp()
 26 |         self.matrix = SimilarityMatrix(SimilarityMatrix.BLOSUM62)
 27 |         
 28 |     def testScore(self):
 29 |         self.assertEqual(self.matrix.score("A", "A"), 4)
 30 |         self.assertEqual(self.matrix.score("A", "R"), -1)  
 31 |         self.assertEqual(self.matrix.score("R", "A"), -1)
 32 |         
 33 |         
 34 | @test.unit
 35 | class TestGlobalAlignmentAlgorithm(test.Case):
 36 |     
 37 |     def setUp(self):
 38 |         
 39 |         super(TestGlobalAlignmentAlgorithm, self).setUp()
 40 |         
 41 |         self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
 42 |         self.seq2 = RichSequence('s1', '', 'AZCBBABAABCCEF', SequenceTypes.Protein)
 43 |         self.algorithm = GlobalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=0)  
 44 |         
 45 |     def testAlign(self):
 46 | 
 47 |         ali = self.algorithm.align(self.seq1, self.seq2)
 48 |         
 49 |         self.assertEqual(ali.query.sequence,   "CCA-BBBCBBCABAABCCEAAAAAAAAAAAAFAA")
 50 |         self.assertEqual(ali.subject.sequence, "--AZ---CBB-ABAABCCE------------F--")
 51 | 
 52 |         self.assertEqual(ali.query.residues[3], self.seq1.residues[3])
 53 |         self.assertTrue(ali.query.residues[3] is self.seq1.residues[3])
 54 |                 
 55 |         self.assertEqual(ali.qstart, 1)
 56 |         self.assertEqual(ali.qend, 33)
 57 |         self.assertEqual(ali.start, 1)
 58 |         self.assertEqual(ali.end, 14)
 59 | 
 60 |         self.assertEqual(ali.length, 34)        
 61 |         self.assertEqual(ali.gaps, 21)
 62 |         self.assertEqual(ali.identicals, 13)
 63 |         self.assertEqual(ali.identity, 13 / 34.0 )
 64 |         self.assertEqual(ali.score, 13)
 65 |         
 66 |     def testEmptyAlignment(self):
 67 |         
 68 |         seq1 = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
 69 |         seq2 = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)
 70 |         
 71 |         ali = self.algorithm.align(seq1, seq2)
 72 |         self.assertTrue(ali.is_empty)        
 73 | 
 74 | @test.unit
 75 | class TestLocalAlignmentAlgorithm(test.Case):
 76 |     
 77 |     def setUp(self):
 78 |         
 79 |         super(TestLocalAlignmentAlgorithm, self).setUp()
 80 |         
 81 |         self.seq1 = RichSequence('s1', '', 'CCABBBCBBCABAABCCEAAAAAAAAAAAAFAA', SequenceTypes.Protein)
 82 |         self.seq2 = RichSequence('s1', '', 'AZCBBABAACBCCEF', SequenceTypes.Protein)
 83 |         self.algorithm = LocalAlignmentAlgorithm(scoring=IdentityMatrix(1, -1), gap=-1)  
 84 |         
 85 |     def testAlign(self):
 86 | 
 87 |         ali = self.algorithm.align(self.seq1, self.seq2)
 88 |                 
 89 |         self.assertEqual(ali.query.sequence,   "CBBCABAA-BCCE")
 90 |         self.assertEqual(ali.subject.sequence, "CBB-ABAACBCCE")         
 91 |         
 92 |         self.assertEqual(ali.qstart, 7)
 93 |         self.assertEqual(ali.qend, 18)
 94 |         self.assertEqual(ali.start, 3)
 95 |         self.assertEqual(ali.end, 14)
 96 | 
 97 |         self.assertEqual(ali.length, 13)        
 98 |         self.assertEqual(ali.gaps, 2)
 99 |         self.assertEqual(ali.identicals, 11)
100 |         self.assertEqual(ali.identity, 11 / 13.0 )
101 |         self.assertEqual(ali.score, 9)
102 |         
103 |     def testEmptyAlignment(self):
104 |         
105 |         seq1 = RichSequence('s1', '', 'AAAA', SequenceTypes.Protein)
106 |         seq2 = RichSequence('s2', '', 'BBBB', SequenceTypes.Protein)
107 |         
108 |         ali = self.algorithm.align(seq1, seq2)
109 |         self.assertTrue(ali.is_empty)
110 |         
111 | 
112 | @test.unit
113 | class TestAlignmentResult(test.Case):
114 |     
115 |     def setUp(self):
116 |         
117 |         super(TestAlignmentResult, self).setUp()        
118 |         
119 |         self.seq1 = RichSequence('s1', '', 'AB-D', SequenceTypes.Protein)
120 |         self.seq2 = RichSequence('s2', '', 'A-CD', SequenceTypes.Protein)        
121 |         self.ali = AlignmentResult(5.5, self.seq1, self.seq2, 10, 12, 20, 22)
122 |         
123 |         self.es = RichSequence('s1', '', '')
124 |         self.empty = AlignmentResult(0, self.es, self.es, 0, 0, 0, 0)
125 |         
126 |     def testConstructor(self):
127 |         
128 |         self.assertRaises(ValueError, AlignmentResult, 1, self.es, self.es, 0, 0, 0, 0)
129 |         self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 1, 0, 0, 0)
130 |         self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 1, 0, 0)
131 |         self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 1, 0)
132 |         self.assertRaises(ValueError, AlignmentResult, 0, self.es, self.es, 0, 0, 0, 1)
133 |         
134 |         self.assertRaises(ValueError, AlignmentResult, 1, self.seq1, self.seq2, 0, 0, 0, 0)
135 |         
136 |     def testStr(self):
137 |         
138 |         string = r"""
139 |    10 AB-D 12   
140 |    20 A-CD 22   """.strip("\r\n")
141 |         self.assertEqual(string, str(self.ali))
142 |         
143 |     def testAlignment(self):
144 |         
145 |         ali = self.ali.alignment()
146 |         self.assertEqual(ali.rows[1].sequence, self.seq1.sequence)
147 |         self.assertEqual(ali.rows[2].sequence, self.seq2.sequence)
148 |         
149 |     def testQuery(self):
150 |         self.assertEqual(self.ali.query.sequence, self.seq1.sequence)
151 |         self.assertEqual(self.ali.query.residues[2], self.seq1.residues[2])
152 |         self.assertTrue(self.ali.query.residues[2] is self.seq1.residues[2]) 
153 |         
154 |     def testSubject(self):
155 |         self.assertEqual(self.ali.subject.sequence, self.seq2.sequence)
156 |         self.assertEqual(self.ali.subject.residues[3], self.seq2.residues[3])
157 |         self.assertTrue(self.ali.subject.residues[3] is self.seq2.residues[3])         
158 |         
159 |     def testQstart(self):
160 |         self.assertEqual(self.ali.qstart, 10)
161 | 
162 |     def testQend(self):
163 |         self.assertEqual(self.ali.qend, 12)
164 |         
165 |     def testStart(self):       
166 |         self.assertEqual(self.ali.start, 20)
167 |                 
168 |     def testEnd(self):
169 |         self.assertEqual(self.ali.end, 22)
170 |         
171 |     def testLength(self):
172 |         self.assertEqual(self.ali.length, 4)
173 | 
174 |     def testScore(self):
175 |         self.assertEqual(self.ali.score, 5.5)
176 |         
177 |     def testGaps(self):
178 |         self.assertEqual(self.ali.gaps, 2)
179 |                                 
180 |     def testIdenticals(self):
181 |         self.assertEqual(self.ali.identicals, 2)
182 |         
183 |     def testIdentity(self):
184 |         self.assertEqual(self.ali.identity, 0.5) 
185 |         
186 |     def testIsEmpty(self):
187 |         self.assertFalse(self.ali.is_empty)
188 |         
189 |         es = RichSequence('s1', '', '')
190 |         empty = AlignmentResult(0, es, es, 0, 0, 0, 0)
191 |         self.assertTrue(empty.is_empty)
192 |                 
193 | 
194 | if __name__ == '__main__':
195 |     # start the csb.test Console when this file is executed as a script
196 |     test.Console()
197 | 


--------------------------------------------------------------------------------
/csb/test/cases/bio/utils/__init__.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import sys
  3 | import numpy
  4 | import multiprocessing
  5 | 
  6 | import csb.test as test
  7 | import csb.bio.utils as cbu
  8 | import csb.io
  9 | 
 10 | # Coordinate arrays (rows of 3 values) shared by the test cases below.
 11 | X1 = numpy.array([
 12 |     [ 0.,  0.,  0.],
 13 |     [ 1.,  0.,  0.],
 14 |     [ 0.,  1.,  0.]])
 15 | 
 16 | X2 = numpy.array([
 17 |     [ 0.,  0.,  0.],
 18 |     [ 1.,  2.,  0.],
 19 |     [-2., -1.,  0.]])
 20 | 
 21 | X3 = numpy.array([
 22 |     [ 0.,  0.,  0.],
 23 |     [ 2., -1.,  0.],
 24 |     [-1.,  2.,  0.]])
 25 | # 3 x 3 matrix that TestUtils.testFit expects cbu.fit(X1, X2) to return as R
 26 | RZ = numpy.array([
 27 |     [ 0.,  1.,  0.],
 28 |     [-1.,  0.,  0.],
 29 |     [ 0.,  0.,  1.]])
 30 | 
 31 | X4 = numpy.array([
 32 |     [ 0.,  0.,  0.],
 33 |     [ 1.,  0.,  0.],
 34 |     [ 0.,  1.,  0.],
 35 |     [ 1.,  1.,  0.]])
 36 | 
 37 | X5 = numpy.array([
 38 |     [   0.,    0.,    0.],
 39 |     [ 100.,    0.,    0.],
 40 |     [   0.,  100.,    0.],
 41 |     [  50.,   50.,    0.]])
 42 | 
 43 | X6 = numpy.array([
 44 |     [   0.,    0.,    0.],
 45 |     [ 100.,    0.,    0.],
 46 |     [   0.,  100.,    0.],
 47 |     [  60.,   60.,    0.]])
 48 | 
 49 | X7 = numpy.array([
 50 |     [   0.,    0.,    0.],
 51 |     [ 100.,    0.,    0.],
 52 |     [   0.,  100.,    0.],
 53 |     [   0.,    0.,  100.]])
 54 | 
 55 |             
 56 | @test.regression
 57 | class Regressions(test.Case):
 58 | 
 59 |     def _timeoutTest(self):
 60 |         cbu.tm_superimpose([[1, 1, 1]], [[1, 1, 1]])
 61 |         
 62 |     def _multiprocessingTest(self):        
 63 |         return True
 64 |     
 65 |     def _runProcess(self, target, timeout=1.0):
 66 |                 
 67 |         p = multiprocessing.Process(target=target)
 68 |         p.start()
 69 |         p.join(timeout=timeout)
 70 |         
 71 |         return p
 72 |     
 73 |     @test.skip("n/a on this platform", sys.platform.startswith('win'))        
 74 |     def testTMSuperimpose(self):
 75 |         """
 76 |         @see: [CSB 0000058]
 77 |         """
 78 |         try:
 79 |             self._runProcess(target=self._multiprocessingTest)
 80 |         except:
 81 |             self.skipTest("may produce a false positive")
 82 |                 
 83 |         p = self._runProcess(target=self._timeoutTest, timeout=5.0)
 84 |         
 85 |         if p.is_alive():
 86 |             p.terminate()
 87 |             self.fail('timeout expired')
 88 |             
 89 |     def testRmsdMirrorImage(self):
 90 |         X, Y = X7, X7.copy()
 91 |         Y[:, 0] *= -1
 92 |         rmsd = cbu.rmsd(X, Y)
 93 | 
 94 |         self.assertAlmostEqual(rmsd, 50.0)
 95 | 
 96 | 
 97 | @test.functional
 98 | class TestUtils(test.Case):
 99 | 
100 |     def assertArrayEqual(self, first, second, eps=1e-7):
101 |         diff = numpy.asarray(first) - numpy.asarray(second)
102 |         self.assertTrue((abs(diff) < eps).all())
103 | 
104 |     def testFit(self):
105 |         R, t = cbu.fit(X1, X2)
106 |         Y = numpy.dot(X2, R.T) + t
107 | 
108 |         self.assertArrayEqual(R, RZ)
109 |         self.assertArrayEqual(t, [0., 0., 0.])
110 |         self.assertArrayEqual(Y, X3)
111 | 
112 |     def testWFit(self):
113 |         w = numpy.array([1., 1., 0.])
114 |         R, t = cbu.wfit(X1, X2, w)                              #@UnusedVariable
115 | 
116 |         d = 5.0**0.5
117 |         self.assertArrayEqual(t, [-d / 2.0 + 0.5, 0., 0.])
118 | 
119 |     def testScaleAndFit(self):
120 |         R, t, s = cbu.scale_and_fit(2.0 * X1, X1)
121 | 
122 |         self.assertArrayEqual(R, numpy.identity(3))
123 |         self.assertArrayEqual(t, [0., 0., 0.])
124 |         self.assertAlmostEqual(s, 2.0)
125 | 
126 |     def testFitWellordered(self):
127 |         R, t = cbu.fit_wellordered(X5, X6, 10, 1.0)             #@UnusedVariable
128 | 
129 |         self.assertArrayEqual(t, [0., 0., 0.])
130 | 
131 |     def testRmsd(self):
132 |         rmsd = cbu.rmsd(X1, X2)
133 | 
134 |         self.assertAlmostEqual(rmsd, (4./3.)**0.5)
135 | 
136 |     def testWrmsd(self):
137 |         w = numpy.array([1., 1., 0.])
138 |         rmsd = cbu.wrmsd(X1, X2, w)
139 | 
140 |         d = 5.0**0.5
141 |         self.assertAlmostEqual(rmsd, d / 2.0 - 0.5)
142 | 
143 |     def testTorsionRmsd(self):
144 |         rmsd = cbu.torsion_rmsd(X1[:,:2], X1[:,:2])
145 | 
146 |         self.assertAlmostEqual(rmsd, 0.0)
147 | 
148 |     def testTmScore(self):
149 |         score = cbu.tm_score(X1, X3)
150 | 
151 |         self.assertAlmostEqual(score, 0.4074, 4)
152 | 
153 |     def testTmSuperimpose(self):
154 |         R, t, score = cbu.tm_superimpose(X1, X2)            #@UnusedVariable
155 | 
156 |         self.assertAlmostEqual(score, 0.4074, 4)
157 | 
158 |     def testCenterOfMass(self):
159 |         com = cbu.center_of_mass(X4)
160 | 
161 |         self.assertArrayEqual(com, [0.5, 0.5, 0.0])
162 | 
163 |     def testRadiusOfGyration(self):
164 |         gyradius = cbu.radius_of_gyration(X4)
165 | 
166 |         s2 = 2.0**0.5
167 |         self.assertArrayEqual(gyradius, s2 / 2.0)
168 | 
169 |     def testSecondMoments(self):
170 |         sm = cbu.second_moments(X1)
171 | 
172 |         # TODO: correct?
173 |         sm_test = numpy.array([
174 |             [ 2./3., -1./3., 0.    ],
175 |             [-1./3.,  2./3., 0.    ],
176 |             [ 0.,     0.,    0.    ]])
177 |         self.assertArrayEqual(sm, sm_test)
178 | 
179 |     def testInertiaTensor(self):
180 |         it = cbu.inertia_tensor(X1)
181 | 
182 |         # TODO: correct?
183 |         it_test = numpy.array([
184 |             [ 2./3.,  1./3., 0.    ],
185 |             [ 1./3.,  2./3., 0.    ],
186 |             [ 0.,     0.,    4./3. ]])
187 |         self.assertArrayEqual(it, it_test)
188 | 
189 |     def testFindPairs(self):
190 |         pairs11 = list(cbu.find_pairs(1.2, X1))
191 |         pairs12 = list(cbu.find_pairs(1.2, X1, X2))
192 | 
193 |         self.assertEqual(len(pairs11), 2)
194 |         self.assertEqual(len(pairs12), 3)
195 | 
196 |     def testDistanceMatrix(self):
197 |         d = cbu.distance_matrix(X1)
198 | 
199 |         s2 = 2.0**0.5
200 |         d_test = [
201 |             [ 0., 1., 1. ],
202 |             [ 1., 0., s2 ],
203 |             [ 1., s2, 0. ]]
204 |         self.assertArrayEqual(d, d_test)
205 | 
206 |     def testDistance(self):
207 |         d = cbu.distance(X1, X2)
208 | 
209 |         self.assertEqual(d.shape, (len(X1),))
210 |         self.assertArrayEqual(d[:2], [0., 2.])
211 | 
212 |     def testRmsdCur(self):
213 |         rmsd = cbu.rmsd_cur(X1, X2)
214 | 
215 |         self.assertAlmostEqual(rmsd, 2.0)
216 |         
217 | 
218 |     def testRad(self):
219 |         
220 |         converted = cbu.rad([[0, 90, 180]])[0]
221 |          
222 |         self.assertEqual(converted[0], 0)
223 |         self.assertAlmostEqual(converted[1], numpy.pi / 2.0)
224 |         self.assertAlmostEqual(converted[2], numpy.pi)
225 | 
226 |     def testDeg(self):
227 |         
228 |         converted = cbu.deg([[0, numpy.pi / 2.0, numpy.pi]])[0]
229 |         
230 |         self.assertEqual(converted[0], 0)
231 |         self.assertAlmostEqual(converted[1], 90)
232 |         self.assertAlmostEqual(converted[2], 180)
233 |         
234 | 
235 | if __name__ == '__main__':
236 |     # start the csb.test Console when this file is executed as a script
237 |     test.Console()
238 | 
239 | 


--------------------------------------------------------------------------------
/csb/test/cases/numeric/integrators.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import csb.test as test
 3 | 
 4 | from math import cos
 5 | 
 6 | from csb.numeric.integrators import LeapFrog, FastLeapFrog, VelocityVerlet, AbstractGradient
 7 | from csb.statistics.samplers import State
 8 | 
 9 | 
10 | @test.functional
11 | class TestIntegrators(test.Case):
12 | 
13 |     def setUp(self):
14 |         
15 |         super(TestIntegrators, self).setUp()
16 |         
17 |         self.dt = 0.1
18 |         self.grad = self._createGradient(1.)
19 |         self.nsteps = 100
20 |         self.state = State(np.array([1.]), np.array([0.]))
21 |         
22 |     def _createGradient(self, sigma):
23 |         
24 |         class Grad(AbstractGradient):
25 |             def evaluate(self, q, t):
26 |                 return q / (sigma ** 2)
27 |             
28 |         return Grad()        
29 | 
30 |     def _run(self, algorithm):
31 |         
32 |         result = algorithm.integrate(self.state, self.nsteps).final.position
33 |         self.assertAlmostEqual(result, cos(self.nsteps * self.dt), delta=0.1)
34 |         
35 |     def testLeapFrog(self):
36 |         
37 |         algorithm = LeapFrog(self.dt, self.grad)
38 |         self._run(algorithm)
39 | 
40 |     def testFastLeapFrog(self):
41 | 
42 |         algorithm = FastLeapFrog(self.dt, self.grad)
43 |         self._run(algorithm)
44 | 
45 |     def testVelocityVerlet(self):
46 |         
47 |         algorithm = VelocityVerlet(self.dt, self.grad)
48 |         self._run(algorithm)
49 | 
50 | @test.regression
51 | class ReferenceRegressions(test.Case):
52 |     """
53 |     @see: [0000108]
54 |     """    
55 | 
56 |     def setUp(self):
57 |         
58 |         super(ReferenceRegressions, self).setUp()
59 |         
60 |         self.dt = 0.1
61 |         self.grad = self._createGradient(1.)
62 |         self.nsteps = 100
63 |         self.state = State(np.array([1.]), np.array([0.]))
64 |         
65 |     def _createGradient(self, sigma):
66 |         
67 |         class Grad(AbstractGradient):
68 |             def evaluate(self, q, t):
69 |                 return q / (sigma ** 2)
70 |             
71 |         return Grad()        
72 | 
73 |     def _run(self, algorithm):
74 |         
75 |         result = algorithm.integrate(self.state, self.nsteps, return_trajectory=True)
76 |         self.assertFalse(result[0].position[0] == result[10].position[0])
77 |         self.assertFalse(result[10].position[0] == result[20].position[0])
78 |         self.assertFalse(result[0].position == result.final.position)
79 |         
80 |     def testLeapFrog(self):
81 |         
82 |         algorithm = LeapFrog(self.dt, self.grad)
83 |         self._run(algorithm)
84 | 
85 |     def testVelocityVerlet(self):
86 |         
87 |         algorithm = VelocityVerlet(self.dt, self.grad)
88 |         self._run(algorithm)
89 |         
90 | 
91 | if __name__ == '__main__':  # true only when this file is executed directly
92 | 
93 |     test.Console()
94 | 


--------------------------------------------------------------------------------
/csb/test/cases/statistics/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy
 3 | import numpy.random
 4 | 
 5 | import csb.test as test
 6 | 
 7 | from csb.statistics import Cumulative
 8 | from csb.statistics import kurtosis, skewness, autocorrelation
 9 | 
10 | 
11 | @test.functional
12 | class TestStatFunction(test.Case):
13 | 
14 | 
15 |     def testCumulative(self):
16 |         from scipy.stats import norm
17 |         
18 |         x = numpy.linspace(-5., 5., 200)
19 |         samples = numpy.random.normal(size=100000)
20 |         cumula = Cumulative(samples)
21 |         c = cumula(x)
22 |         
23 |         cx = norm.cdf(x)
24 |         for i in range(199):
25 |             self.assertAlmostEqual(cx[i], c[i], delta=1e-2)
26 |         
27 | 
28 |     def testKurtosis(self):
29 |         samples = numpy.random.normal(size=100000)
30 |         self.assertAlmostEqual(kurtosis(samples), 0., delta=1e-1)
31 | 
32 |         samples = numpy.random.uniform(-2., 2., size=100000)
33 |         self.assertAlmostEqual(kurtosis(samples), -1.2, delta=1e-1)
34 | 
35 | 
36 |     def testSkewness(self):
37 |         samples = numpy.random.gamma(2., 0.5, size=100000)
38 |         self.assertAlmostEqual(skewness(samples), 2. / numpy.sqrt(2.), delta=1e-1)
39 | 
40 |     def testAutorcorrelation(self):
41 |         x = numpy.random.normal(size=1000) + numpy.sin(numpy.linspace(0., 2 * numpy.pi, 1000))
42 |         n = 10
43 |         ac = autocorrelation(x, n)
44 |         self.assertAlmostEqual(ac[0], 1., delta=1e-1)
45 |         
46 |     def testEntropy(self):
47 |         pass
48 | 
49 |     def testCircvar(self):
50 |         pass
51 | 
52 |     def testCircmean(self):
53 |         pass
54 | 
55 |     
56 |         
57 | 
58 | 


--------------------------------------------------------------------------------
/csb/test/cases/statistics/ars.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import csb.test as test
 3 | 
 4 | from csb.statistics.ars import ARS, Gauss
 5 | 
 6 | 
 7 | @test.functional
 8 | class TestARS(test.Case):
 9 | 
10 |     def testNormal(self):
11 |         mu = 5.
12 |         sigma = 1.
13 |         ars = ARS(Gauss(mu, sigma))
14 |         ars.initialize([mu - 1., mu + 1.1], z0=-10., zmax=30)
15 |         samples = numpy.array([ars.sample() for i in range(10000)])
16 | 
17 |         self.assertAlmostEqual(mu, numpy.mean(samples), delta=0.5)
18 |         self.assertAlmostEqual(sigma, numpy.std(samples), delta=0.5)
19 |     
20 |            
21 | if __name__ == '__main__':  # true only when this file is executed directly
22 |     
23 |     test.Console()
24 | 


--------------------------------------------------------------------------------
/csb/test/cases/statistics/maxent.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | import csb.test as test
 4 | import csb.io
 5 | 
 6 | from scipy.optimize import fmin_powell
 7 | 
 8 | from csb.numeric import log_sum_exp
 9 | from csb.statistics.maxent import MaxentModel, MaxentPosterior
10 | 
11 | @test.functional
12 | class TestMaxent(test.Case):
13 | 
14 |     def setUp(self):
15 |         super(TestMaxent, self).setUp()
16 |         self.data_fn = self.config.getTestFile('maxent.pickle')
17 |         
18 |     @test.skip("slow")
19 |     def testMaxent(self):
20 |         k = 2
21 |         data = csb.io.load(self.data_fn)
22 |         model = MaxentModel(k)
23 |         model.sample_weights()
24 |         posterior = MaxentPosterior(model, data[:100000] / 180. * numpy.pi)
25 | 
26 |         model.get() * 1.
27 | 
28 |         x0 = posterior.model.get().flatten()
29 |         target = lambda w:-posterior(w, n=50)
30 |         x = fmin_powell(target, x0, disp=False)
31 | 
32 |         self.assertTrue(x != None)
33 |         self.assertTrue(len(x) == k * k * 4)
34 | 
35 |         posterior.model.set(x)
36 |         posterior.model.normalize(True)
37 | 
38 |         xx = numpy.linspace(0 , 2 * numpy.pi, 500)
39 |         fx = posterior.model.log_prob(xx, xx)
40 | 
41 |         self.assertAlmostEqual(posterior.model.log_z(integration='simpson'),
42 |                                posterior.model.log_z(integration='trapezoidal'),
43 |                                places=2)
44 |         
45 |         self.assertTrue(fx != None)
46 |         z = numpy.exp(log_sum_exp(numpy.ravel(fx))) 
47 |         self.assertAlmostEqual(z * xx[1] ** 2, 1., places=1)
48 | 
49 | 
50 | if __name__ == '__main__':  # true only when this file is executed directly
51 |     
52 |     test.Console()
53 |         
54 | 


--------------------------------------------------------------------------------
/csb/test/cases/statistics/mixtures.py:
--------------------------------------------------------------------------------
 1 | from numpy import array, linspace
 2 | 
 3 | from csb import test
 4 | from csb.bio.io.wwpdb import LegacyStructureParser
 5 | from csb.statistics import mixtures
 6 | 
 7 | 
 8 | @test.functional
 9 | class TestMixtures(test.Case):
10 | 
11 |     w_ref_segments = array([
12 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
13 |         2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
15 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
16 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,
17 |         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18 |         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
19 |         1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
20 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
21 |         2, 2, 2, 2])
22 | 
23 |     w_ref_conformers = array([2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1])
24 | 
25 |     def _ake_ensemble_coords(self):
26 | 
27 |         pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
28 |         ensemble = LegacyStructureParser(pdbfile).parse_models()
29 |         X = array([model.get_coordinates(['CA'], True) for model in ensemble])
30 | 
31 |         self.assertEqual(X.shape, (16, 211, 3))
32 | 
33 |         self._ake_ensemble_coords = lambda: X
34 | 
35 |         return X
36 |     
37 |     def testSegmentMixture(self):
38 | 
39 |         self._testMixture(mixtures.SegmentMixture, self.w_ref_segments)
40 | 
41 |     def testConformerMixture(self):
42 | 
43 |         self._testMixture(mixtures.ConformerMixture, self.w_ref_conformers, 14./16.)
44 | 
45 |     def _testMixture(self, cls, w_ref, min_overlap=0.9, repeats=5):
46 | 
47 |         X = self._ake_ensemble_coords()
48 |         K = len(set(w_ref))
49 | 
50 |         # non-randomized heuristic with BIC
51 |         m = cls.new(X)
52 |         overlap = m.overlap(w_ref)
53 | 
54 |         self.assertTrue(overlap >= min_overlap, 'mixture not reproduced with heuristic')
55 | 
56 |         # annealing (randomized initialization)
57 |         m = cls(X, K, False)
58 |         for _ in range(repeats):
59 |             m.randomize_scales()
60 |             m.anneal(linspace(2.0, 0.1, 10))
61 | 
62 |             overlap = m.overlap(w_ref)
63 |             if overlap >= min_overlap:
64 |                 break
65 |         else:
66 |             self.assertTrue(False, 'mixture not reproduced with annealing')
67 | 
68 | 
69 | if __name__ == '__main__':  # true only when this file is executed directly
70 | 
71 |     test.Console()
72 | 
73 | # vi:expandtab:smarttab
74 | 


--------------------------------------------------------------------------------
/csb/test/cases/statistics/rand.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | import warnings
  3 | 
  4 | import csb.test as test
  5 | 
  6 | from csb.numeric import exp, log_sum_exp, log
  7 | from csb.statistics.rand import truncated_gamma, truncated_normal, sample_from_histogram
  8 | from csb.statistics.pdf import Normal
  9 | from csb.statistics import density
 10 | 
 11 | 
 12 | @test.functional
 13 | class TestRand(test.Case):
 14 | 
 15 |     def testTruncatedGamma(self):
 16 |         alpha = 2.
 17 |         beta = 1.
 18 |         x_min = 0.1
 19 |         x_max = 5.
 20 | 
 21 |         x = truncated_gamma(10000, alpha, beta, x_min, x_max)
 22 | 
 23 |         self.assertTrue((x <= x_max).all())
 24 |         self.assertTrue((x >= x_min).all())
 25 | 
 26 |         hy, hx = density(x, 100)
 27 |         hx = 0.5 * (hx[1:] + hx[:-1])
 28 |         hy = hy.astype('d')
 29 | 
 30 |         with warnings.catch_warnings(record=True) as warning:
 31 |             warnings.simplefilter("always")            
 32 |             
 33 |             hy /= (hx[1] - hx[0]) * hy.sum()
 34 |             
 35 |             self.assertLessEqual(len(warning), 1)
 36 |             
 37 |             if len(warning) == 1:
 38 |                 warning = warning[0]
 39 |                 self.assertEqual(warning.category, RuntimeWarning)
 40 |                 self.assertTrue(str(warning.message).startswith('divide by zero encountered'))            
 41 | 
 42 |         x = numpy.linspace(x_min, x_max, 1000)
 43 |         p = (alpha - 1) * log(x) - beta * x
 44 |         p -= log_sum_exp(p)
 45 |         p = exp(p) / (x[1] - x[0])
 46 | 
 47 |     def testTruncatedNormal(self):
 48 | 
 49 |         mu = 2.
 50 |         sigma = 1.
 51 |         x_min = -1.
 52 |         x_max = 5.
 53 | 
 54 |         x = truncated_normal(10000, mu, sigma, x_min, x_max)
 55 | 
 56 |         self.assertAlmostEqual(numpy.mean(x), mu, delta=1e-1)
 57 |         self.assertAlmostEqual(numpy.var(x), sigma, delta=1e-1)
 58 | 
 59 |         self.assertTrue((x <= x_max).all())
 60 |         self.assertTrue((x >= x_min).all())
 61 | 
 62 |         hy, hx = density(x, 100)
 63 |         hx = 0.5 * (hx[1:] + hx[:-1])
 64 |         hy = hy.astype('d')
 65 |         
 66 |         with warnings.catch_warnings(record=True) as warning:
 67 |             warnings.simplefilter("always")        
 68 |             
 69 |             hy /= (hx[1] - hx[0]) * hy.sum()
 70 |             
 71 |             self.assertLessEqual(len(warning), 1)
 72 |             
 73 |             if len(warning) == 1:
 74 |                 warning = warning[0]
 75 |                 self.assertEqual(warning.category, RuntimeWarning)
 76 |                 self.assertTrue(str(warning.message).startswith('divide by zero encountered'))
 77 |             
 78 |         x = numpy.linspace(mu - 5 * sigma, mu + 5 * sigma, 1000)
 79 | 
 80 |         p = -0.5 * (x - mu) ** 2 / sigma ** 2
 81 |         p -= log_sum_exp(p)
 82 |         p = exp(p) / (x[1] - x[0])
 83 |     
 84 |     
 85 | 
 86 |     def testSampleFromHistogram(self):
 87 |         mu = 5.
 88 |         sigma = 1.
 89 | 
 90 |         normal = Normal(mu, sigma)
 91 | 
 92 |         x = normal.random(10000)
 93 |         hx, p = density(x, 100)
 94 | 
 95 |         samples = hx[sample_from_histogram(p, n_samples=10000)]
 96 | 
 97 |         self.assertAlmostEqual(mu, numpy.mean(samples), delta=0.5)
 98 |         self.assertAlmostEqual(sigma, numpy.std(samples), delta=0.5)
 99 | 
100 | 
101 | if __name__ == '__main__':  # true only when this file is executed directly
102 | 
103 |     test.Console()
104 | 


--------------------------------------------------------------------------------
/csb/test/cases/statmech/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csb-toolbox/CSB/1a858c9a8bbb5e528b06dc0ffb67cf151489413b/csb/test/cases/statmech/__init__.py


--------------------------------------------------------------------------------
/csb/test/cases/statmech/ensembles.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | import csb.numeric
 4 | import csb.test as test
 5 | 
 6 | from csb.statmech.ensembles import BoltzmannEnsemble, TsallisEnsemble, CompositeEnsemble
 7 | 
 8 | 
 9 | @test.functional
10 | class TestEnergy(test.Case):
11 | 
12 |     def testBoltzmann(self):
13 |         e = numpy.linspace(-50, 1000, 1000)
14 | 
15 |         be = BoltzmannEnsemble(beta=1,)
16 |         te = be.energy(e)
17 |         
18 |         for i in range(len(e)):
19 |             self.assertEqual(e[i], te[i])
20 | 
21 |         be = BoltzmannEnsemble(beta=0.001,)
22 |         te = be.energy(e)
23 |         
24 |         for i in range(len(e)):
25 |             self.assertEqual(e[i] * 0.001, te[i])
26 | 
27 |     def testTsallis(self):
28 |         e = numpy.linspace(-50, 1000, 1000)
29 | 
30 |         tsallis = TsallisEnsemble(q=1.,)
31 |         te = tsallis.energy(e)
32 |         
33 |         for i in range(len(e)):
34 |             self.assertEqual(e[i], te[i])
35 | 
36 |         tsallis = TsallisEnsemble(q=1.1, e_min= -50.)
37 |         te = tsallis.energy(e)
38 |         q = 1.1
39 |         ee = q / (q - 1.) * csb.numeric.log(1 + (q - 1) * (e + 50.)) - 50
40 |         
41 |         for i in range(len(e)):
42 |             self.assertAlmostEqual(ee[i], te[i], delta=1e-5)
43 | 
44 | 
45 |     def testComposite(self):
46 |         e1 = numpy.linspace(-50, 1000, 1000)
47 |         e2 = numpy.linspace(-30, 3000, 1000)
48 | 
49 |         q = 1.1
50 |         beta = 0.1
51 |         ee = q / (q - 1.) * csb.numeric.log(1 + (q - 1) * (e1 + 50.)) - 50
52 |         ee += e2 * beta
53 | 
54 |         ce = CompositeEnsemble([TsallisEnsemble(q=q, e_min= -50.),
55 |                                 BoltzmannEnsemble(beta=beta,)])
56 | 
57 |         cee = ce.energy([e1, e2])
58 |         
59 |         for i in range(len(e1)):
60 |             self.assertAlmostEqual(ee[i], cee[i], delta=1e-5)
61 | 
62 |                 
63 | if __name__ == '__main__':  # true only when this file is executed directly
64 |     
65 |     test.Console()
66 | 
67 | 


--------------------------------------------------------------------------------
/csb/test/cases/statmech/wham.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import csb.test as test
 3 | 
 4 | from csb.statmech.ensembles import BoltzmannEnsemble
 5 | from csb.statmech.wham import WHAM, NonparametricWHAM
 6 | 
 7 | 
 8 | class FunnyGaussian(object):
 9 | 
10 |     def __init__(self, d, k=100.):
11 | 
12 |         self.d = int(d)
13 |         self.k = float(k)
14 | 
15 |     def sample(self, n_samples, inv_T=1):
16 | 
17 |         from numpy.random import standard_normal
18 |         from numpy import sqrt, sum
19 |         from csb.statistics.rand import truncated_gamma
20 |     
21 |         x = standard_normal((self.d, n_samples))
22 |         x /= sqrt(sum(x ** 2, 0))
23 |     
24 |         r = truncated_gamma(n_samples, 0.5 * self.d, self.k * inv_T, 0., 0.5)
25 |         r = (2 * r) ** 0.5
26 |     
27 |         return (x * r).T
28 | 
29 |     def energy(self, x):
30 | 
31 |         x = numpy.array(x)
32 |         return 0.5 * self.k * numpy.sum(x ** 2, -1)
33 | 
34 |     def log_Z(self, beta=1.):
35 | 
36 |         from csb.numeric import log
37 |         from scipy.special import gammainc, gammaln
38 | 
39 |         return log(0.5 * self.d) + log(gammainc(0.5 * self.d, 0.5 * self.k)) + \
40 |                gammaln(0.5 * self.d) + (0.5 * self.d) * (log(2) - log(self.k))
41 | 
42 |     def log_g(self, energies):
43 |         
44 |         from csb.numeric import log
45 |         return (0.5 * self.d - 1) * log(2 * energies / self.k) + log(self.d / self.k)
46 | 
47 |  
48 | @test.functional
49 | class TestWHAM(test.Case):
50 | 
51 |     def setUp(self):
52 |         self.betas = numpy.linspace(1e-5, 1., 10)
53 |         self.n = n = 1000
54 |         
55 |         gaussian = FunnyGaussian(10, 100.)
56 |         
57 |         self.samples = []
58 |         self.raw_energies = []
59 |         
60 |         
61 |         for beta in self.betas:
62 |             self.samples.append(gaussian.sample(n, beta))
63 |             self.raw_energies.append(gaussian.energy(self.samples[-1]))
64 |         
65 |         self.raw_energies = numpy.array(self.raw_energies)
66 |         self.ensembles = [BoltzmannEnsemble(beta=beta) for beta in self.betas]
67 |         
68 |         self.log_z = gaussian.log_Z()
69 |         self.log_g = gaussian.log_g(numpy.ravel(self.raw_energies))
70 |         
71 |     def testWHAM(self):
72 | 
73 |         w = WHAM(self.ensembles,
74 |                  numpy.ravel(self.raw_energies),
75 |                  numpy.array([self.n] * len(self.betas)))
76 |         w.estimate()
77 |         
78 |         self.assertAlmostEqual(numpy.dot(numpy.array([1, -1]), 
79 |                                          w.log_z(numpy.array([1., 0.]))),
80 |                                self.log_z, delta=0.5)
81 | 
82 |     def testNonparametricWHAM(self):
83 |         
84 |         w = NonparametricWHAM(self.ensembles,
85 |                          numpy.ravel(self.raw_energies),
86 |                          [self.n] * len(self.betas))
87 |         w.estimate()
88 |         ens = [BoltzmannEnsemble(beta=1.,),
89 |                BoltzmannEnsemble(beta=0.)]
90 |         self.assertAlmostEqual(numpy.dot(numpy.array([1, -1]),
91 |                                          w.log_z(ensembles=ens)),
92 |                                self.log_z, delta=0.5)
93 | 
94 | 
95 | if __name__ == '__main__':  # true only when this file is executed directly
96 | 
97 |     test.Console()
98 | 
99 | 


--------------------------------------------------------------------------------
/csb/test/data/1C3W_10.mrc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csb-toolbox/CSB/1a858c9a8bbb5e528b06dc0ffb67cf151489413b/csb/test/data/1C3W_10.mrc


--------------------------------------------------------------------------------
/csb/test/data/1nz9A.frags:
--------------------------------------------------------------------------------
  1 |  position:            1 neighbors:            1
  2 | 
  3 |  1nz9 A     1 A L    0.000  112.130  179.977    0.000
  4 |  1nz9 A     2 Q L -134.597  155.738  179.916    0.000
  5 |  1nz9 A     3 V L -143.771  148.819 -179.986    0.000
  6 |  1nz9 A     4 A L  -86.179  -44.448 -179.977    0.000
  7 |  1nz9 A     5 F L -151.432  157.481  179.946    0.000
  8 |  1nz9 A     6 R L -150.513  134.103  179.981    0.000
  9 |  1nz9 A     7 E L  -58.233  147.798 -179.882    0.000
 10 |  1nz9 A     8 G L   79.541    6.522 -179.989    0.000
 11 |  1nz9 A     9 D L  -86.869  174.534  179.959    0.000
 12 |  1nz9 A    10 Q L -113.960  132.607 -179.950    0.000
 13 | 
 14 |  position:            6 neighbors:            1
 15 | 
 16 |  1nz9 A     6 R L -150.513  134.103  179.981    0.000
 17 |  1nz9 A     7 E L  -58.233  147.798 -179.882    0.000
 18 |  1nz9 A     8 G L   79.541    6.522 -179.989    0.000
 19 |  1nz9 A     9 D L  -86.869  174.534  179.959    0.000
 20 |  1nz9 A    10 Q L -113.960  132.607 -179.950    0.000
 21 |  1nz9 A    11 V L -131.901  136.198  179.915    0.000
 22 |  1nz9 A    12 R L  -80.030  166.265 -179.985    0.000
 23 |  1nz9 A    13 V L  -79.855  167.337 -179.201    0.000
 24 |  1nz9 A    14 V L  -79.313  -60.298 -179.542    0.000
 25 |  1nz9 A    15 S L -160.425 -164.178 -179.951    0.000
 26 | 
 27 |  position:           11 neighbors:            1
 28 | 
 29 |  1nz9 A    11 V L -131.901  136.198  179.915    0.000
 30 |  1nz9 A    12 R L  -80.030  166.265 -179.985    0.000
 31 |  1nz9 A    13 V L  -79.855  167.337 -179.201    0.000
 32 |  1nz9 A    14 V L  -79.313  -60.298 -179.542    0.000
 33 |  1nz9 A    15 S L -160.425 -164.178 -179.951    0.000
 34 |  1nz9 A    16 G L   77.366 -168.013  179.593    0.000
 35 |  1nz9 A    17 P L  -60.374  -19.016  179.933    0.000
 36 |  1nz9 A    18 F L  -95.855   13.091 -179.361    0.000
 37 |  1nz9 A    19 A L  -79.136   39.692  179.336    0.000
 38 |  1nz9 A    20 D L -160.702   34.873 -179.986    0.000
 39 | 
 40 |  position:           16 neighbors:            1
 41 | 
 42 |  1nz9 A    16 G L   77.366 -168.013  179.593    0.000
 43 |  1nz9 A    17 P L  -60.374  -19.016  179.933    0.000
 44 |  1nz9 A    18 F L  -95.855   13.091 -179.361    0.000
 45 |  1nz9 A    19 A L  -79.136   39.692  179.336    0.000
 46 |  1nz9 A    20 D L -160.702   34.873 -179.986    0.000
 47 |  1nz9 A    21 F L -148.003  167.131  179.888    0.000
 48 |  1nz9 A    22 T L -127.158 -173.979 -179.963    0.000
 49 |  1nz9 A    23 G L -150.426  164.811 -179.999    0.000
 50 |  1nz9 A    24 T L -103.992  108.474 -179.996    0.000
 51 |  1nz9 A    25 V L  -57.418  113.884  179.985    0.000
 52 | 
 53 |  position:           21 neighbors:            1
 54 | 
 55 |  1nz9 A    21 F L -148.003  167.131  179.888    0.000
 56 |  1nz9 A    22 T L -127.158 -173.979 -179.963    0.000
 57 |  1nz9 A    23 G L -150.426  164.811 -179.999    0.000
 58 |  1nz9 A    24 T L -103.992  108.474 -179.996    0.000
 59 |  1nz9 A    25 V L  -57.418  113.884  179.985    0.000
 60 |  1nz9 A    26 T L  -98.876  -33.916  179.895    0.000
 61 |  1nz9 A    27 E L -153.483  158.479  179.975    0.000
 62 |  1nz9 A    28 I L -129.880  158.690 -179.997    0.000
 63 |  1nz9 A    29 N L -135.295   86.706  179.938    0.000
 64 |  1nz9 A    30 P L  -71.694  -34.126 -179.928    0.000
 65 | 
 66 |  position:           26 neighbors:            1
 67 | 
 68 |  1nz9 A    26 T L  -98.876  -33.916  179.895    0.000
 69 |  1nz9 A    27 E L -153.483  158.479  179.975    0.000
 70 |  1nz9 A    28 I L -129.880  158.690 -179.997    0.000
 71 |  1nz9 A    29 N L -135.295   86.706  179.938    0.000
 72 |  1nz9 A    30 P L  -71.694  -34.126 -179.928    0.000
 73 |  1nz9 A    31 E L  -58.984  -33.552 -179.959    0.000
 74 |  1nz9 A    32 R L -110.233  -61.226 -179.937    0.000
 75 |  1nz9 A    33 G L  127.040   42.732 -179.996    0.000
 76 |  1nz9 A    34 K L -115.588  170.344  179.961    0.000
 77 |  1nz9 A    35 V L -155.367  137.079  179.998    0.000
 78 | 
 79 |  position:           31 neighbors:            1
 80 | 
 81 |  1nz9 A    31 E L  -58.984  -33.552 -179.959    0.000
 82 |  1nz9 A    32 R L -110.233  -61.226 -179.937    0.000
 83 |  1nz9 A    33 G L  127.040   42.732 -179.996    0.000
 84 |  1nz9 A    34 K L -115.588  170.344  179.961    0.000
 85 |  1nz9 A    35 V L -155.367  137.079  179.998    0.000
 86 |  1nz9 A    36 K L -114.688  128.253 -179.986    0.000
 87 |  1nz9 A    37 V L -139.876  142.506  179.883    0.000
 88 |  1nz9 A    38 M L  -89.925  104.020 -179.791    0.000
 89 |  1nz9 A    39 V L  -91.762  171.108  179.934    0.000
 90 |  1nz9 A    40 T L -132.415  115.344 -179.976    0.000
 91 | 
 92 |  position:           36 neighbors:            1
 93 | 
 94 |  1nz9 A    36 K L -114.688  128.253 -179.986    0.000
 95 |  1nz9 A    37 V L -139.876  142.506  179.883    0.000
 96 |  1nz9 A    38 M L  -89.925  104.020 -179.791    0.000
 97 |  1nz9 A    39 V L  -91.762  171.108  179.934    0.000
 98 |  1nz9 A    40 T L -132.415  115.344 -179.976    0.000
 99 |  1nz9 A    41 I L -120.233  133.825  179.963    0.000
100 |  1nz9 A    42 F L   61.736   30.418  179.965    0.000
101 |  1nz9 A    43 G L   70.276   45.256 -179.931    0.000
102 |  1nz9 A    44 R L -159.979  138.860  179.979    0.000
103 |  1nz9 A    45 E L  -60.803  151.942 -179.994    0.000
104 | 
105 |  position:           41 neighbors:            1
106 | 
107 |  1nz9 A    41 I L -120.233  133.825  179.963    0.000
108 |  1nz9 A    42 F L   61.736   30.418  179.965    0.000
109 |  1nz9 A    43 G L   70.276   45.256 -179.931    0.000
110 |  1nz9 A    44 R L -159.979  138.860  179.979    0.000
111 |  1nz9 A    45 E L  -60.803  151.942 -179.994    0.000
112 |  1nz9 A    46 T L -160.015   91.165  179.949    0.000
113 |  1nz9 A    47 P L  -68.372  117.453 -179.998    0.000
114 |  1nz9 A    48 V L -132.835  154.984  179.741    0.000
115 |  1nz9 A    49 E L  -98.797  138.100 -179.796    0.000
116 |  1nz9 A    50 L L -144.335 -162.203  178.424    0.000
117 | 
118 |  position:           46 neighbors:            1
119 | 
120 |  1nz9 A    46 T L -160.015   91.165  179.949    0.000
121 |  1nz9 A    47 P L  -68.372  117.453 -179.998    0.000
122 |  1nz9 A    48 V L -132.835  154.984  179.741    0.000
123 |  1nz9 A    49 E L  -98.797  138.100 -179.796    0.000
124 |  1nz9 A    50 L L -144.335 -162.203  178.424    0.000
125 |  1nz9 A    51 D L -153.110  156.707 -179.714    0.000
126 |  1nz9 A    52 F L  -45.858  -29.377 -179.493    0.000
127 |  1nz9 A    53 S L  -76.012   -3.453 -179.846    0.000
128 |  1nz9 A    54 Q L -131.768   15.076  179.869    0.000
129 |  1nz9 A    55 V L -142.001  170.027 -179.926    0.000
130 | 
131 |  position:           51 neighbors:            1
132 | 
133 |  1nz9 A    51 D L -153.110  156.707 -179.714    0.000
134 |  1nz9 A    52 F L  -45.858  -29.377 -179.493    0.000
135 |  1nz9 A    53 S L  -76.012   -3.453 -179.846    0.000
136 |  1nz9 A    54 Q L -131.768   15.076  179.869    0.000
137 |  1nz9 A    55 V L -142.001  170.027 -179.926    0.000
138 |  1nz9 A    56 V L -142.412  146.709 -179.953    0.000
139 |  1nz9 A    57 K L  -83.362  131.334 -179.997    0.000
140 |  1nz9 A    58 A L -125.576    0.000    0.000    0.000
141 | 
142 |  position:           56 neighbors:            1
143 | 
144 |  1nz9 A    56 V L -142.412  146.709 -179.953    0.000
145 |  1nz9 A    57 K L  -83.362  131.334 -179.997    0.000
146 |  1nz9 A    58 A L -125.576    0.000    0.000    0.000
147 | 
148 | 


--------------------------------------------------------------------------------
/csb/test/data/2JZC.sum:
--------------------------------------------------------------------------------
 1 | 
 2 |  +----------<<<  P  R  O  C  H  E  C  K     S  U  M  M  A  R  Y  >>>----------+
 3 |  |                                                                            |
 4 |  | 2JZC.pdb   1.5                                                201 residues |
 5 |  |                                                                            |
 6 | *| Ramachandran plot:   69.5% core   22.6% allow    5.6% gener    2.3% disall |
 7 |  |                                                                            |
 8 | *| All Ramachandrans:   37 labelled residues (out of 199)                     |
 9 | *| Chi1-chi2 plots:      9 labelled residues (out of 123)                     |
10 | +| Main-chain params:    4 better     0 inside      2 worse                   |
11 |  | Side-chain params:    5 better     0 inside      0 worse                   |
12 |  |                                                                            |
13 | *| Residue properties: Max.deviation:    42.0              Bad contacts: 5581 |
14 | +|                     Bond len/angle:    3.3    Morris et al class:  2  2  2 |
15 |  |                                                                            |
16 |  | G-factors           Dihedrals:  -0.10  Covalent:   0.51    Overall:   0.14 |
17 |  |                                                                            |
18 |  | M/c bond lengths: 99.1% within limits   0.9% highlighted                   |
19 |  | M/c bond angles:  99.9% within limits   0.1% highlighted                   |
20 |  | Planar groups:   100.0% within limits   0.0% highlighted                   |
21 |  |                                                                            |
22 |  +----------------------------------------------------------------------------+
23 |    + May be worth investigating further.  * Worth investigating further.
24 | 
25 | 


--------------------------------------------------------------------------------
/csb/test/data/2l01.v2.str:
--------------------------------------------------------------------------------
 1 | save_assigned_chem_shift_list
 2 | save_
 3 | 
 4 | save_assigned_chem_shift_list_1
 5 | 
 6 |    loop_
 7 |       _Atom_shift_assign_ID
 8 |       _Residue_author_seq_code
 9 |       _Residue_seq_code
10 |       _Residue_label
11 |       _Atom_name
12 |       _Atom_type
13 |       _Chem_shift_value
14 |       _Chem_shift_value_error
15 |       _Chem_shift_ambiguity_code
16 | 
17 |         1  1  1 MET HA   H   3.977 0.020 1 
18 |         2  1  1 MET HB2  H   2.092 0.020 1 
19 |         3  1  1 MET HB3  H   2.092 0.020 1 
20 |         4  1  1 MET HE   H   2.111 0.020 1 
21 |         5  1  1 MET HG2  H   2.580 0.020 1 
22 |         6  1  1 MET HG3  H   2.580 0.020 1 
23 |         7  1  1 MET CA   C  55.300 0.200 1 
24 |         8  1  1 MET CB   C  33.840 0.200 1 
25 |         9  1  1 MET CE   C  16.841 0.200 1 
26 |        10  1  1 MET CG   C  30.975 0.200 1 
27 |        11  2  2 LYS HA   H   4.423 0.020 1 
28 | 
29 |    stop_
30 | 
31 | save_
32 | 


--------------------------------------------------------------------------------
/csb/test/data/2l01.v3.str:
--------------------------------------------------------------------------------
 1 | save_assigned_chem_shift_list
 2 | save_
 3 | 
 4 | save_assigned_chem_shift_list_1
 5 | 
 6 |    loop_
 7 |       _Atom_chem_shift.ID
 8 |       _Atom_chem_shift.Assembly_atom_ID
 9 |       _Atom_chem_shift.Entity_assembly_ID
10 |       _Atom_chem_shift.Entity_ID
11 |       _Atom_chem_shift.Comp_index_ID
12 |       _Atom_chem_shift.Seq_ID
13 |       _Atom_chem_shift.Comp_ID
14 |       _Atom_chem_shift.Atom_ID
15 |       _Atom_chem_shift.Atom_type
16 |       _Atom_chem_shift.Atom_isotope_number
17 |       _Atom_chem_shift.Val
18 |       _Atom_chem_shift.Val_err
19 |       _Atom_chem_shift.Assign_fig_of_merit
20 |       _Atom_chem_shift.Ambiguity_code
21 |       _Atom_chem_shift.Occupancy
22 |       _Atom_chem_shift.Resonance_ID
23 |       _Atom_chem_shift.Auth_entity_assembly_ID
24 |       _Atom_chem_shift.Auth_seq_ID
25 |       _Atom_chem_shift.Auth_comp_ID
26 |       _Atom_chem_shift.Auth_atom_ID
27 |       _Atom_chem_shift.Details
28 |       _Atom_chem_shift.Entry_ID
29 |       _Atom_chem_shift.Assigned_chem_shift_list_ID
30 | 
31 |         1 . 1 1  1  1 MET HA   H  1   3.977 0.020 . 1 . . .  1 MET HA   . 17025 1 
32 |         2 . 1 1  1  1 MET HB2  H  1   2.092 0.020 . 1 . . .  1 MET HB2  . 17025 1 
33 |         3 . 1 1  1  1 MET HB3  H  1   2.092 0.020 . 1 . . .  1 MET HB3  . 17025 1 
34 |         4 . 1 1  1  1 MET HE1  H  1   2.111 0.020 . 1 . . .  1 MET HE   . 17025 1 
35 |         5 . 1 1  1  1 MET HG2  H  1   2.580 0.020 . 1 . . .  1 MET HG2  . 17025 1 
36 |         6 . 1 1  1  1 MET HG3  H  1   2.580 0.020 . 1 . . .  1 MET HG3  . 17025 1 
37 |         7 . 1 1  1  1 MET CA   C 13  55.300 0.200 . 1 . . .  1 MET CA   . 17025 1 
38 |         8 . 1 1  1  1 MET CB   C 13  33.840 0.200 . 1 . . .  1 MET CB   . 17025 1 
39 |         9 . 1 1  1  1 MET CE   C 13  16.841 0.200 . 1 . . .  1 MET CE   . 17025 1 
40 |        10 . 1 1  1  1 MET CG   C 13  30.975 0.200 . 1 . . .  1 MET CG   . 17025 1 
41 |        12 . 1 1  2  2 LYS HA   H  1   4.423 0.020 . 1 . . .  2 LYS HA   . 17025 1 
42 | 
43 |    stop_
44 | 
45 | save_
46 | 


--------------------------------------------------------------------------------
/csb/test/data/Sparky.peaks:
--------------------------------------------------------------------------------
1 |       Assignment         w1         w2         w3   Data Height     Note    
2 | 
3 |             ?-?-?      3.418    114.437      7.440       157921 
4 |             ?-?-?      0.972    114.476      7.443       204746 
5 |             ?-?-?      1.147    114.481      7.445       147454 
6 | 


--------------------------------------------------------------------------------
/csb/test/data/Xeasy1.peaks:
--------------------------------------------------------------------------------
 1 | # Number of dimensions 3
 2 | #INAME 1 H1
 3 | #INAME 2 C2
 4 | #INAME 3 H3
 5 | #CYANAFORMAT hCH
 6 |     1   7.050  10.374   0.889 2 U       1.565890e+05 0.00e+00 m   0    0    0    0 0
 7 |     2   8.921  10.397   0.892 2 U       1.291120e+05 0.00e+00 m   0    0    0    0 0
 8 |     3   2.307  10.430   0.891 2 U       4.243830e+05 0.00e+00 m   0    0    0    0 0
 9 | 
10 | 


--------------------------------------------------------------------------------
/csb/test/data/Xeasy2.peaks:
--------------------------------------------------------------------------------
1 | # Number of dimensions 3
2 | #INAME 1 H1
3 | #INAME 2 2C
4 | #INAME 3 3H
5 |     1   7.050  10.374   0.889 2 U       1.565890e+05 0.00e+00 m   0    0    0    0 0
6 |     2   8.921  10.397   0.892 2 U       1.291120e+05 0.00e+00 m   0    0    0    0 0
7 |     3   2.307  10.430   0.891 2 U       4.243830e+05 0.00e+00 m   0    0    0    0 0
8 | 
9 | 


--------------------------------------------------------------------------------
/csb/test/data/csb.tsv:
--------------------------------------------------------------------------------
1 | # @TSV ID:int A:float B:str 
2 | 11	11.1	Row eleven
3 | 12	12.2	Row twelve
4 | 13		Row thirteen
5 | 


--------------------------------------------------------------------------------
/csb/test/data/d1nz0a_.a3m:
--------------------------------------------------------------------------------
 1 | >d1nz0a_ d.14.1.2 (A:) RNase P protein {Thermotoga maritima [TaxId: 2336]}
 2 | ERLRLRRDFLLIFKEGKSLQNEYFVVLFRKNGMDYSRLGIVVKRKFGKATRRNKLKRWVREIFRRNKGVIPKGFDIVVIPRKKLSEEFERVDFWTVREKLLNLLKRIEG
 3 | >gi|108802371|ref|YP_642568.1|(7-116:118) ribonuclease P [Mycobacterium sp. MCS]   gi|119866064|ref|YP_936016.1| ribonuclease P [Mycobacterium sp. KMS]   gi|126438351|ref|YP_001074042.1| ribonuclease P [Mycobacterium sp. JLS]   gi|123177783|sp|Q1B0S2.1|RNPA_MYCSS RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|166226724|sp|A3Q8S4.1|RNPA_MYCSJ RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|166226725|sp|A1U8R8.1|RNPA_MYCSK RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|108772790|gb|ABG11512.1| ribonuclease P protein component [Mycobacterium sp. MCS]   gi|119692153|gb|ABL89226.1| ribonuclease P protein component [Mycobacterium sp. KMS]   gi|126238151|gb|ABO01552.1| ribonuclease P protein component [Mycobacterium sp. JLS]  E=3e-08 s/c=0.55 id=17% cov=100%
 4 | -RMTRSTEFSTTVSKGVRSAQPDLVLHMANvlDDPSGPRVGLVVAKSVGNAVVRHRVSRRLRHSVHPMLDELQPGHRLVIRALPGAASATSARLHQELSAALRRARPRVEA
 5 | >gi|227373914|ref|ZP_03857386.1|(6-99:111) ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798]   gi|227062537|gb|EEI01571.1| ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798]  E=3e-06 s/c=0.57 id=23% cov=87%
 6 | -RLTSSKDWKEVRTRGRCSRSSFATICVLFEGESE-KFGFAAAKSIGSVAKRNRAKRRLREAFRQTYKFGSKPCLVIAIA----GPECLTMDFQELKSKL---------
 7 | >gi|124010240|ref|ZP_01694895.1|(9-122:122) ribonuclease P protein component [Microscilla marina ATCC 23134]   gi|123983732|gb|EAY24164.1| ribonuclease P protein component [Microscilla marina ATCC 23134]  E=8e-05 s/c=0.43 id=24% cov=99%
 8 | ERLKSKKIIQSLFPKGKDAFVYPIKvkyILHPTPSNTPPQVLFTVPKRtFKRAVDRNAIKRLLKEAYRLNKHLLhdeAGSYKIAYIAFVYIAK--EKLPFDTIERKTISVFERLKG
 9 | >gi|139352214|gb|ECE59672.1|(37-150:150) hypothetical protein GOS_6065400 [marine metagenome]   gi|142774203|gb|EDA48250.1| hypothetical protein GOS_1993299 [marine metagenome]   gi|139024765|gb|ECC88500.1| hypothetical protein GOS_5642689 [marine metagenome]   gi|139647524|gb|ECG49761.1| hypothetical protein GOS_5517516 [marine metagenome]  E=0.0002 s/c=0.42 id=21% cov=96%
10 | ESLKKSSHFGTVLKN-RVINNDFYTIYRKKNfikkasNEKKLYISFVMKKKVGNAVKRNRIKRKLKgvvQKMLKINNSINLNYTYVIFGKEKIYSEHSNSLFKNMEKSFNKINK----
11 | >gi|137813163|gb|EBW14305.1|(5-114:118) hypothetical protein GOS_6793674 [marine metagenome]   gi|143750626|gb|EDG59861.1| hypothetical protein GOS_754256 [marine metagenome]  E=2e-12 s/c=0.68 id=24% cov=99%
12 | KRMTKRGDFLRAQQGNIKYITSSVVIQLIPNDIQgkfSTRVGFTASKKIGNAVKRNYAKRLMRSLVYRQSNELASSFDYVFIARQAILNKKFYLIESEIMRVLKHFNKNI--
13 | >gi|148654187|ref|YP_001281280.1|(10-116:130) ribonuclease P protein component [Psychrobacter sp. PRwf-1]   gi|229470482|sp|A5WI39.1|RNPA_PSYWF RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|148573271|gb|ABQ95330.1| ribonuclease P protein component [Psychrobacter sp. PRwf-1]  E=1e-11 s/c=0.67 id=24% cov=97%
14 | KRLLKPAEFKPVFNQPlFKVHQTHFMAFAYDSDHLQARLGMAItKKKIPTAVARNTIKRIIREQFRHTHAQLPA-LDVVFILKKSTKALSNEQMRQEISDILSKVISK---
15 | >gi|142801636|gb|EDA68688.1|(13-118:120) hypothetical protein GOS_1956086 [marine metagenome]   E=4e-05 s/c=0.48 id=23% cov=95%
16 | --LKVNSSTIKILNNKPVYNSKILKLYTIPNSEDGPRLAIQITKRaIRLAVTRNLVRRKIKEDFRANYAEIAKHDCLLVISSKisSAKHEISDILMQEWKQSLKSLEK----
17 | >gi|143373151|gb|EDE62902.1|(90-193:197) hypothetical protein GOS_1097530 [marine metagenome]   E=0.0003 s/c=0.46 id=21% cov=95%
18 | -RLSRSHEFQRLRREGTRVRSGYLwCVMLQDPSLPGPAVAFAIGRPFGSAVRRNRLRRQLRSILSDRESAMGGG--MFLIGVNNPHRDLPMPSFAQLTHDIDEILNK---


--------------------------------------------------------------------------------
/csb/test/data/d1nz0a_.mfasta:
--------------------------------------------------------------------------------
 1 | >d1nz0a_ d.14.1.2 (A:) RNase P protein {Thermotoga maritima [TaxId: 2336]}
 2 | ERLRLRRDFLLIFKEG-KSLQNEYF-V---VLFRK--N------GMD---YSRLGIVV-KRK-FGKATRRNKLKRWVR---EIFRRNKGVI---PKGFDIVVIPRK--KLSEEFERVDFWTVREKLLNLLKRIEG
 3 | >gi|108802371|ref|YP_642568.1|(7-116:118) ribonuclease P [Mycobacterium sp. MCS]   gi|119866064|ref|YP_936016.1| ribonuclease P [Mycobacterium sp. KMS]   gi|126438351|ref|YP_001074042.1| ribonuclease P [Mycobacterium sp. JLS]   gi|123177783|sp|Q1B0S2.1|RNPA_MYCSS RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|166226724|sp|A3Q8S4.1|RNPA_MYCSJ RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|166226725|sp|A1U8R8.1|RNPA_MYCSK RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|108772790|gb|ABG11512.1| ribonuclease P protein component [Mycobacterium sp. MCS]   gi|119692153|gb|ABL89226.1| ribonuclease P protein component [Mycobacterium sp. KMS]   gi|126238151|gb|ABO01552.1| ribonuclease P protein component [Mycobacterium sp. JLS]  E=3e-08 s/c=0.55 id=17% cov=100%
 4 | -RMTRSTEFSTTVSKG-VRSAQPDL-V---LHMANVLD------DPS---GPRVGLVV-AKS-VGNAVVRHRVSRRLR---HSVHPMLDEL---QPGHRLVIRALP--GAASATSARLHQELSAALRRARPRVEA
 5 | >gi|227373914|ref|ZP_03857386.1|(6-99:111) ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798]   gi|227062537|gb|EEI01571.1| ribonuclease P protein component [Thermobaculum terrenum ATCC BAA-798]  E=3e-06 s/c=0.57 id=23% cov=87%
 6 | -RLTSSKDWKEVRTRG-RCSRSSFA-T---ICVLF--E------GES---E-KFGFAA-AKS-IGSVAKRNRAKRRLR---EAFRQTYKFG---SKPCLVIAIA------GPECLTMDFQELKSKL---------
 7 | >gi|124010240|ref|ZP_01694895.1|(9-122:122) ribonuclease P protein component [Microscilla marina ATCC 23134]   gi|123983732|gb|EAY24164.1| ribonuclease P protein component [Microscilla marina ATCC 23134]  E=8e-05 s/c=0.43 id=24% cov=99%
 8 | ERLKSKKIIQSLFPKG-KDAFVYPI-KVKYILHPT--P------SNT---PPQVLFTV-PKRTFKRAVDRNAIKRLLK---EAYRLNKHLLHDEAGSYKIAYIAFV--YIAK--EKLPFDTIERKTISVFERLKG
 9 | >gi|139352214|gb|ECE59672.1|(37-150:150) hypothetical protein GOS_6065400 [marine metagenome]   gi|142774203|gb|EDA48250.1| hypothetical protein GOS_1993299 [marine metagenome]   gi|139024765|gb|ECC88500.1| hypothetical protein GOS_5642689 [marine metagenome]   gi|139647524|gb|ECG49761.1| hypothetical protein GOS_5517516 [marine metagenome]  E=0.0002 s/c=0.42 id=21% cov=96%
10 | ESLKKSSHFGTVLKN--RVINNDFY-T---IYRKK--NFIKKASNEK---KLYISFVM-KKK-VGNAVKRNRIKRKLKGVVQKMLKINNSI---NLNYTYVIFGKE--KIYSEHSNSLFKNMEKSFNKINK----
11 | >gi|137813163|gb|EBW14305.1|(5-114:118) hypothetical protein GOS_6793674 [marine metagenome]   gi|143750626|gb|EDG59861.1| hypothetical protein GOS_754256 [marine metagenome]  E=2e-12 s/c=0.68 id=24% cov=99%
12 | KRMTKRGDFLRAQQGN-IKYITSSV-V---IQLIP--N------DIQGKFSTRVGFTA-SKK-IGNAVKRNYAKRLMR---SLVYRQSNEL---ASSFDYVFIARQ--AILNKKFYLIESEIMRVLKHFNKNI--
13 | >gi|148654187|ref|YP_001281280.1|(10-116:130) ribonuclease P protein component [Psychrobacter sp. PRwf-1]   gi|229470482|sp|A5WI39.1|RNPA_PSYWF RecName: Full=Ribonuclease P protein component; Short=RNaseP protein; Short=RNase P protein; AltName: Full=Protein C5   gi|148573271|gb|ABQ95330.1| ribonuclease P protein component [Psychrobacter sp. PRwf-1]  E=1e-11 s/c=0.67 id=24% cov=97%
14 | KRLLKPAEFKPVFNQPLFKVHQTHF-M---AFAYD--S------DHL---QARLGMAITKKK-IPTAVARNTIKRIIR---EQFRHTHAQL---PA-LDVVFILKK--STKALSNEQMRQEISDILSKVISK---
15 | >gi|142801636|gb|EDA68688.1|(13-118:120) hypothetical protein GOS_1956086 [marine metagenome]   E=4e-05 s/c=0.48 id=23% cov=95%
16 | --LKVNSSTIKILNNK-PVYNSKIL-K---LYTIP--N------SED---GPRLAIQI-TKRAIRLAVTRNLVRRKIK---EDFRANYAEI---AKHDCLLVISSKISSAKHEISDILMQEWKQSLKSLEK----
17 | >gi|143373151|gb|EDE62902.1|(90-193:197) hypothetical protein GOS_1097530 [marine metagenome]   E=0.0003 s/c=0.46 id=21% cov=95%
18 | -RLSRSHEFQRLRREG-TRVRSGYLWC---VMLQD--P------SLP---GPAVAFAI-GRP-FGSAVRRNRLRRQLR---SILSDRESAM---GGG--MFLIGVN--NPHRDLPMPSFAQLTHDIDEILNK---


--------------------------------------------------------------------------------
/csb/test/data/mapping.pdb:
--------------------------------------------------------------------------------
 1 | HEADER    RIBOSOME                                30-MAR-01   1GIY              
 2 | COMPND    MOL_ID: 1;                                                            
 3 | COMPND   2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;                                  
 4 | COMPND   3 CHAIN: E;                                                            
 5 | SEQRES   1 E  338  LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET          
 6 | SEQRES   2 E  338  GLU GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO          
 7 | ATOM   3430  CA  MET E  65     -35.315 183.547 344.254  1.00  0.00           C  
 8 | ATOM   3431  CA  GLU E  66     -31.330 184.145 343.173  1.00  0.00           C  
 9 | ATOM   3432  CA  THR E  67     -27.574 184.326 344.054  1.00  0.00           C  
10 | ATOM   3433  CA  VAL E  68     -25.637 187.772 343.919  1.00  0.00           C  
11 | TER    3634      VAL E  68                                                      
12 | END                                                                             
13 | 


--------------------------------------------------------------------------------
/csb/test/data/mapping2.pdb:
--------------------------------------------------------------------------------
 1 | HEADER    RIBOSOME                                30-MAR-01   1GIY              
 2 | COMPND    MOL_ID: 1;                                                            
 3 | COMPND   2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;                                  
 4 | COMPND   3 CHAIN: E;                                                            
 5 | SEQRES   1 E  338  LEU VAL ASN ASP GLU PRO ASN SER PRO ARG GLU GLY MET          
 6 | SEQRES   2 E  338  GLU THR VAL PRO VAL THR VAL ILE GLU THR PRO PRO          
 7 | ATOM   3430  CA  MET E  65     -35.315 183.547 344.254  1.00  0.00           C  
 8 | ATOM   3433  CA  VAL E  68     -25.637 187.772 343.919  1.00  0.00           C  
 9 | TER    3634      VAL E  68                                                      
10 | END                                                                             
11 | 


--------------------------------------------------------------------------------
/csb/test/data/mapping3.pdb:
--------------------------------------------------------------------------------
 1 | HEADER    RIBOSOME                                30-MAR-01   1GIY              
 2 | COMPND    MOL_ID: 1;                                                            
 3 | COMPND   2 MOLECULE: 50S RIBOSOMAL PROTEIN L3;                                  
 4 | COMPND   3 CHAIN: E;                                                            
 5 | SEQRES   1 E  338  LEU VAL ASN ASP GLU PRO ASN                                  
 6 | ATOM   3430  CA  SER E  65     -35.315 183.547 344.254  1.00  0.00           C  
 7 | ATOM   3433  CA  GLY E  68     -25.637 187.772 343.919  1.00  0.00           C  
 8 | TER    3634      GLY E  68                                                      
 9 | END                                                                             
10 | 


--------------------------------------------------------------------------------
/csb/test/data/maxent.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csb-toolbox/CSB/1a858c9a8bbb5e528b06dc0ffb67cf151489413b/csb/test/data/maxent.pickle


--------------------------------------------------------------------------------
/csb/test/data/modified.pdb:
--------------------------------------------------------------------------------
 1 | HEADER    .                                       12-Mar-13   TEST              
 2 | COMPND   1 MOL_ID: 1;                                                           
 3 | COMPND   2 MOLECULE: HYPOTHETICAL PROTEIN RV0983;                               
 4 | COMPND   3 CHAIN: A;                                                            
 5 | SEQRES   1 A   20  MET PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL          
 6 | SEQRES   2 A   20  VAL PRO SER VAL VAL MSE                                      
 7 | ATOM     95  N   MSE A  21      55.075  23.677  19.139  1.00 33.35           N  
 8 | ATOM     96  CA  MSE A  21      54.672  23.803  17.741  1.00 37.62           C  
 9 | ATOM     97  C   MSE A  21      54.539  22.403  17.111  1.00 35.23           C  
10 | ATOM     98  O   MSE A  21      55.344  21.485  17.419  1.00 34.50           O  
11 | ATOM     99  CB  MSE A  21      55.662  24.626  16.915  1.00 34.96           C  
12 | ATOM    100  CG  MSE A  21      55.211  24.834  15.458  1.00 34.65           C  
13 | ATOM    101 SE   MSE A  21      56.402  26.367  14.841  1.00 52.51          Se  
14 | ATOM    102  CE  MSE A  21      56.143  26.398  12.634  1.00 50.46           C
15 | TER                                                                             
16 | END                                                                             
17 | 


--------------------------------------------------------------------------------
/csb/test/data/modified2.pdb:
--------------------------------------------------------------------------------
 1 | HEADER    .                                       12-Mar-13   TEST              
 2 | COMPND   1 MOL_ID: 1;                                                           
 3 | COMPND   2 MOLECULE: HYPOTHETICAL PROTEIN RV0983;                               
 4 | COMPND   3 CHAIN: A;                                                            
 5 | SEQRES   1 A   20  MSE PRO PRO GLY SER VAL GLU GLN VAL ALA ALA LYS VAL          
 6 | SEQRES   2 A   20  VAL PRO SER VAL VAL MET                                      
 7 | ATOM     95  N   MSE A  21      55.075  23.677  19.139  1.00 33.35           N  
 8 | ATOM     96  CA  MSE A  21      54.672  23.803  17.741  1.00 37.62           C  
 9 | ATOM     97  C   MSE A  21      54.539  22.403  17.111  1.00 35.23           C  
10 | ATOM     98  O   MSE A  21      55.344  21.485  17.419  1.00 34.50           O  
11 | ATOM     99  CB  MSE A  21      55.662  24.626  16.915  1.00 34.96           C  
12 | ATOM    100  CG  MSE A  21      55.211  24.834  15.458  1.00 34.65           C  
13 | ATOM    101 SE   MSE A  21      56.402  26.367  14.841  1.00 52.51          Se  
14 | ATOM    102  CE  MSE A  21      56.143  26.398  12.634  1.00 50.46           C
15 | TER                                                                             
16 | END                                                                             
17 | 


--------------------------------------------------------------------------------
/csb/test/data/standard.tsv:
--------------------------------------------------------------------------------
1 | 11	11.1	Row eleven
2 | 12	12.2	Row twelve
3 | 13	13.3	Row thirteen
4 | 


--------------------------------------------------------------------------------
/csb/test/data/struct.ali.mfasta:
--------------------------------------------------------------------------------
1 | >3p1uB
2 | ENPDKPTDDV-NYNMNEPRLASTLR--G
3 | >1d3zA
4 | EV-EPS-DTIENVK------AKIQDKEG
5 | 


--------------------------------------------------------------------------------
/csb/test/data/test.fa:
--------------------------------------------------------------------------------
 1 | >gi|148654187 ribonuclease P protein component 
 2 | KRLLKPAEFKPVFNQPlFKVHQTHFMAFAYDSDHLQARLGMAItKKKIPTAVARNTIKRIIREQFRHTHAQLPALDVVF
 3 | ILKKSTKALSNEQMRQEISDILSKVISK
 4 | 
 5 | >gi|142801636|gb|EDA68688.1 hypothetical protein GOS_1956086
 6 | LKVNSSTIKILNNKPVYNSKILKLYTIPNSEDGPRLAIQITKRaIRLAVTRNLVRRKIKEDFRANYAEIAKHDCLLVIS
 7 | SKisSAKHEISDILMQEWKQSLKSLEK
 8 | 
 9 | >gi|143373151
10 | RLSRSHEFQRLRREGTRVRSGYLwCVMLQDPSLPGPAVAFAIGRPFGSAVRRNRLRRQLRSILSDRESAMGGGMFLIGV
11 | NNPHRDLPMPSFAQLTHDIDEILNK


--------------------------------------------------------------------------------
/csb/test/data/test.hhm:
--------------------------------------------------------------------------------
 1 | HHsearch 1.5
 2 | NAME  name
 3 | FAM   fam
 4 | LENG  2 match states, 2 columns in multiple alignment
 5 | NEFF  10
 6 | PCT   False
 7 | SEQ
 8 | #
 9 | NULL   5346	8263	5863
10 | HMM    A	C	E
11 |        M->M	M->I	M->D	I->M	I->I	D->M	D->D	Neff	Neff_I	Neff_D
12 |        321    	3321   	3321   	*	*	*	*	*	*	*	
13 | A 1    0      	1000   	1584   	1
14 |        321    	3321   	3321   	1000   	1000   	1000   	1000   	*      	*      	*      	
15 | 
16 | C 2    0      	1000   	1584   	2
17 |        152    	3321   	*	1000   	1000   	0      	*	*      	*      	*      	
18 | 
19 | //


--------------------------------------------------------------------------------
/csb/test/data/test2.hhm:
--------------------------------------------------------------------------------
 1 | HHsearch 1.5
 2 | NAME  name
 3 | FAM   fam
 4 | LENG  2 match states, 2 columns in multiple alignment
 5 | NEFF  0.0
 6 | PCT   False
 7 | SEQ
 8 | #
 9 | NULL   5346	8263	5863
10 | HMM    A	C	E
11 |        M->M	M->I	M->D	I->M	I->I	D->M	D->D	Neff	Neff_I	Neff_D
12 |        0      	*	0      	*	*	*	*	*	*	*	
13 | A 1    0      	1000   	1584   	1
14 |        321    	3321   	3321   	1000   	1000   	1000   	1000   	*      	*      	*      	
15 | 
16 | C 2    0      	1000   	1584   	2
17 |        0      	*	*	0      	*	0      	*	*      	*      	*      	
18 | 
19 | //


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from setuptools import setup, find_packages
 4 | from csb.build import ROOT 
 5 | from io import open
 6 | 
 7 | try:
 8 |     __doc__ = open('README.rst', encoding="utf-8").read()
 9 | except IOError:
10 |     __doc__ = ""
11 | 
12 | 
13 | NAME = ROOT
14 | AUTHOR = "Michael Habeck et al."
15 | EMAIL = "ivan.kalev@gmail.com"
16 | URL = "http://github.com/csb-toolbox"
17 | SUMMARY = "Computational Structural Biology Toolbox"
18 | DESCRIPTION = __doc__
19 | LICENSE = 'MIT'
20 | 
21 | REQUIREMENTS = open("requirements.txt", encoding="utf-8").readlines()
22 | DEV_REQUIREMENTS = ["setuptools"]
23 | 
24 | v = {}
25 | exec(open(ROOT + "/__init__.py", encoding="utf-8").read(), v)
26 | VERSION = v["Version"]()
27 | 
28 | 
29 | def build():
30 | 
31 |     return setup(
32 |         name=NAME,
33 |         packages=find_packages(),
34 |         include_package_data=True,
35 |         version=VERSION.short,
36 |         author=AUTHOR,
37 |         author_email=EMAIL,
38 |         url=URL,
39 |         description=SUMMARY,
40 |         long_description=DESCRIPTION,
41 |         license=LICENSE,
42 |         install_requires=REQUIREMENTS,
43 |         tests_require=DEV_REQUIREMENTS,
44 |         extras_require={
45 |             'dev': DEV_REQUIREMENTS
46 |         },
47 |         test_suite="csb.test.cases",
48 |         entry_points={
49 |             'console_scripts': [
50 |                 'csb-test = csb.test.app:main',
51 |                 'csb-bfit = csb.apps.bfit:main',
52 |                 'csb-bfite = csb.apps.bfite:main',
53 |                 'csb-csfrag = csb.apps.csfrag:main',
54 |                 'csb-hhfrag = csb.apps.hhfrag:main',
55 |                 'csb-buildhmm = csb.apps.buildhmm:main',
56 |                 'csb-hhsearch = csb.apps.hhsearch:main',
57 |                 'csb-precision = csb.apps.precision:main',
58 |                 'csb-promix = csb.apps.promix:main',
59 |                 'csb-embd = csb.apps.embd:main'
60 |             ]
61 |         },
62 |         classifiers=(
63 |             'Development Status :: 5 - Production/Stable',
64 |             'Intended Audience :: Developers',
65 |             'Intended Audience :: Science/Research',
66 |             'License :: OSI Approved :: MIT License',
67 |             'Operating System :: OS Independent',
68 |             'Programming Language :: Python',
69 |             'Programming Language :: Python :: 3.6',
70 |             'Programming Language :: Python :: 3.7',
71 |             'Programming Language :: Python :: 3.8',
72 |             'Programming Language :: Python :: 3.9',
73 |             'Programming Language :: Python :: 3.10',
74 |             'Programming Language :: Python :: 3.11',
75 |             'Programming Language :: Python :: 3.12',
76 |             'Topic :: Scientific/Engineering',
77 |             'Topic :: Scientific/Engineering :: Bio-Informatics',
78 |             'Topic :: Scientific/Engineering :: Mathematics',
79 |             'Topic :: Scientific/Engineering :: Physics',
80 |             'Topic :: Software Development :: Libraries'
81 |         )
82 |     )
83 | 
84 | 
85 | if __name__ == '__main__':
86 |     build()
87 | 


--------------------------------------------------------------------------------