├── .github
    └── workflows
    │   └── release.yml
├── .gitignore
├── LICENSE
├── README.md
├── docs
    ├── .nojekyll
    ├── README.md
    ├── _sidebar.md
    ├── assets
    │   └── pseudoknot.png
    ├── development
    │   └── README.md
    ├── index.html
    ├── setup
    │   ├── environment.md
    │   ├── install.md
    │   └── packages.md
    ├── sherlock
    │   ├── README.md
    │   ├── environment.md
    │   └── jobs.md
    └── usage
    │   ├── README.md
    │   ├── pseudoknots.md
    │   ├── structure_prediction.md
    │   └── utilities.md
├── example_arnie_file.txt
├── examples
    ├── data_for_examples
    │   └── ribologic_SI.txt
    └── start_here.ipynb
├── notebooks
    ├── IntroToArnie.ipynb
    └── README.md
├── parameter_files
    ├── contrafold.params.complementary
    ├── contrafold.params.noncomplementary
    ├── learntofold.contrafold.params
    ├── rna_andronescu2007.par
    ├── rna_langdon2018.par
    ├── rna_turner1999.par
    └── rna_turner2004.par
├── pyproject.toml
├── pytest.ini
├── scripts
    ├── score_pseudoacc_mea.py
    ├── write_bpp_matrices.py
    └── write_unpaired_vectors.py
├── src
    └── arnie
    │   ├── __init__.py
    │   ├── bpps.py
    │   ├── free_energy.py
    │   ├── mea
    │       ├── __init__.py
    │       ├── mea.py
    │       ├── mea_utils.py
    │       └── threshknot.py
    │   ├── mfe.py
    │   ├── mfe_bootstrap.py
    │   ├── pfunc.py
    │   ├── pk_predictors.py
    │   ├── sample_structures.py
    │   ├── utils.py
    │   └── viz.py
└── tests
    ├── __init__.py
    ├── test_bpps.py
    ├── test_converters.py
    ├── test_evaluation_metrics.py
    ├── test_file_readers.py
    ├── test_files
        ├── samiv_eternafold.prob
        ├── seq.bpseq
        ├── seq.ct
        └── seq.prob
    ├── test_helix_getting_and_removing.py
    ├── test_linearpartition.py
    ├── test_pfunc.py
    ├── test_pk.py
    ├── test_sample_struct.py
    ├── test_settings.py
    ├── test_structure_handling.py
    └── test_vfold_versions.py


/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Python package to PyPI
 2 | 
 3 | on: push  # build runs on every push; publishing below is gated on tag refs
 4 | 
 5 | jobs:
 6 |   build:
 7 |     name: Build distribution package
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     steps:
11 |     - name: Checkout source
12 |       uses: actions/checkout@v4
13 | 
14 |     - name: Set up Python 3.12
15 |       uses: actions/setup-python@v5
16 |       with:
17 |         python-version: "3.12"
18 | 
19 |     - name: Install pypa/build
20 |       run: python3 -m pip install build --user
21 | 
22 |     - name: Build a binary wheel and a source tarball
23 |       run: python3 -m build
24 | 
25 |     - name: Store the distribution packages
26 |       uses: actions/upload-artifact@v4
27 |       with:
28 |         name: release-distributions  # downloaded by both jobs below
29 |         path: dist/
30 | 
31 |   publish-to-pypi:
32 |     name: Publish Python distribution to PyPI
33 |     if: startsWith(github.ref, 'refs/tags/')  # only publish to PyPI on tag pushes
34 |     needs:
35 |     - build
36 |     runs-on: ubuntu-latest
37 |     environment:
38 |       name: pypi
39 |       url: https://pypi.org/p/arnie
40 |     permissions:
41 |       id-token: write  # IMPORTANT: mandatory for trusted publishing
42 | 
43 |     steps:
44 |     - name: Download all the dists
45 |       uses: actions/download-artifact@v4
46 |       with:
47 |         name: release-distributions
48 |         path: dist/
49 |     - name: Publish distribution to PyPI
50 |       uses: pypa/gh-action-pypi-publish@release/v1
51 | 
52 |   github-release:
53 |     name: >-
54 |       Sign the Python distribution with Sigstore
55 |       and upload them to GitHub Release
56 |     needs:  # a skipped publish job (non-tag push) skips this job as well
57 |     - publish-to-pypi
58 |     runs-on: ubuntu-latest
59 | 
60 |     permissions:
61 |       contents: write  # IMPORTANT: mandatory for making GitHub Releases
62 |       id-token: write  # IMPORTANT: mandatory for sigstore
63 | 
64 |     steps:
65 |     - name: Download all the dists
66 |       uses: actions/download-artifact@v4
67 |       with:
68 |         name: release-distributions
69 |         path: dist/
70 |     - name: Sign the dists with Sigstore
71 |       uses: sigstore/gh-action-sigstore-python@v3.0.0
72 |       with:
73 |         inputs: >-
74 |           ./dist/*.tar.gz
75 |           ./dist/*.whl
76 |     - name: Create GitHub Release
77 |       env:  # tag/repo are read from default env vars, not `${{ }}` interpolation
78 |         GITHUB_TOKEN: ${{ github.token }}
79 |       run: >-
80 |         gh release create
81 |         "$GITHUB_REF_NAME"
82 |         --repo "$GITHUB_REPOSITORY"
83 |         --notes ""
84 |     - name: Upload artifact signatures to GitHub Release
85 |       env:
86 |         GITHUB_TOKEN: ${{ github.token }}
87 |       # Upload everything in `dist/` (built packages plus the files the
88 |       # Sigstore step wrote beside them) with the `gh` CLI. The tag and repo
89 |       # come from default env vars rather than `${{ }}` interpolation.
90 |       run: >-
91 |         gh release upload
92 |         "$GITHUB_REF_NAME" dist/**
93 |         --repo "$GITHUB_REPOSITORY"
94 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/*
3 | .ipynb_checkpoints/*
4 | */.ipynb_checkpoints/*
5 | rna.ps
6 | *.arnie
7 | dist/*


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Leland Stanford Junior University
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # arnie
 2 | Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages.
 3 | 
 4 | ## Documentation
 5 | [See our full docs.](https://daslab.github.io/arnie)
 6 | 
 7 | ## Install
 8 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/).
 9 | 
10 | `pip install arnie`
11 | 
12 | ## Repo Organization
13 | 
14 | `src/arnie`: source code for the arnie package.
15 | 
16 | `docs`: docsify-based markdown documentation for the arnie package.
17 | 
18 | `tests`: unit tests 
19 | 
20 | `notebooks`: example jupyter notebooks with usage.
21 | 
22 | `scripts`: scripts for processing sequences in batch.
23 | 
24 | `parameter_files`: dir of various parameter files for packages, put here out of convenience.
25 | 
26 | 
27 | (c) 2024 Leland Stanford Jr University
28 | Authors:
29 | Hannah Wayment-Steele


--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/.nojekyll


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # arnie
 2 | Arnie is a Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages.
 3 | 
 4 | ## Install
 5 | `arnie` is [available on PyPI](https://pypi.org/project/arnie/).
 6 | 
 7 | `pip install arnie`
 8 | 
 9 | ## Simple Setup
10 | Arnie works by delegating calls for structure predictions to various RNA prediction libraries. To use arnie, these libraries must be installed, and environment variables must point to their installed locations. Here we will use [Eternafold](https://github.com/eternagame/Eternafold), which is simple to install via [Bioconda](https://bioconda.github.io/recipes/eternafold/README.html). This example assumes you have conda installed already; see the full [setup page](/setup/environment.md) for more details about setting up an arnie environment.
11 | 
12 | 
13 | ```
14 | conda install -c bioconda eternafold
15 | export eternafold_PATH=/path/to/installed/location
16 | ```
17 | 
18 | ## Usage:
19 | 
20 | See the [usage docs](/usage/structure_prediction) for example syntax. In brief, comparing across packages is simple. For computing base pairing probability matrices:
21 | 
22 | ```
23 | from arnie.bpps import bpps
24 | 
25 | bpps_dict = {}
26 | my_sequence = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG'
27 | 
28 | for pkg in ['vienna','nupack','RNAstructure','contrafold','RNAsoft']:
29 |     bpps_dict[pkg] = bpps(my_sequence, package=pkg)
30 | ```
31 | 
32 | (c) 2024 [Das Lab](https://daslab.stanford.edu/), Leland Stanford Jr University


--------------------------------------------------------------------------------
/docs/_sidebar.md:
--------------------------------------------------------------------------------
 1 | <!-- docs/_sidebar.md -->
 2 | 
 3 | - [Home](README.md)
 4 | - Getting Started
 5 |   - [Installation](setup/install.md)
 6 |   - [Environment](setup/environment.md)
 7 | - Using Arnie
 8 |   - [Basics](usage/README.md)
 9 |   - [Structure Prediction](usage/structure_prediction.md)
10 |   - [Pseudoknot Prediction](usage/pseudoknots.md)
11 | - Arnie on Sherlock
12 |   - [Environment](sherlock/environment.md)
13 |   - [Jobs](sherlock/jobs.md)
14 | - [Contributing](development/README.md)


--------------------------------------------------------------------------------
/docs/assets/pseudoknot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/assets/pseudoknot.png


--------------------------------------------------------------------------------
/docs/development/README.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | ## Installing via Github
 4 | The `arnie` package source code is hosted on [Github](https://github.com/DasLab/arnie). You can clone the repo as below.
 5 | 
 6 | ```
 7 | git clone https://github.com/DasLab/arnie.git
 8 | ```
 9 | 
10 | You can also use pip to install arnie from our Github repo:
11 | ```
12 | pip install git+https://github.com/DasLab/arnie
13 | ```
14 | This is particularly useful for testing new features internally before releasing on PyPI.
15 | 
16 | ## Repo Organization
17 | 
18 | `src/arnie`: source code for the arnie package.
19 | 
20 | `docs`: docsify-based markdown documentation for the arnie package.
21 | 
22 | `tests`: unit tests 
23 | 
24 | `notebooks`: example jupyter notebooks with usage.
25 | 
26 | `scripts`: scripts for processing sequences in batch.
27 | 
28 | `parameter_files`: dir of various parameter files for packages, put here out of convenience.
29 | 
30 | 
31 | 
32 | ## Github Issues
33 | We use [Github issues](https://github.com/DasLab/arnie/issues) to coordinate development tasks and track feature development and bug fixes. If you run into problems while using `arnie`, please file an issue so that we can address the bug. Similarly, if you have a feature idea that could simplify your research, file an issue detailing your proposed feature. 
34 | 
35 | ## Package Testing
36 | Tests are located in the `tests` directory of the repo. We use the [pytest](https://docs.pytest.org/en/stable/) testing framework. Tests are run in the repo root directory. 
37 | 
38 | To run all the tests,
39 | ```
40 | pytest
41 | ```
42 | To run a specific test,
43 | ```
44 | pytest tests/test_structure_handling.py
45 | ```
46 | If you add new features or fix a bug, make sure to update the tests appropriately. 
47 | 
48 | ## Package Distribution
49 | We distribute arnie via the [Python Package Index](https://pypi.org/). The DasLab has a [PyPI account](https://pypi.org/user/daslab/) for all our packages, with `arnie` available [here](https://pypi.org/project/arnie/).
50 | 
51 | Arnie package release is automated via GitHub Actions. The [release workflow](https://github.com/DasLab/arnie/actions/workflows/release.yml) builds the package for distribution, publishes it to PyPI, and creates a GitHub release. The build job runs on every push; publishing to PyPI and creating the GitHub release happen only when a git tag is pushed.
52 | 
53 | To push a new release, update the `pyproject.toml` version number as appropriate (we follow the [semantic versioning](https://semver.org/) standard). Next, define a matching git tag for the version number, and then push to Github.
54 | ```
55 | git checkout master
56 | git tag -a v1.1.0 -m "Arnie Release v1.1.0"
57 | git push origin tag v1.1.0 
58 | ```


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <title>arnie</title>
 6 |   <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
 7 |   <meta name="description" content="Python API to compute RNA energetics and do structure prediction across multiple secondary structure packages.">
 8 |   <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0">
 9 |   <!--
10 |     Docsify themes: "vue" is the active stylesheet; "dark", "buble" and
11 |     "pure" are loaded with the `disabled` attribute as alternates.
12 |   -->
13 | <link
14 |   rel="stylesheet"
15 |   href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/vue.css"
16 |   title="vue"
17 | />
18 | <link
19 |   rel="stylesheet"
20 |   href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/dark.css"
21 |   title="dark"
22 |   disabled
23 | />
24 | <link
25 |   rel="stylesheet"
26 |   href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/buble.css"
27 |   title="buble"
28 |   disabled
29 | />
30 | <link
31 |   rel="stylesheet"
32 |   href="//cdn.jsdelivr.net/npm/docsify@4/lib/themes/pure.css"
33 |   title="pure"
34 |   disabled
35 | />
36 | <style>
37 |   /* nav.app-nav li ul {
38 |     min-width: 100px;
39 |   } */
40 | 
41 |   .sidebar-nav li {
42 |     font-weight: 900;
43 |   }
44 | </style>
45 | </head>
46 | <body>
47 |   <!-- Mount point that Docsify renders the documentation into. -->
48 |   <div id="app"></div>
49 |   <script>
50 |     window.$docsify = {
51 |       name: '',
52 |       repo: 'https://github.com/DasLab/arnie',
53 |       loadSidebar: true,
54 |       subMaxLevel: 2
55 |     }
56 |   </script>
57 |   <script src="//cdn.jsdelivr.net/npm/docsify@4"></script>
58 | </body>
59 | </html>
60 | 


--------------------------------------------------------------------------------
/docs/setup/environment.md:
--------------------------------------------------------------------------------
 1 | # Setting up an arnie environment
 2 | 
 3 | `arnie` is a Python package to simplify interacting with various RNA prediction and analysis libraries. To work, the `arnie` package needs to know the location of those libraries on the local filesystem. `arnie` uses environment variables to point to package locations. 
 4 | 
 5 | ## Environment Variables
 6 | Here we assume you've already installed a package you want arnie to use (visit the [supported packages page](/setup/packages.md) for more details about specific package installation requirements). Arnie expects environment variables in the form of "{package_name}_PATH". So for `contrafold`, we specify its installed location for arnie with `export contrafold_PATH=/path/to/executable/`. Certain packages require additional resources for arnie to operate. For example, `SpotRNA` also requires a pointer to the conda environment it is installed with. The [supported packages page](/setup/packages.md) details each package's expected environment variables.
 7 | 
 8 | Arnie also expects an `arnie_TMP` environment variable to define where arnie should write temporary files to. Some predictor packages write to files to generate their output; arnie uses the `arnie_TMP` location to support these packages.
 9 | 
10 | ## Arnie File
11 | As a fallback, you can also specify an "arnie_file.txt" that defines these paths. There is an example arnie_file.txt included in the arnie repo that demonstrates the expected syntax. If using the arnie_file approach, you need to set an `ARNIEFILE` environment variable pointing to your arnie_file.txt (e.g., `export ARNIEFILE="/path/to/arnie/<my_file.txt>"`).
12 | 
13 | ## Conda Environments
14 | We recommend using [conda](https://anaconda.org/anaconda/conda) to set up private Python execution environments for your arnie operations. Conda simplifies the sometimes complicated process of managing Python dependencies by creating virtual environments that isolate installed packages. Conda also supports simplified distribution of a wide range of scientific Python libraries, and even a number of RNA structure packages. We recommend the following setup for your RNA science conda environment.
15 | ```
16 | conda config --add channels bioconda
17 | conda config --add channels conda-forge
18 | conda config --set channel_priority strict
19 | ```
20 | [Bioconda](https://bioconda.github.io/) and [Conda-Forge](https://conda-forge.org/) are distribution channels for conda packages. Bioconda, for instance, hosts the `ViennaRNA` and `Eternafold` RNA packages.
21 | 
22 | We set up an example conda environment to support our arnie work below. First we create the environment with its own Python (and therefore its own pip). Next we activate the environment, which sets up our isolated Python execution environment. After activation, we pip install arnie (which will be installed into the isolated environment with proper PYTHONPATH handling).
23 | ```
24 | conda create -n rna-env python
25 | conda activate rna-env
26 | pip install arnie
27 | ```


--------------------------------------------------------------------------------
/docs/setup/install.md:
--------------------------------------------------------------------------------
 1 | # Installing Arnie
 2 | We recommend installing `arnie` as a package from [PyPI](https://pypi.org/project/arnie/) via pip.
 3 | ```
 4 | pip install arnie
 5 | ```
 6 | 
 7 | You can also use pip to install arnie directly from our Github repo:
 8 | ```
 9 | pip install git+https://github.com/DasLab/arnie
10 | ```
11 | 
12 | ## Installing via Github
13 | Alternatively, the `arnie` package source code is hosted on [Github](https://github.com/DasLab/arnie). You can install via source code as below.
14 | 
15 | ```
16 | git clone https://github.com/DasLab/arnie.git
17 | ```
18 | 
19 | Note that if you clone the Github repo instead of installing with pip, you will need to add the repo's `src` directory (which contains the `arnie` package) to your PYTHONPATH (for example, in your .bashrc as `export PYTHONPATH=$PYTHONPATH:/path/to/arnie/src`).


--------------------------------------------------------------------------------
/docs/setup/packages.md:
--------------------------------------------------------------------------------
 1 | # Supported Packages
 2 | 
 3 | ## Eternafold
 4 | 
 5 | ## Contrafold
 6 | 
 7 | ## Vienna
 8 | 
 9 | ## NuPACK
10 | 
11 | ## RNAstructure
12 | 
13 | ## RNAsoft
14 | 
15 | 


--------------------------------------------------------------------------------
/docs/sherlock/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/sherlock/README.md


--------------------------------------------------------------------------------
/docs/sherlock/environment.md:
--------------------------------------------------------------------------------
 1 | # Sherlock
 2 | 
 3 | [Sherlock](https://www.sherlock.stanford.edu/), Stanford's high-performance computing cluster, is a useful resource for compute-intensive arnie tasks. 
 4 | 
 5 | If you've never worked with cluster computing before, there are some differences in how things work from your laptop. The [Sherlock docs](https://www.sherlock.stanford.edu/docs/) are a great place to start, and the Sherlock team even does [onboarding sessions and office hours](https://www.sherlock.stanford.edu/docs/#onboarding-sessions) to help new users.
 6 | 
 7 | ## Storage on Sherlock
 8 | The first thing to understand when setting up your Sherlock environment is where to store data. Sherlock offers several [data storage systems](https://www.sherlock.stanford.edu/docs/storage/) tailored for specific needs. We recommend using them as follows:
 9 | - `$HOME`: storage for your rna-environment, miniconda install, source code, etc
10 | - `$GROUPHOME`: storage for shared resources or projects that other lab members may access
11 | - `$SCRATCH/$GROUP_SCRATCH`: high-performance storage for large datasets and temporary files (WARNING: files on SCRATCH and GROUP_SCRATCH are automatically purged
12 |  90 days after their last content modification; make sure you back up data there before this window)
13 | - `$LSCRATCH`: node-local SSD; useful for specific jobs where high IOPS are important or when performing large batch jobs that may impact group resources
14 | - `$OAK`: long-term storage of large research datasets
15 | 
16 | ## Installing software on Sherlock
17 | Sherlock provides specific scientific computing software pre-installed on the Sherlock system via ["modules"](https://www.sherlock.stanford.edu/docs/software/modules/). These modules are selected and maintained to provide maximum compatibility and reduce dependency conflicts. You can search for available modules [here](https://www.sherlock.stanford.edu/docs/software/list/). If you want to use Sherlock's module system instead of setting up your own Python environment, we recommend the following modules to load for arnie (and various downstream prediction algorithms):
18 | ```
19 | module load python/3.6.1
20 | module load py-numpy/1.18.1_py36 
21 | module load py-pandas/1.0.3_py36
22 | module load py-scipy/1.4.1_py36
23 | module load gcc
24 | module load glpk
25 | module load mpfr
26 | ```
27 | 
28 | ## Setting up your environment on Sherlock
29 | Setting up your environment on Sherlock is fairly straightforward. First, set up a folder for yourself in $GROUPHOME (`mkdir $GROUPHOME/{your_name}`) to store shared resources. Next, we'll install [miniconda](https://docs.anaconda.com/miniconda/) for Python environment management and package installation. After installing miniconda, we can configure conda with useful package channels, create and activate an rna-env environment to store our packages, and install arnie. We provide an environment file at `$GROUPHOME/rna-env/rna-environment.yaml` to create a standard environment with some standard packages.
30 | ```
31 | conda config --add channels bioconda
32 | conda config --add channels conda-forge
33 | conda config --set channel_priority strict
34 | conda env create -n rna-env -f $GROUPHOME/rna-env/rna-environment.yaml
35 | conda activate rna-env
36 | pip install arnie
37 | ```
38 | 
39 | Your new conda environment has arnie and a few predictors installed. However, many prediction libraries are not available via conda or pip and usually require installing from source. The lab maintains a directory of predictors on Sherlock that you should copy to your $HOME directory.
40 | ```
41 | cd $HOME
42 | git clone $GROUPHOME/rna-env
43 | ```
44 | Predictors are stored under `rna-env/predictors`. If you add new predictors in the course of your work, make sure to push your updates back to the $GROUPHOME origin repo.
45 | 
46 | Now that your environment is set up, let's take a look at [using Sherlock for compute jobs with arnie](jobs.md).


--------------------------------------------------------------------------------
/docs/sherlock/jobs.md:
--------------------------------------------------------------------------------
 1 | # Jobs on Sherlock
 2 | 
 3 | [Running jobs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/) on Sherlock is a little different. In order to fairly distribute cluster resources, Sherlock uses a scheduler called SLURM. Users define what work they want done in job files and submit them to the scheduler, which allocates the requested compute resources when it can. When your job is allocated resources, compute nodes with the requested resources will run your job automatically.
 4 | 
 5 | ## Batch Jobs
 6 | > A job is simply an instance of your program, for example your R, Python or Matlab script that is submitted to and executed by the scheduler (Slurm). When you submit a job with the `sbatch` command it's called a batch job and it will either run immediately or will pend (wait) in the queue.
 7 | 
 8 | The [Sherlock jobs docs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/#batch-jobs) are fairly comprehensive and will provide more detail than we can here. We will provide a few example batch scripts to demonstrate some standard uses. 
 9 | 
10 | ## Interactive jobs
11 | It can be helpful when initially creating a job to work on it interactively on a compute node like the ones that will run your job. You can request a compute node via the command `sh_dev`. By default, sh_dev allocates one core and 4 GB of memory on one node for one hour. See [the docs](https://www.sherlock.stanford.edu/docs/user-guide/running-jobs/#interactive-jobs) for more details about requesting an interactive node.
12 | 
13 | ## Interactive applications
14 | Sherlock provides several [interactive applications](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#interactive-applications) if you need to run GUI based interactive software. When working with arnie you will most likely use [JupyterNotebooks](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#jupyter-notebooks) or [JupyterLab](https://www.sherlock.stanford.edu/docs/user-guide/ondemand/?h=jupyter#jupyterlab) to interactively explore your research questions. 
15 | 
16 | ## Important Notes
17 | We have run into some common issues using Sherlock over the years. Here's a non-comprehensive list of things to watch out for while using Sherlock.
18 | 
19 | - **Permissions errors**:
20 | 
21 |   If you plan on working on projects with group members and share files in `$GROUPHOME`, remember to set the permissions of files you create to be group accessible. By default, files will be read-only for group members. `chmod -R 770 /path/to/file` will allow group members to read, write, and execute shared files in `$GROUPHOME`.
22 | 
23 | - **Large array jobs impacting $GROUPHOME**:
24 | 
25 |   Be careful about accessing resources in `$GROUPHOME` when running large array jobs. Thousands of the same job accessing the same files on `$GROUPHOME` can slow down file access for other lab members. In many cases, your code may not be the one accessing files in `$GROUPHOME`, but a predictor you're using might (`spotrna` causes this issue often). The best solution is to copy the files you're accessing to your `$SCRATCH` folder and access them there. If you have large array jobs requesting thousands of nodes, you may want to copy the files to the node's `$LSCRATCH` instead. See the array job example sbatch file for more details. 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/docs/usage/README.md:
--------------------------------------------------------------------------------
 1 | # Using Arnie
 2 | Arnie's primary purpose is to simplify the process of making structure predictions for an RNA sequence with a variety of structure prediction libraries. 
 3 | 
 4 | ## RNA structure
 5 | RNA molecules form complex three-dimensional shapes in nature. We represent these forms in three structure levels of increasing complexity. 
 6 | 
 7 | 1. **Primary Structure**
 8 | The primary structure of an RNA molecule is the base identity of the various nucleotides that make up the molecule. This sequence string is typically written in the 5' to 3' direction.
 9 | Example: "AGUAUCAAAAAAGAUAC"
10 | 
11 | 2. **Secondary Structure**
12 | The secondary structure of an RNA molecule is the set of base pairing interactions between nucleotides in an RNA molecule. There are multiple ways to computationally represent secondary structure, although arnie primarily uses two: the base pairing matrix and the dot bracket string.
13 | 
14 |   A ***base pairing matrix*** is an NxN matrix (where N is the length of the RNA sequence), with the value of the `i,j` position representing the probability of the `i` nucleotide pairing with the `j` nucleotide.
15 | 
16 |   A ***dot bracket string*** is a representation of secondary structure where `(` and `)` characters represent base pairs and `.` characters represent unpaired bases. For example, `((....))` in dot bracket notation indicates that the 1st nucleotide is paired with the 8th nucleotide, the 2nd nucleotide is paired with the 7th nucleotide, and the others are unpaired. More complex secondary structures can also be represented in dot bracket notation (see [Pseudoknots](usage/pseudoknots.md) for more details).
17 | 
18 |   Arnie provides [several methods to predict secondary structures](usage/structure_prediction.md).
19 | 
20 | 3. **Tertiary Structure**
21 | The tertiary structure is the three-dimensional structure of the RNA molecule, with each atom located in a 3D coordinate space. Arnie doesn't work with this level of structure.
22 | 
23 | ## Examples
24 | The easiest way to get started with arnie is trying out our example notebooks to explore the functionality arnie provides.
25 | 
26 | - [Basic Introduction / Install](https://github.com/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)
27 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)
28 | 


--------------------------------------------------------------------------------
/docs/usage/pseudoknots.md:
--------------------------------------------------------------------------------
  1 | # Pseudoknots
  2 | 
  3 | [Pseudoknots](https://en.wikipedia.org/wiki/Pseudoknot) are a more complex form of secondary structure. 
  4 | 
  5 | <img src='/assets/pseudoknot.png' alt="example of a pseudoknot"></img>
  6 | 
  7 | Unpaired bases in a loop structure may pair with nucleotides elsewhere in the RNA sequence. This type of pairing is impossible to represent with the `(`, `)`, and `.` characters in traditional dot bracket notation, so we introduce new characters to represent various levels of pseudoknot pairings. In order, Arnie uses `[`, `{`, `<`, and lower case alphabet characters (`abc...`) to represent opening pairs, and `]`, `}`, `>`, and upper case alphabet characters (`ABC...`) to represent closing pairs.
  8 | 
  9 | Here is an example pseudoknotted structure in dot bracket notation using the expanded character set: `...(((..[[[.(((...))))))]]]...`
 10 | 
 11 | Many traditional structure prediction algorithms struggle with predicting pseudoknot structures, but there are a variety of approaches that can predict these complex folds. Arnie provides two main functions to predict pseudoknots: `pk_predict` and `pk_predict_from_bpp`.
 12 | 
 13 | ## pk_predict
 14 | `pk_predict` takes an input RNA sequence string and returns a predicted secondary structure string in dot bracket notation that may include pseudoknots. It's very similar to the `mfe` function, but supports a different set of predictor packages that focus on pseudoknot prediction. 
 15 | 
 16 | **Args:**
 17 | ```
 18 |   seq (str): nucleic acid sequence, required
 19 |   predictor (str): the folding library to use
 20 |   ipknot options:
 21 |     model: one of ["LinearPartition-C","LinearPartition-V","Boltzmann","ViennaRNA","CONTRAfold","NUPACK"]
 22 |     t1: probability threshold level 1 
 23 |     t2: probability threshold level 2
 24 |     refinement: number of refinement iterations
 25 | 
 26 |   hotknots options:
 27 |     model: one of ["CC","RE","DP"]
 28 |     param: one of ["parameters_CC06.txt","parameters_CC09.txt","parameters_DP03.txt","parameters_DP09.txt"]
 29 | 
 30 |   spotrna options:
 31 |     cpu: number cpu threads
 32 | ```
 33 | 
 34 | **Returns:**
 35 | ```
 36 |   A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots.
 37 | ```
 38 | 
 39 | **Example:** 
 40 | ```
 41 | pk_predict("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAUCUAUACUUGCUCUACAGGUUGAG", "knotty")
 42 | 
 43 | '..........(((((([[[[[[.[[...[[[))))))]]]...]]..]]].]]]....'
 44 | ```
 45 | 
 46 | **Supported packages:**
 47 | - `hotknots`
 48 | - `ipknot`
 49 | - `knotty`
 50 | - `spotrna`
 51 | - `spotrna2`
 52 | - `e2efold`
 53 | - `pknots`
 54 | - `nupack`
 55 | 
 56 | ## pk_predict_from_bpp
 57 | `pk_predict_from_bpp` takes a different approach to pseudoknot prediction. Rather than use dedicated pseudoknot prediction packages, `pk_predict_from_bpp` uses post-processing algorithms that can predict likely pseudoknots based on a sequence's predicted base pair probability matrix. This allows us to examine sequences for predicted pseudoknots with traditional predictive models that don't support pseudoknots by default. 
 58 | 
 59 | `pk_predict_from_bpp` provides two processing algorithms, [`threshknot`](https://arxiv.org/abs/1912.12796) and [`hungarian`](https://en.wikipedia.org/wiki/Hungarian_algorithm).
 60 | 
 61 | **Args:**
 62 | ```
 63 |   bpp (array): base pair probability matrix, required
 64 |   heuristic (str): the pk prediction algorithm to use; either "hungarian" or "threshknot"
 65 |   threshknot options:
 66 |     theta
 67 |     max_iter
 68 |     allowed_buldge_len
 69 |     min_len_helix
 70 | 
 71 |   hungarian options:
 72 |     add_p_unpaired
 73 |     theta (aka prob_to_0_threshold_post)
 74 |     prob_to_0_threshold_prior
 75 |     prob_to_1_threshold_prior
 76 |     exp
 77 |     sigmoid_slope_factor
 78 |     ln
 79 |     allowed_buldge_len
 80 |     min_len_helix
 81 | ```
 82 | 
 83 | **Returns:**
 84 | ```
 85 |   A string in dot bracket notation representing the predicted secondary structure of the provided sequence, potentially including pseudoknots.
 86 | ```
 87 | 
 88 | **Example:** 
 89 | ```
 90 | bpp_matrix = bpps("GUAUCAAAAAAGAUACGCCGUAUGCUAAUAUGUAGGCGCUAUACUUGCUCUACACCGGCGGUUGAG", package="eternafold")
 91 | pk_predict_from_bpp(bpp_matrix)
 92 | 
 93 | '(((((......)))))..........................................'
 94 | ```
 95 | 
 96 | **Supported packages:**
 97 | - `eternafold`
 98 | - `contrafold`
 99 | - `vienna`
100 | - `nupack`
101 | - `rnasoft`
102 | - `rnastructure`
103 | - `vfold`
104 | 
105 | 


--------------------------------------------------------------------------------
/docs/usage/structure_prediction.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Structure Prediction
  3 | 
  4 | ## MFE
  5 | The `mfe` function generates a "minimum free energy" structure prediction with the selected package. The minimum free energy prediction is the secondary structure calculated to have the lowest free energy value. In theory, the lower the free energy, the more likely the structure is to form. Not all predictors support free energy-based estimates (although many do). 
  6 | 
  7 | Note: `mfe` operates differently than [`mea`](#mea). However, contrafold's default structure prediction is an MEA structure, not an MFE structure. When `mfe` is called with contrafold, it returns that default MEA structure unless the `viterbi` argument is set to `True` (contrafold's `--viterbi` flag), which makes contrafold use its Viterbi (MFE) algorithm instead.
  8 | 
  9 | 
 10 | **Args:**
 11 | ```
 12 |   seq (str): nucleic acid sequence, required
 13 |   package (str): the folding library to use
 14 |   T (float): temperature (Celsius)
 15 |   constraint (str): structure constraints
 16 |   motif (str): argument to vienna motif 
 17 |   linear (bool): call LinearFold to estimate MFE in Vienna or Contrafold
 18 |   return_dG_MFE (bool): also return dG(MFE) (specific to linearfold)
 19 |   dangles (bool): dangles or not (specific to linearfold)
 20 |   noncanonical (bool): include noncanonical pairs or not (specific to contrafold, RNAstructure (Cyclefold))
 21 |   param_file (str): path to specific thermodynamic parameter file (specific to contrafold, eternafold)
 22 |   coaxial (bool): coaxial stacking or not (specific to rnastructure)
 23 |   viterbi (bool): use the viterbi algorithm for mfe calculation (specific to contrafold)
 24 |   pseudo (bool): if True, will predict pseudoknots
 25 |   shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure)
 26 |   dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure)
 27 |   shape_file (str): path to file containing shape_signal (specific to rnastructure)
 28 |   dms_file (str): path to file containing dms_signal (specific to rnastructure)
 29 | ```
 30 | 
 31 | **Returns:**
 32 | ```
 33 |   A string in dot bracket notation representing the calculated MFE structure of the provided sequence.
 34 | ```
 35 | 
 36 | **Example:** 
 37 | ```
 38 | mfe("GUAUCAAAAAAGAUAC")
 39 | '(((((......)))))'
 40 | ```
 41 | 
 42 | **Supported packages:**
 43 | - `eternafold`
 44 | - `contrafold`
 45 | - `vienna`
 46 | - `rnastructure`
 47 | - `linearfold`
 48 | 
 49 | ## BPPS
 50 | The `bpps` function calculates the "base pairing probability matrix" with the selected package. The base pairing probability matrix is an NxN matrix (where N is the length of the RNA sequence), with the value at position `i,j` representing the probability of the `i`-th nucleotide pairing with the `j`-th nucleotide.
 51 | 
 52 | **Args:**
 53 | ```
 54 |   sequence (str): nucleic acid sequence, required
 55 |   package (str): the folding library to use
 56 |   constraint (str): structure constraint [vienna, contrafold, rnastructure]
 57 |   linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold
 58 | 
 59 |   motif (str): argument to vienna motif
 60 |   pseudo (bool): (NUPACK only) include pseudoknot calculation
 61 |   dangles (bool): dangles or not, specifiable for vienna, nupack
 62 |   dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA
 63 |   coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
 64 |   noncanonical (bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold))
 65 |   beam_size (int): Beam size for LinearPartition base pair calculation.
 66 |   DEBUG (bool): Output command-line calls to packages.
 67 |   threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition)
 68 |   shape_signal (list): list of normalized SHAPE reactivities, with negative values indicating no signal (specific to rnastructure)
 69 |   dms_signal (list): list of normalized DMS reactivities, with negative values indicating no signal (specific to rnastructure)
 70 |   shape_file (str): path to file containing shape_signal (specific to rnastructure)
 71 |   dms_file (str): path to file containing dms_signal (specific to rnastructure)
 72 | ```
 73 | 
 74 | **Returns:**
 75 | ```
 76 |   array: NxN matrix of base pair probabilities
 77 | ```
 78 | 
 79 | **Example:** 
 80 | ```
 81 | bpps("GUAUCAAAAAAGAUAC")
 82 | array([[0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 3.77178e-04,
 83 |         0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
 84 |         0.00000e+00, 0.00000e+00, 0.00000e+00, 4.39771e-04, 0.00000e+00,
 85 |         8.24776e-01],
 86 |        [0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
 87 |         0.00000e+00, 1.69534e-04, 2.01963e-04, 1.93469e-04, 2.05658e-04,
 88 |         2.01099e-04, 1.37709e-04, 5.21924e-04, 0.00000e+00, 8.42528e-01,
 89 |         0.00000e+00],
 90 | ...
 91 | ```
 92 | 
 93 | **Supported packages:**
 94 | - `eternafold`
 95 | - `contrafold`
 96 | - `vienna`
 97 | - `nupack`
 98 | - `rnasoft`
 99 | - `rnastructure`
100 | - `vfold`


--------------------------------------------------------------------------------
/docs/usage/utilities.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/docs/usage/utilities.md


--------------------------------------------------------------------------------
/example_arnie_file.txt:
--------------------------------------------------------------------------------
 1 | # paths to local installations of packages.  If package is not installed, leave as None
 2 | # Replace paths below with paths to your installations.
 3 | # NB: .gitignore file ignores *.arnie files. Name it as such if you don't want your local path file 
 4 | # included with your git repo.
 5 | 
 6 | rnastructure: /path/to/RNAstructure/exe
 7 | rnasoft: /path/to/MultiRNAFold
 8 | contrafold_2: /path/to/contrafold-se/src
 9 | eternafold: /path/to/eternafold/src/
10 | vfold: /path/to/Vfold2D
11 | nupack: /path/to/nupack3.2.2/build/bin
12 | 
13 | # for a Mac installed binary:
14 | vienna_2: /usr/local/bin
15 | # for path to a vienna build (alternative to the entry above; keep only one vienna_2 line):
16 | vienna_2: /path/to/ViennaRNA-2.4.10/src/bin
17 | vienna_1: /path/to/ViennaRNA-1.8.5/bin
18 | 
19 | # for linear partition
20 | linearfold: /path/to/LinearFold/bin
21 | linearpartition: /path/to/LinearPartition/bin
22 | 
23 | # for PK predictors
24 | hotknots: /path/to/HotKnots_v2.0/bin
25 | ipknot: /path/to/ipknot/build
26 | knotty: /path/to/Knotty
27 | pknots: /path/to/PKNOTS/bin
28 | spotrna: /path/to/SPOT-RNA
29 | spotrna_conda_env: /path/to/miniconda3/envs/spotrna/bin
30 | spotrna2: /path/to/SPOT-RNA2
31 | e2efold: /path/to/e2efold/e2efold_productive
32 | e2efold_conda_env: /path/to/miniconda3/envs/e2efold/bin
33 | 
34 | #TMP: location for tmp files for packages. Update to where you want your tmp files stored.
35 | TMP: /tmp
36 | 


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks
2 | This directory houses various notebooks demonstrating key Arnie functionality. 
3 | 
4 | - [Basic Introduction / Install](https://github.com/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)
5 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/daslab/arnie/blob/master/notebooks/IntroToArnie.ipynb)


--------------------------------------------------------------------------------
/parameter_files/contrafold.params.complementary:
--------------------------------------------------------------------------------
  1 | base_pair_AA 0
  2 | base_pair_AC 0
  3 | base_pair_AG 0
  4 | base_pair_AU 0.59791199
  5 | base_pair_CC 0
  6 | base_pair_CG 1.544290641
  7 | base_pair_CU 0
  8 | base_pair_GG 0
  9 | base_pair_GU -0.01304754992
 10 | base_pair_UU 0
 11 | terminal_mismatch_AAAA 0
 12 | terminal_mismatch_AAAC 0
 13 | terminal_mismatch_AAAG 0
 14 | terminal_mismatch_AAAU 0
 15 | terminal_mismatch_AACA 0
 16 | terminal_mismatch_AACC 0
 17 | terminal_mismatch_AACG 0
 18 | terminal_mismatch_AACU 0
 19 | terminal_mismatch_AAGA 0
 20 | terminal_mismatch_AAGC 0
 21 | terminal_mismatch_AAGG 0
 22 | terminal_mismatch_AAGU 0
 23 | terminal_mismatch_AAUA 0
 24 | terminal_mismatch_AAUC 0
 25 | terminal_mismatch_AAUG 0
 26 | terminal_mismatch_AAUU 0
 27 | terminal_mismatch_ACAA 0
 28 | terminal_mismatch_ACAC 0
 29 | terminal_mismatch_ACAG 0
 30 | terminal_mismatch_ACAU 0
 31 | terminal_mismatch_ACCA 0
 32 | terminal_mismatch_ACCC 0
 33 | terminal_mismatch_ACCG 0
 34 | terminal_mismatch_ACCU 0
 35 | terminal_mismatch_ACGA 0
 36 | terminal_mismatch_ACGC 0
 37 | terminal_mismatch_ACGG 0
 38 | terminal_mismatch_ACGU 0
 39 | terminal_mismatch_ACUA 0
 40 | terminal_mismatch_ACUC 0
 41 | terminal_mismatch_ACUG 0
 42 | terminal_mismatch_ACUU 0
 43 | terminal_mismatch_AGAA 0
 44 | terminal_mismatch_AGAC 0
 45 | terminal_mismatch_AGAG 0
 46 | terminal_mismatch_AGAU 0
 47 | terminal_mismatch_AGCA 0
 48 | terminal_mismatch_AGCC 0
 49 | terminal_mismatch_AGCG 0
 50 | terminal_mismatch_AGCU 0
 51 | terminal_mismatch_AGGA 0
 52 | terminal_mismatch_AGGC 0
 53 | terminal_mismatch_AGGG 0
 54 | terminal_mismatch_AGGU 0
 55 | terminal_mismatch_AGUA 0
 56 | terminal_mismatch_AGUC 0
 57 | terminal_mismatch_AGUG 0
 58 | terminal_mismatch_AGUU 0
 59 | terminal_mismatch_AUAA -0.184546064
 60 | terminal_mismatch_AUAC -0.1181844187
 61 | terminal_mismatch_AUAG -0.4461469607
 62 | terminal_mismatch_AUAU -0.6175254495
 63 | terminal_mismatch_AUCA 0.004788458708
 64 | terminal_mismatch_AUCC 0.08319395146
 65 | terminal_mismatch_AUCG -0.2249479995
 66 | terminal_mismatch_AUCU -0.3981327204
 67 | terminal_mismatch_AUGA 0.5191110288
 68 | terminal_mismatch_AUGC -0.3524119307
 69 | terminal_mismatch_AUGG -0.4056429433
 70 | terminal_mismatch_AUGU -0.7733932162
 71 | terminal_mismatch_AUUA -0.01574403519
 72 | terminal_mismatch_AUUC 0.268570042
 73 | terminal_mismatch_AUUG -0.0934388741
 74 | terminal_mismatch_AUUU 0.3373711531
 75 | terminal_mismatch_CAAA 0
 76 | terminal_mismatch_CAAC 0
 77 | terminal_mismatch_CAAG 0
 78 | terminal_mismatch_CAAU 0
 79 | terminal_mismatch_CACA 0
 80 | terminal_mismatch_CACC 0
 81 | terminal_mismatch_CACG 0
 82 | terminal_mismatch_CACU 0
 83 | terminal_mismatch_CAGA 0
 84 | terminal_mismatch_CAGC 0
 85 | terminal_mismatch_CAGG 0
 86 | terminal_mismatch_CAGU 0
 87 | terminal_mismatch_CAUA 0
 88 | terminal_mismatch_CAUC 0
 89 | terminal_mismatch_CAUG 0
 90 | terminal_mismatch_CAUU 0
 91 | terminal_mismatch_CCAA 0
 92 | terminal_mismatch_CCAC 0
 93 | terminal_mismatch_CCAG 0
 94 | terminal_mismatch_CCAU 0
 95 | terminal_mismatch_CCCA 0
 96 | terminal_mismatch_CCCC 0
 97 | terminal_mismatch_CCCG 0
 98 | terminal_mismatch_CCCU 0
 99 | terminal_mismatch_CCGA 0
100 | terminal_mismatch_CCGC 0
101 | terminal_mismatch_CCGG 0
102 | terminal_mismatch_CCGU 0
103 | terminal_mismatch_CCUA 0
104 | terminal_mismatch_CCUC 0
105 | terminal_mismatch_CCUG 0
106 | terminal_mismatch_CCUU 0
107 | terminal_mismatch_CGAA 0.08386423535
108 | terminal_mismatch_CGAC -0.2520716816
109 | terminal_mismatch_CGAG -0.6711841881
110 | terminal_mismatch_CGAU -0.3816350028
111 | terminal_mismatch_CGCA 0.1117852189
112 | terminal_mismatch_CGCC -0.1704393624
113 | terminal_mismatch_CGCG -0.2179987732
114 | terminal_mismatch_CGCU -0.459267635
115 | terminal_mismatch_CGGA 0.8520640313
116 | terminal_mismatch_CGGC -0.9332488517
117 | terminal_mismatch_CGGG -0.3289551692
118 | terminal_mismatch_CGGU -0.7778822056
119 | terminal_mismatch_CGUA -0.2422339958
120 | terminal_mismatch_CGUC -0.03780509247
121 | terminal_mismatch_CGUG -0.4322334143
122 | terminal_mismatch_CGUU -0.2419976114
123 | terminal_mismatch_CUAA 0
124 | terminal_mismatch_CUAC 0
125 | terminal_mismatch_CUAG 0
126 | terminal_mismatch_CUAU 0
127 | terminal_mismatch_CUCA 0
128 | terminal_mismatch_CUCC 0
129 | terminal_mismatch_CUCG 0
130 | terminal_mismatch_CUCU 0
131 | terminal_mismatch_CUGA 0
132 | terminal_mismatch_CUGC 0
133 | terminal_mismatch_CUGG 0
134 | terminal_mismatch_CUGU 0
135 | terminal_mismatch_CUUA 0
136 | terminal_mismatch_CUUC 0
137 | terminal_mismatch_CUUG 0
138 | terminal_mismatch_CUUU 0
139 | terminal_mismatch_GAAA 0
140 | terminal_mismatch_GAAC 0
141 | terminal_mismatch_GAAG 0
142 | terminal_mismatch_GAAU 0
143 | terminal_mismatch_GACA 0
144 | terminal_mismatch_GACC 0
145 | terminal_mismatch_GACG 0
146 | terminal_mismatch_GACU 0
147 | terminal_mismatch_GAGA 0
148 | terminal_mismatch_GAGC 0
149 | terminal_mismatch_GAGG 0
150 | terminal_mismatch_GAGU 0
151 | terminal_mismatch_GAUA 0
152 | terminal_mismatch_GAUC 0
153 | terminal_mismatch_GAUG 0
154 | terminal_mismatch_GAUU 0
155 | terminal_mismatch_GCAA -0.1703136025
156 | terminal_mismatch_GCAC -0.09154056357
157 | terminal_mismatch_GCAG -0.2522413002
158 | terminal_mismatch_GCAU -0.8520314799
159 | terminal_mismatch_GCCA 0.04763224188
160 | terminal_mismatch_GCCC -0.2428654283
161 | terminal_mismatch_GCCG -0.2079275061
162 | terminal_mismatch_GCCU -0.1874270053
163 | terminal_mismatch_GCGA 0.6540033983
164 | terminal_mismatch_GCGC -0.7823988605
165 | terminal_mismatch_GCGG 0.1995898255
166 | terminal_mismatch_GCGU -0.4432169392
167 | terminal_mismatch_GCUA -0.1736921762
168 | terminal_mismatch_GCUC 0.288494362
169 | terminal_mismatch_GCUG -0.01638238057
170 | terminal_mismatch_GCUU 0.6757988971
171 | terminal_mismatch_GGAA 0
172 | terminal_mismatch_GGAC 0
173 | terminal_mismatch_GGAG 0
174 | terminal_mismatch_GGAU 0
175 | terminal_mismatch_GGCA 0
176 | terminal_mismatch_GGCC 0
177 | terminal_mismatch_GGCG 0
178 | terminal_mismatch_GGCU 0
179 | terminal_mismatch_GGGA 0
180 | terminal_mismatch_GGGC 0
181 | terminal_mismatch_GGGG 0
182 | terminal_mismatch_GGGU 0
183 | terminal_mismatch_GGUA 0
184 | terminal_mismatch_GGUC 0
185 | terminal_mismatch_GGUG 0
186 | terminal_mismatch_GGUU 0
187 | terminal_mismatch_GUAA -0.4871607613
188 | terminal_mismatch_GUAC 0.1105031953
189 | terminal_mismatch_GUAG 0.363373916
190 | terminal_mismatch_GUAU -0.6193199348
191 | terminal_mismatch_GUCA 0.3451056056
192 | terminal_mismatch_GUCC 0.0314944976
193 | terminal_mismatch_GUCG -0.3799172956
194 | terminal_mismatch_GUCU -0.03222973182
195 | terminal_mismatch_GUGA 0.4948638637
196 | terminal_mismatch_GUGC -0.2821952552
197 | terminal_mismatch_GUGG -0.2702227211
198 | terminal_mismatch_GUGU -0.06658395291
199 | terminal_mismatch_GUUA -0.4306154451
200 | terminal_mismatch_GUUC -0.09497863465
201 | terminal_mismatch_GUUG -0.3130794485
202 | terminal_mismatch_GUUU -0.2283242981
203 | terminal_mismatch_UAAA 0.0115363879
204 | terminal_mismatch_UAAC -0.3923408221
205 | terminal_mismatch_UAAG 0.05661063599
206 | terminal_mismatch_UAAU -0.1251485388
207 | terminal_mismatch_UACA -0.06545074758
208 | terminal_mismatch_UACC -0.3167200568
209 | terminal_mismatch_UACG 0.002258383981
210 | terminal_mismatch_UACU -0.422217724
211 | terminal_mismatch_UAGA 0.5458416646
212 | terminal_mismatch_UAGC -0.2085887954
213 | terminal_mismatch_UAGG -0.1971766062
214 | terminal_mismatch_UAGU -0.4722410132
215 | terminal_mismatch_UAUA -0.1779642496
216 | terminal_mismatch_UAUC 0.1643454344
217 | terminal_mismatch_UAUG -0.5005617032
218 | terminal_mismatch_UAUU 0.1333867679
219 | terminal_mismatch_UCAA 0
220 | terminal_mismatch_UCAC 0
221 | terminal_mismatch_UCAG 0
222 | terminal_mismatch_UCAU 0
223 | terminal_mismatch_UCCA 0
224 | terminal_mismatch_UCCC 0
225 | terminal_mismatch_UCCG 0
226 | terminal_mismatch_UCCU 0
227 | terminal_mismatch_UCGA 0
228 | terminal_mismatch_UCGC 0
229 | terminal_mismatch_UCGG 0
230 | terminal_mismatch_UCGU 0
231 | terminal_mismatch_UCUA 0
232 | terminal_mismatch_UCUC 0
233 | terminal_mismatch_UCUG 0
234 | terminal_mismatch_UCUU 0
235 | terminal_mismatch_UGAA 0.1218741278
236 | terminal_mismatch_UGAC 0.1990260141
237 | terminal_mismatch_UGAG 0.04681893928
238 | terminal_mismatch_UGAU 0.3256264491
239 | terminal_mismatch_UGCA 0.1186812326
240 | terminal_mismatch_UGCC -0.1851065102
241 | terminal_mismatch_UGCG -0.04311512683
242 | terminal_mismatch_UGCU -0.6150608139
243 | terminal_mismatch_UGGA 0.754933218
244 | terminal_mismatch_UGGC -0.3150708483
245 | terminal_mismatch_UGGG 0.1569582926
246 | terminal_mismatch_UGGU -0.514970007
247 | terminal_mismatch_UGUA -0.2926246029
248 | terminal_mismatch_UGUC 0.1373068149
249 | terminal_mismatch_UGUG -0.05422333363
250 | terminal_mismatch_UGUU 0.03086776921
251 | terminal_mismatch_UUAA 0
252 | terminal_mismatch_UUAC 0
253 | terminal_mismatch_UUAG 0
254 | terminal_mismatch_UUAU 0
255 | terminal_mismatch_UUCA 0
256 | terminal_mismatch_UUCC 0
257 | terminal_mismatch_UUCG 0
258 | terminal_mismatch_UUCU 0
259 | terminal_mismatch_UUGA 0
260 | terminal_mismatch_UUGC 0
261 | terminal_mismatch_UUGG 0
262 | terminal_mismatch_UUGU 0
263 | terminal_mismatch_UUUA 0
264 | terminal_mismatch_UUUC 0
265 | terminal_mismatch_UUUG 0
266 | terminal_mismatch_UUUU 0
267 | hairpin_length_at_least_0 -5.993180158
268 | hairpin_length_at_least_1 -3.108105762
269 | hairpin_length_at_least_2 0.4168976347
270 | hairpin_length_at_least_3 2.205419066
271 | hairpin_length_at_least_4 1.926749692
272 | hairpin_length_at_least_5 -0.5873245329
273 | hairpin_length_at_least_6 -0.0827571778
274 | hairpin_length_at_least_7 0.5783889844
275 | hairpin_length_at_least_8 -0.7220883372
276 | hairpin_length_at_least_9 -0.1725874624
277 | hairpin_length_at_least_10 -0.3025089867
278 | hairpin_length_at_least_11 -0.0296315939
279 | hairpin_length_at_least_12 -0.9268995948
280 | hairpin_length_at_least_13 -0.03157753978
281 | hairpin_length_at_least_14 -0.1022472101
282 | hairpin_length_at_least_15 0.1901407346
283 | hairpin_length_at_least_16 -0.09280909826
284 | hairpin_length_at_least_17 0.1690448408
285 | hairpin_length_at_least_18 -0.08172566471
286 | hairpin_length_at_least_19 -0.3445939031
287 | hairpin_length_at_least_20 -0.109150294
288 | hairpin_length_at_least_21 -0.2903523693
289 | hairpin_length_at_least_22 -0.3393713667
290 | hairpin_length_at_least_23 -0.1915364117
291 | hairpin_length_at_least_24 -0.05019209379
292 | hairpin_length_at_least_25 -0.03874620924
293 | hairpin_length_at_least_26 0.04751470752
294 | hairpin_length_at_least_27 0.06744321926
295 | hairpin_length_at_least_28 0.09721875726
296 | hairpin_length_at_least_29 0.1673131733
297 | hairpin_length_at_least_30 0.2329937249
298 | internal_explicit_1_1 -0.1754591076
299 | internal_explicit_1_2 0.03083787104
300 | internal_explicit_1_3 -0.171565435
301 | internal_explicit_1_4 -0.2294680983
302 | internal_explicit_2_2 -0.1304072693
303 | internal_explicit_2_3 -0.07730329553
304 | internal_explicit_2_4 0.2782767264
305 | internal_explicit_3_3 -0.02898949617
306 | internal_explicit_3_4 0.3112350694
307 | internal_explicit_4_4 -0.3226348245
308 | bulge_length_at_least_1 -2.399548472
309 | bulge_length_at_least_2 -0.8945183117
310 | bulge_length_at_least_3 -0.9088550909
311 | bulge_length_at_least_4 -0.8412474755
312 | bulge_length_at_least_5 -0.4365479343
313 | bulge_length_at_least_6 -0.5699187801
314 | bulge_length_at_least_7 0.2002834224
315 | bulge_length_at_least_8 0.7538761358
316 | bulge_length_at_least_9 -0.6045045455
317 | bulge_length_at_least_10 -0.7200948098
318 | bulge_length_at_least_11 -0.5136721921
319 | bulge_length_at_least_12 -0.3614726679
320 | bulge_length_at_least_13 -0.2614454392
321 | bulge_length_at_least_14 -0.1593926893
322 | bulge_length_at_least_15 -0.08624668281
323 | bulge_length_at_least_16 -0.03107090996
324 | bulge_length_at_least_17 -0.01097222032
325 | bulge_length_at_least_18 0.03001220283
326 | bulge_length_at_least_19 0.04759123789
327 | bulge_length_at_least_20 -0.04296172065
328 | bulge_length_at_least_21 -0.01791899662
329 | bulge_length_at_least_22 -0.07800551522
330 | bulge_length_at_least_23 -0.0709932643
331 | bulge_length_at_least_24 -0.05767952896
332 | bulge_length_at_least_25 -0.04633794681
333 | bulge_length_at_least_26 -0.03559420456
334 | bulge_length_at_least_27 -0.02674934394
335 | bulge_length_at_least_28 -0.01818957972
336 | bulge_length_at_least_29 -0.01052300732
337 | bulge_length_at_least_30 -0.005153626846
338 | internal_length_at_least_2 -0.429061443
339 | internal_length_at_least_3 -0.3532111501
340 | internal_length_at_least_4 -0.3963797535
341 | internal_length_at_least_5 -0.3111199175
342 | internal_length_at_least_6 -0.2551945472
343 | internal_length_at_least_7 -0.05149116898
344 | internal_length_at_least_8 -0.04319002407
345 | internal_length_at_least_9 0.001985489485
346 | internal_length_at_least_10 -0.1761513136
347 | internal_length_at_least_11 -0.2639686207
348 | internal_length_at_least_12 -0.3460613577
349 | internal_length_at_least_13 -0.2926603079
350 | internal_length_at_least_14 -0.03624250307
351 | internal_length_at_least_15 -0.1199953761
352 | internal_length_at_least_16 -0.04354771926
353 | internal_length_at_least_17 -0.08209293135
354 | internal_length_at_least_18 -0.007113226038
355 | internal_length_at_least_19 0.02354824852
356 | internal_length_at_least_20 0.03066973571
357 | internal_length_at_least_21 -0.06618241094
358 | internal_length_at_least_22 -0.1316092383
359 | internal_length_at_least_23 -0.1407995514
360 | internal_length_at_least_24 -0.06600291862
361 | internal_length_at_least_25 -0.07779204744
362 | internal_length_at_least_26 -0.05084201265
363 | internal_length_at_least_27 -0.04139875601
364 | internal_length_at_least_28 0.003276583405
365 | internal_length_at_least_29 0.00592458284
366 | internal_length_at_least_30 0.006875738004
367 | internal_symmetric_length_at_least_1 -0.5467082599
368 | internal_symmetric_length_at_least_2 -0.3854701647
369 | internal_symmetric_length_at_least_3 -0.2588466401
370 | internal_symmetric_length_at_least_4 -0.2340836745
371 | internal_symmetric_length_at_least_5 0.1450577765
372 | internal_symmetric_length_at_least_6 -0.6562932515
373 | internal_symmetric_length_at_least_7 -0.3021088369
374 | internal_symmetric_length_at_least_8 -0.03032275267
375 | internal_symmetric_length_at_least_9 -0.3517944058
376 | internal_symmetric_length_at_least_10 -0.2159132506
377 | internal_symmetric_length_at_least_11 -0.1228270454
378 | internal_symmetric_length_at_least_12 -0.1552208595
379 | internal_symmetric_length_at_least_13 -0.08541120743
380 | internal_symmetric_length_at_least_14 -0.04592109799
381 | internal_symmetric_length_at_least_15 -0.02232234236
382 | internal_asymmetry_at_least_1 -2.105646719
383 | internal_asymmetry_at_least_2 -0.5520140431
384 | internal_asymmetry_at_least_3 -0.577070767
385 | internal_asymmetry_at_least_4 -0.6136667847
386 | internal_asymmetry_at_least_5 -0.3057156841
387 | internal_asymmetry_at_least_6 -0.1155052001
388 | internal_asymmetry_at_least_7 -0.2105612231
389 | internal_asymmetry_at_least_8 -0.314574313
390 | internal_asymmetry_at_least_9 -0.3148961681
391 | internal_asymmetry_at_least_10 -0.09018189492
392 | internal_asymmetry_at_least_11 -0.2200026794
393 | internal_asymmetry_at_least_12 -0.1406483243
394 | internal_asymmetry_at_least_13 -0.2162411259
395 | internal_asymmetry_at_least_14 -0.1725531435
396 | internal_asymmetry_at_least_15 -0.1558911866
397 | internal_asymmetry_at_least_16 -0.1040858663
398 | internal_asymmetry_at_least_17 -0.06967684228
399 | internal_asymmetry_at_least_18 -0.04105977494
400 | internal_asymmetry_at_least_19 -0.01570624316
401 | internal_asymmetry_at_least_20 0.01382000639
402 | internal_asymmetry_at_least_21 0.04131988563
403 | internal_asymmetry_at_least_22 0.0359418595
404 | internal_asymmetry_at_least_23 0.02822186282
405 | internal_asymmetry_at_least_24 0.01636585874
406 | internal_asymmetry_at_least_25 0.02550056175
407 | internal_asymmetry_at_least_26 0.03348032793
408 | internal_asymmetry_at_least_27 0.03971924412
409 | internal_asymmetry_at_least_28 -0.002545113932
410 | bulge_0x1_nucleotides_A -0.1216861662
411 | bulge_0x1_nucleotides_C -0.07111241127
412 | bulge_0x1_nucleotides_G 0.008947026647
413 | bulge_0x1_nucleotides_U -0.002685763742
414 | internal_1x1_nucleotides_AA 0.2944404686
415 | internal_1x1_nucleotides_AC 0.08641360967
416 | internal_1x1_nucleotides_AG -0.3664197228
417 | internal_1x1_nucleotides_AU -0.2053107048
418 | internal_1x1_nucleotides_CC -0.1582543624
419 | internal_1x1_nucleotides_CG 0.4175273724
420 | internal_1x1_nucleotides_CU 0.1368762582
421 | internal_1x1_nucleotides_GG -0.1193514754
422 | internal_1x1_nucleotides_GU -0.4188101413
423 | internal_1x1_nucleotides_UU 0.147140653
424 | helix_stacking_AAAA 0
425 | helix_stacking_AAAC 0
426 | helix_stacking_AAAG 0
427 | helix_stacking_AAAU 0
428 | helix_stacking_AACA 0
429 | helix_stacking_AACC 0
430 | helix_stacking_AACG 0
431 | helix_stacking_AACU 0
432 | helix_stacking_AAGA 0
433 | helix_stacking_AAGC 0
434 | helix_stacking_AAGG 0
435 | helix_stacking_AAGU 0
436 | helix_stacking_AAUA 0
437 | helix_stacking_AAUC 0
438 | helix_stacking_AAUG 0
439 | helix_stacking_AAUU 0
440 | helix_stacking_ACAC 0
441 | helix_stacking_ACAG 0
442 | helix_stacking_ACAU 0
443 | helix_stacking_ACCA 0
444 | helix_stacking_ACCC 0
445 | helix_stacking_ACCG 0
446 | helix_stacking_ACCU 0
447 | helix_stacking_ACGA 0
448 | helix_stacking_ACGC 0
449 | helix_stacking_ACGG 0
450 | helix_stacking_ACGU 0
451 | helix_stacking_ACUA 0
452 | helix_stacking_ACUC 0
453 | helix_stacking_ACUG 0
454 | helix_stacking_ACUU 0
455 | helix_stacking_AGAC 0
456 | helix_stacking_AGAG 0
457 | helix_stacking_AGAU 0
458 | helix_stacking_AGCC 0
459 | helix_stacking_AGCG 0
460 | helix_stacking_AGCU 0
461 | helix_stacking_AGGA 0
462 | helix_stacking_AGGC 0
463 | helix_stacking_AGGG 0
464 | helix_stacking_AGGU 0
465 | helix_stacking_AGUA 0
466 | helix_stacking_AGUC 0
467 | helix_stacking_AGUG 0
468 | helix_stacking_AGUU 0
469 | helix_stacking_AUAC 0
470 | helix_stacking_AUAG 0
471 | helix_stacking_AUAU 0.1482005248
472 | helix_stacking_AUCC 0
473 | helix_stacking_AUCG 0.4343497127
474 | helix_stacking_AUCU 0
475 | helix_stacking_AUGC 0.7079642577
476 | helix_stacking_AUGG 0
477 | helix_stacking_AUGU -0.1010777582
478 | helix_stacking_AUUA 0.243256656
479 | helix_stacking_AUUC 0
480 | helix_stacking_AUUG 0.1623654243
481 | helix_stacking_AUUU 0
482 | helix_stacking_CAAC 0
483 | helix_stacking_CAAG 0
484 | helix_stacking_CAAU 0
485 | helix_stacking_CACC 0
486 | helix_stacking_CACG 0
487 | helix_stacking_CACU 0
488 | helix_stacking_CAGC 0
489 | helix_stacking_CAGG 0
490 | helix_stacking_CAGU 0
491 | helix_stacking_CAUC 0
492 | helix_stacking_CAUG 0
493 | helix_stacking_CAUU 0
494 | helix_stacking_CCAG 0
495 | helix_stacking_CCAU 0
496 | helix_stacking_CCCC 0
497 | helix_stacking_CCCG 0
498 | helix_stacking_CCCU 0
499 | helix_stacking_CCGC 0
500 | helix_stacking_CCGG 0
501 | helix_stacking_CCGU 0
502 | helix_stacking_CCUC 0
503 | helix_stacking_CCUG 0
504 | helix_stacking_CCUU 0
505 | helix_stacking_CGAG 0
506 | helix_stacking_CGAU 0.4878707793
507 | helix_stacking_CGCG 0.8481320247
508 | helix_stacking_CGCU 0
509 | helix_stacking_CGGC 0.4784248478
510 | helix_stacking_CGGG 0
511 | helix_stacking_CGGU -0.1811268205
512 | helix_stacking_CGUC 0
513 | helix_stacking_CGUG 0.4849351028
514 | helix_stacking_CGUU 0
515 | helix_stacking_CUAG 0
516 | helix_stacking_CUAU 0
517 | helix_stacking_CUCG 0
518 | helix_stacking_CUCU 0
519 | helix_stacking_CUGG 0
520 | helix_stacking_CUGU 0
521 | helix_stacking_CUUC 0
522 | helix_stacking_CUUG 0
523 | helix_stacking_CUUU 0
524 | helix_stacking_GAAG 0
525 | helix_stacking_GAAU 0
526 | helix_stacking_GACG 0
527 | helix_stacking_GACU 0
528 | helix_stacking_GAGG 0
529 | helix_stacking_GAGU 0
530 | helix_stacking_GAUG 0
531 | helix_stacking_GAUU 0
532 | helix_stacking_GCAU 0.5551785831
533 | helix_stacking_GCCG 0.5008324248
534 | helix_stacking_GCCU 0
535 | helix_stacking_GCGG 0
536 | helix_stacking_GCGU 0.2165962476
537 | helix_stacking_GCUG 0.4864603589
538 | helix_stacking_GCUU 0
539 | helix_stacking_GGAU 0
540 | helix_stacking_GGCU 0
541 | helix_stacking_GGGG 0
542 | helix_stacking_GGGU 0
543 | helix_stacking_GGUG 0
544 | helix_stacking_GGUU 0
545 | helix_stacking_GUAU -0.04665365028
546 | helix_stacking_GUCU 0
547 | helix_stacking_GUGU 0.1833447295
548 | helix_stacking_GUUG -0.2858970755
549 | helix_stacking_GUUU 0
550 | helix_stacking_UAAU 0.3897593783
551 | helix_stacking_UACU 0
552 | helix_stacking_UAGU -0.1157333764
553 | helix_stacking_UAUU 0
554 | helix_stacking_UCCU 0
555 | helix_stacking_UCGU 0
556 | helix_stacking_UCUU 0
557 | helix_stacking_UGGU 0.120296538
558 | helix_stacking_UGUU 0
559 | helix_stacking_UUUU 0
560 | helix_closing_AA 0
561 | helix_closing_AC 0
562 | helix_closing_AG 0
563 | helix_closing_AU -0.9770893163
564 | helix_closing_CA 0
565 | helix_closing_CC 0
566 | helix_closing_CG -0.4574650937
567 | helix_closing_CU 0
568 | helix_closing_GA 0
569 | helix_closing_GC -0.8265995623
570 | helix_closing_GG 0
571 | helix_closing_GU -1.051678928
572 | helix_closing_UA -0.9246140521
573 | helix_closing_UC 0
574 | helix_closing_UG -0.3698708172
575 | helix_closing_UU 0
576 | multi_base -1.199055076
577 | multi_unpaired -0.1983300391
578 | multi_paired -0.9253883752
579 | dangle_left_AAA 0
580 | dangle_left_AAC 0
581 | dangle_left_AAG 0
582 | dangle_left_AAU 0
583 | dangle_left_ACA 0
584 | dangle_left_ACC 0
585 | dangle_left_ACG 0
586 | dangle_left_ACU 0
587 | dangle_left_AGA 0
588 | dangle_left_AGC 0
589 | dangle_left_AGG 0
590 | dangle_left_AGU 0
591 | dangle_left_AUA -0.1251037681
592 | dangle_left_AUC 0.0441606708
593 | dangle_left_AUG -0.02541879082
594 | dangle_left_AUU 0.00785098466
595 | dangle_left_CAA 0
596 | dangle_left_CAC 0
597 | dangle_left_CAG 0
598 | dangle_left_CAU 0
599 | dangle_left_CCA 0
600 | dangle_left_CCC 0
601 | dangle_left_CCG 0
602 | dangle_left_CCU 0
603 | dangle_left_CGA 0.07224381372
604 | dangle_left_CGC 0.05279281874
605 | dangle_left_CGG 0.1009554299
606 | dangle_left_CGU -0.1515059013
607 | dangle_left_CUA 0
608 | dangle_left_CUC 0
609 | dangle_left_CUG 0
610 | dangle_left_CUU 0
611 | dangle_left_GAA 0
612 | dangle_left_GAC 0
613 | dangle_left_GAG 0
614 | dangle_left_GAU 0
615 | dangle_left_GCA -0.1829535099
616 | dangle_left_GCC 0.03393000394
617 | dangle_left_GCG 0.1335339061
618 | dangle_left_GCU -0.1604274506
619 | dangle_left_GGA 0
620 | dangle_left_GGC 0
621 | dangle_left_GGG 0
622 | dangle_left_GGU 0
623 | dangle_left_GUA -0.06517511341
624 | dangle_left_GUC -0.04250882422
625 | dangle_left_GUG 0.02875971806
626 | dangle_left_GUU -0.04359727428
627 | dangle_left_UAA -0.03373847659
628 | dangle_left_UAC -0.005070324324
629 | dangle_left_UAG -0.1186861149
630 | dangle_left_UAU -0.01162357727
631 | dangle_left_UCA 0
632 | dangle_left_UCC 0
633 | dangle_left_UCG 0
634 | dangle_left_UCU 0
635 | dangle_left_UGA -0.08047139148
636 | dangle_left_UGC 0.001608000669
637 | dangle_left_UGG 0.1016272216
638 | dangle_left_UGU -0.09200842832
639 | dangle_left_UUA 0
640 | dangle_left_UUC 0
641 | dangle_left_UUG 0
642 | dangle_left_UUU 0
643 | dangle_right_AAA 0
644 | dangle_right_AAC 0
645 | dangle_right_AAG 0
646 | dangle_right_AAU 0
647 | dangle_right_ACA 0
648 | dangle_right_ACC 0
649 | dangle_right_ACG 0
650 | dangle_right_ACU 0
651 | dangle_right_AGA 0
652 | dangle_right_AGC 0
653 | dangle_right_AGG 0
654 | dangle_right_AGU 0
655 | dangle_right_AUA 0.03232578201
656 | dangle_right_AUC -0.09096819493
657 | dangle_right_AUG -0.0740750973
658 | dangle_right_AUU -0.01621157379
659 | dangle_right_CAA 0
660 | dangle_right_CAC 0
661 | dangle_right_CAG 0
662 | dangle_right_CAU 0
663 | dangle_right_CCA 0
664 | dangle_right_CCC 0
665 | dangle_right_CCG 0
666 | dangle_right_CCU 0
667 | dangle_right_CGA 0.2133964379
668 | dangle_right_CGC -0.06234810991
669 | dangle_right_CGG -0.07008531041
670 | dangle_right_CGU -0.2141912285
671 | dangle_right_CUA 0
672 | dangle_right_CUC 0
673 | dangle_right_CUG 0
674 | dangle_right_CUU 0
675 | dangle_right_GAA 0
676 | dangle_right_GAC 0
677 | dangle_right_GAG 0
678 | dangle_right_GAU 0
679 | dangle_right_GCA 0.01581957549
680 | dangle_right_GCC 0.005644320058
681 | dangle_right_GCG -0.00943297687
682 | dangle_right_GCU -0.2597793095
683 | dangle_right_GGA 0
684 | dangle_right_GGC 0
685 | dangle_right_GGG 0
686 | dangle_right_GGU 0
687 | dangle_right_GUA -0.04480271781
688 | dangle_right_GUC -0.07321213002
689 | dangle_right_GUG 0.01270494867
690 | dangle_right_GUU -0.05717033985
691 | dangle_right_UAA -0.1631918513
692 | dangle_right_UAC 0.06769304994
693 | dangle_right_UAG -0.08789074414
694 | dangle_right_UAU -0.05525570007
695 | dangle_right_UCA 0
696 | dangle_right_UCC 0
697 | dangle_right_UCG 0
698 | dangle_right_UCU 0
699 | dangle_right_UGA 0.04105458185
700 | dangle_right_UGC -0.008136642572
701 | dangle_right_UGG -0.03808592022
702 | dangle_right_UGU -0.08629373429
703 | dangle_right_UUA 0
704 | dangle_right_UUC 0
705 | dangle_right_UUG 0
706 | dangle_right_UUU 0
707 | external_unpaired -0.00972883093
708 | external_paired -0.0009674111431
709 | 


--------------------------------------------------------------------------------
/parameter_files/learntofold.contrafold.params:
--------------------------------------------------------------------------------
  1 | base_pair_AA 0
  2 | base_pair_AC 0
  3 | base_pair_AG 0
  4 | base_pair_AU 0.117196
  5 | base_pair_CC 0
  6 | base_pair_CG 0.42785
  7 | base_pair_CU 0
  8 | base_pair_GG 0
  9 | base_pair_GU -0.144535
 10 | base_pair_UU 0
 11 | terminal_mismatch_AAAA 0
 12 | terminal_mismatch_AAAC 0
 13 | terminal_mismatch_AAAG 0
 14 | terminal_mismatch_AAAU 0
 15 | terminal_mismatch_AACA 0
 16 | terminal_mismatch_AACC 0
 17 | terminal_mismatch_AACG 0
 18 | terminal_mismatch_AACU 0
 19 | terminal_mismatch_AAGA 0
 20 | terminal_mismatch_AAGC 0
 21 | terminal_mismatch_AAGG 0
 22 | terminal_mismatch_AAGU 0
 23 | terminal_mismatch_AAUA 0
 24 | terminal_mismatch_AAUC 0
 25 | terminal_mismatch_AAUG 0
 26 | terminal_mismatch_AAUU 0
 27 | terminal_mismatch_ACAA 0
 28 | terminal_mismatch_ACAC 0
 29 | terminal_mismatch_ACAG 0
 30 | terminal_mismatch_ACAU 0
 31 | terminal_mismatch_ACCA 0
 32 | terminal_mismatch_ACCC 0
 33 | terminal_mismatch_ACCG 0
 34 | terminal_mismatch_ACCU 0
 35 | terminal_mismatch_ACGA 0
 36 | terminal_mismatch_ACGC 0
 37 | terminal_mismatch_ACGG 0
 38 | terminal_mismatch_ACGU 0
 39 | terminal_mismatch_ACUA 0
 40 | terminal_mismatch_ACUC 0
 41 | terminal_mismatch_ACUG 0
 42 | terminal_mismatch_ACUU 0
 43 | terminal_mismatch_AGAA 0
 44 | terminal_mismatch_AGAC 0
 45 | terminal_mismatch_AGAG 0
 46 | terminal_mismatch_AGAU 0
 47 | terminal_mismatch_AGCA 0
 48 | terminal_mismatch_AGCC 0
 49 | terminal_mismatch_AGCG 0
 50 | terminal_mismatch_AGCU 0
 51 | terminal_mismatch_AGGA 0
 52 | terminal_mismatch_AGGC 0
 53 | terminal_mismatch_AGGG 0
 54 | terminal_mismatch_AGGU 0
 55 | terminal_mismatch_AGUA 0
 56 | terminal_mismatch_AGUC 0
 57 | terminal_mismatch_AGUG 0
 58 | terminal_mismatch_AGUU 0
 59 | terminal_mismatch_AUAA -0.168158
 60 | terminal_mismatch_AUAC -0.242468
 61 | terminal_mismatch_AUAG -0.171538
 62 | terminal_mismatch_AUAU 0.063824
 63 | terminal_mismatch_AUCA -0.136324
 64 | terminal_mismatch_AUCC 0.0340154
 65 | terminal_mismatch_AUCG 0.412095
 66 | terminal_mismatch_AUCU -0.158066
 67 | terminal_mismatch_AUGA 0.235308
 68 | terminal_mismatch_AUGC 0.446161
 69 | terminal_mismatch_AUGG -0.31236
 70 | terminal_mismatch_AUGU -0.174198
 71 | terminal_mismatch_AUUA 0.427164
 72 | terminal_mismatch_AUUC 0.351693
 73 | terminal_mismatch_AUUG 0.112834
 74 | terminal_mismatch_AUUU 0.0114197
 75 | terminal_mismatch_CAAA 0
 76 | terminal_mismatch_CAAC 0
 77 | terminal_mismatch_CAAG 0
 78 | terminal_mismatch_CAAU 0
 79 | terminal_mismatch_CACA 0
 80 | terminal_mismatch_CACC 0
 81 | terminal_mismatch_CACG 0
 82 | terminal_mismatch_CACU 0
 83 | terminal_mismatch_CAGA 0
 84 | terminal_mismatch_CAGC 0
 85 | terminal_mismatch_CAGG 0
 86 | terminal_mismatch_CAGU 0
 87 | terminal_mismatch_CAUA 0
 88 | terminal_mismatch_CAUC 0
 89 | terminal_mismatch_CAUG 0
 90 | terminal_mismatch_CAUU 0
 91 | terminal_mismatch_CCAA 0
 92 | terminal_mismatch_CCAC 0
 93 | terminal_mismatch_CCAG 0
 94 | terminal_mismatch_CCAU 0
 95 | terminal_mismatch_CCCA 0
 96 | terminal_mismatch_CCCC 0
 97 | terminal_mismatch_CCCG 0
 98 | terminal_mismatch_CCCU 0
 99 | terminal_mismatch_CCGA 0
100 | terminal_mismatch_CCGC 0
101 | terminal_mismatch_CCGG 0
102 | terminal_mismatch_CCGU 0
103 | terminal_mismatch_CCUA 0
104 | terminal_mismatch_CCUC 0
105 | terminal_mismatch_CCUG 0
106 | terminal_mismatch_CCUU 0
107 | terminal_mismatch_CGAA -0.109134
108 | terminal_mismatch_CGAC -0.316447
109 | terminal_mismatch_CGAG -0.62242
110 | terminal_mismatch_CGAU 0.0216624
111 | terminal_mismatch_CGCA 0.0388758
112 | terminal_mismatch_CGCC -0.281257
113 | terminal_mismatch_CGCG 0.241614
114 | terminal_mismatch_CGCU -0.397997
115 | terminal_mismatch_CGGA 0.327717
116 | terminal_mismatch_CGGC 0.110783
117 | terminal_mismatch_CGGG -0.527171
118 | terminal_mismatch_CGGU -0.429919
119 | terminal_mismatch_CGUA 0.171414
120 | terminal_mismatch_CGUC -0.279608
121 | terminal_mismatch_CGUG 0.100497
122 | terminal_mismatch_CGUU -0.248438
123 | terminal_mismatch_CUAA 0
124 | terminal_mismatch_CUAC 0
125 | terminal_mismatch_CUAG 0
126 | terminal_mismatch_CUAU 0
127 | terminal_mismatch_CUCA 0
128 | terminal_mismatch_CUCC 0
129 | terminal_mismatch_CUCG 0
130 | terminal_mismatch_CUCU 0
131 | terminal_mismatch_CUGA 0
132 | terminal_mismatch_CUGC 0
133 | terminal_mismatch_CUGG 0
134 | terminal_mismatch_CUGU 0
135 | terminal_mismatch_CUUA 0
136 | terminal_mismatch_CUUC 0
137 | terminal_mismatch_CUUG 0
138 | terminal_mismatch_CUUU 0
139 | terminal_mismatch_GAAA 0
140 | terminal_mismatch_GAAC 0
141 | terminal_mismatch_GAAG 0
142 | terminal_mismatch_GAAU 0
143 | terminal_mismatch_GACA 0
144 | terminal_mismatch_GACC 0
145 | terminal_mismatch_GACG 0
146 | terminal_mismatch_GACU 0
147 | terminal_mismatch_GAGA 0
148 | terminal_mismatch_GAGC 0
149 | terminal_mismatch_GAGG 0
150 | terminal_mismatch_GAGU 0
151 | terminal_mismatch_GAUA 0
152 | terminal_mismatch_GAUC 0
153 | terminal_mismatch_GAUG 0
154 | terminal_mismatch_GAUU 0
155 | terminal_mismatch_GCAA -0.566345
156 | terminal_mismatch_GCAC -0.0306717
157 | terminal_mismatch_GCAG -0.266614
158 | terminal_mismatch_GCAU -0.154598
159 | terminal_mismatch_GCCA -0.316693
160 | terminal_mismatch_GCCC -0.131361
161 | terminal_mismatch_GCCG 0.363139
162 | terminal_mismatch_GCCU -0.41638
163 | terminal_mismatch_GCGA 0.353058
164 | terminal_mismatch_GCGC 0.368934
165 | terminal_mismatch_GCGG -0.0630469
166 | terminal_mismatch_GCGU -0.255096
167 | terminal_mismatch_GCUA 0.0728846
168 | terminal_mismatch_GCUC -0.0480102
169 | terminal_mismatch_GCUG 0.374379
170 | terminal_mismatch_GCUU 0.0624913
171 | terminal_mismatch_GGAA 0
172 | terminal_mismatch_GGAC 0
173 | terminal_mismatch_GGAG 0
174 | terminal_mismatch_GGAU 0
175 | terminal_mismatch_GGCA 0
176 | terminal_mismatch_GGCC 0
177 | terminal_mismatch_GGCG 0
178 | terminal_mismatch_GGCU 0
179 | terminal_mismatch_GGGA 0
180 | terminal_mismatch_GGGC 0
181 | terminal_mismatch_GGGG 0
182 | terminal_mismatch_GGGU 0
183 | terminal_mismatch_GGUA 0
184 | terminal_mismatch_GGUC 0
185 | terminal_mismatch_GGUG 0
186 | terminal_mismatch_GGUU 0
187 | terminal_mismatch_GUAA -0.22414
188 | terminal_mismatch_GUAC -0.133311
189 | terminal_mismatch_GUAG -0.359489
190 | terminal_mismatch_GUAU -0.330393
191 | terminal_mismatch_GUCA 0.0365249
192 | terminal_mismatch_GUCC 0.0615222
193 | terminal_mismatch_GUCG 0.290182
194 | terminal_mismatch_GUCU -0.176866
195 | terminal_mismatch_GUGA -0.19437
196 | terminal_mismatch_GUGC 0.0494159
197 | terminal_mismatch_GUGG -0.203475
198 | terminal_mismatch_GUGU -0.171151
199 | terminal_mismatch_GUUA 0.0401032
200 | terminal_mismatch_GUUC -0.105719
201 | terminal_mismatch_GUUG -0.302561
202 | terminal_mismatch_GUUU -0.445895
203 | terminal_mismatch_UAAA -0.586072
204 | terminal_mismatch_UAAC -0.61438
205 | terminal_mismatch_UAAG -0.405239
206 | terminal_mismatch_UAAU -0.133188
207 | terminal_mismatch_UACA -0.616378
208 | terminal_mismatch_UACC -0.624385
209 | terminal_mismatch_UACG -0.258873
210 | terminal_mismatch_UACU -0.681676
211 | terminal_mismatch_UAGA -0.342396
212 | terminal_mismatch_UAGC 0.239263
213 | terminal_mismatch_UAGG -0.667443
214 | terminal_mismatch_UAGU -0.766636
215 | terminal_mismatch_UAUA -0.308715
216 | terminal_mismatch_UAUC -0.00697584
217 | terminal_mismatch_UAUG -0.502953
218 | terminal_mismatch_UAUU -0.528393
219 | terminal_mismatch_UCAA 0
220 | terminal_mismatch_UCAC 0
221 | terminal_mismatch_UCAG 0
222 | terminal_mismatch_UCAU 0
223 | terminal_mismatch_UCCA 0
224 | terminal_mismatch_UCCC 0
225 | terminal_mismatch_UCCG 0
226 | terminal_mismatch_UCCU 0
227 | terminal_mismatch_UCGA 0
228 | terminal_mismatch_UCGC 0
229 | terminal_mismatch_UCGG 0
230 | terminal_mismatch_UCGU 0
231 | terminal_mismatch_UCUA 0
232 | terminal_mismatch_UCUC 0
233 | terminal_mismatch_UCUG 0
234 | terminal_mismatch_UCUU 0
235 | terminal_mismatch_UGAA 0.0753088
236 | terminal_mismatch_UGAC 0.27512
237 | terminal_mismatch_UGAG -0.050858
238 | terminal_mismatch_UGAU 0.192983
239 | terminal_mismatch_UGCA 0.443018
240 | terminal_mismatch_UGCC 0.0480001
241 | terminal_mismatch_UGCG 0.497822
242 | terminal_mismatch_UGCU 0.157055
243 | terminal_mismatch_UGGA 0.836611
244 | terminal_mismatch_UGGC 0.282301
245 | terminal_mismatch_UGGG 0.0988858
246 | terminal_mismatch_UGGU 0.234094
247 | terminal_mismatch_UGUA 0.114609
248 | terminal_mismatch_UGUC 0.214683
249 | terminal_mismatch_UGUG 0.246988
250 | terminal_mismatch_UGUU 0.371336
251 | terminal_mismatch_UUAA 0
252 | terminal_mismatch_UUAC 0
253 | terminal_mismatch_UUAG 0
254 | terminal_mismatch_UUAU 0
255 | terminal_mismatch_UUCA 0
256 | terminal_mismatch_UUCC 0
257 | terminal_mismatch_UUCG 0
258 | terminal_mismatch_UUCU 0
259 | terminal_mismatch_UUGA 0
260 | terminal_mismatch_UUGC 0
261 | terminal_mismatch_UUGG 0
262 | terminal_mismatch_UUGU 0
263 | terminal_mismatch_UUUA 0
264 | terminal_mismatch_UUUC 0
265 | terminal_mismatch_UUUG 0
266 | terminal_mismatch_UUUU 0
267 | hairpin_length_at_least_0 -1.84406
268 | hairpin_length_at_least_1 0.38098
269 | hairpin_length_at_least_2 0.859909
270 | hairpin_length_at_least_3 0.295419
271 | hairpin_length_at_least_4 0.7661
272 | hairpin_length_at_least_5 -0.338749
273 | hairpin_length_at_least_6 -0.0639211
274 | hairpin_length_at_least_7 0.315558
275 | hairpin_length_at_least_8 -0.362892
276 | hairpin_length_at_least_9 -0.176655
277 | hairpin_length_at_least_10 -0.263635
278 | hairpin_length_at_least_11 -0.129676
279 | hairpin_length_at_least_12 0.105682
280 | hairpin_length_at_least_13 0.08146
281 | hairpin_length_at_least_14 -0.855376
282 | hairpin_length_at_least_15 -0.0377099
283 | hairpin_length_at_least_16 0.0421525
284 | hairpin_length_at_least_17 0.107804
285 | hairpin_length_at_least_18 -0.216865
286 | hairpin_length_at_least_19 -0.0181023
287 | hairpin_length_at_least_20 -0.225869
288 | hairpin_length_at_least_21 -0.181939
289 | hairpin_length_at_least_22 0.0310624
290 | hairpin_length_at_least_23 -0.0905128
291 | hairpin_length_at_least_24 -0.306419
292 | hairpin_length_at_least_25 -0.13717
293 | hairpin_length_at_least_26 0.132407
294 | hairpin_length_at_least_27 -0.130469
295 | hairpin_length_at_least_28 -0.0067091
296 | hairpin_length_at_least_29 -0.115291
297 | hairpin_length_at_least_30 -0.39803
298 | internal_explicit_1_1 0.155859
299 | internal_explicit_1_2 -0.121667
300 | internal_explicit_1_3 0.0100364
301 | internal_explicit_1_4 0.199334
302 | internal_explicit_2_2 0.130952
303 | internal_explicit_2_3 -0.187011
304 | internal_explicit_2_4 -0.110813
305 | internal_explicit_3_3 0.0529937
306 | internal_explicit_3_4 -0.357182
307 | internal_explicit_4_4 0.12988
308 | bulge_length_at_least_1 -0.10609
309 | bulge_length_at_least_2 -0.294864
310 | bulge_length_at_least_3 -0.36619
311 | bulge_length_at_least_4 -0.577635
312 | bulge_length_at_least_5 -0.404122
313 | bulge_length_at_least_6 -0.508964
314 | bulge_length_at_least_7 -0.0211596
315 | bulge_length_at_least_8 0.749466
316 | bulge_length_at_least_9 -0.532326
317 | bulge_length_at_least_10 -0.585856
318 | bulge_length_at_least_11 -0.356308
319 | bulge_length_at_least_12 0.119846
320 | bulge_length_at_least_13 0.25548
321 | bulge_length_at_least_14 0.146516
322 | bulge_length_at_least_15 -0.546997
323 | bulge_length_at_least_16 0.147717
324 | bulge_length_at_least_17 0.0178208
325 | bulge_length_at_least_18 0.0080868
326 | bulge_length_at_least_19 0.456916
327 | bulge_length_at_least_20 -0.42458
328 | bulge_length_at_least_21 0.145037
329 | bulge_length_at_least_22 -0.105019
330 | bulge_length_at_least_23 -0.342105
331 | bulge_length_at_least_24 -0.0779023
332 | bulge_length_at_least_25 -0.193858
333 | bulge_length_at_least_26 -0.00769006
334 | bulge_length_at_least_27 -0.111807
335 | bulge_length_at_least_28 0.155611
336 | bulge_length_at_least_29 0.335468
337 | bulge_length_at_least_30 1.18348
338 | internal_length_at_least_2 0.0141383
339 | internal_length_at_least_3 -0.0934192
340 | internal_length_at_least_4 -0.0617787
341 | internal_length_at_least_5 -0.115015
342 | internal_length_at_least_6 -0.100272
343 | internal_length_at_least_7 0.260368
344 | internal_length_at_least_8 -0.258777
345 | internal_length_at_least_9 0.0776641
346 | internal_length_at_least_10 -0.249379
347 | internal_length_at_least_11 0.0528477
348 | internal_length_at_least_12 -0.478489
349 | internal_length_at_least_13 -0.106756
350 | internal_length_at_least_14 -0.000894333
351 | internal_length_at_least_15 -0.334079
352 | internal_length_at_least_16 0.0711885
353 | internal_length_at_least_17 -0.203494
354 | internal_length_at_least_18 0.253692
355 | internal_length_at_least_19 -0.232494
356 | internal_length_at_least_20 0.358359
357 | internal_length_at_least_21 -0.366355
358 | internal_length_at_least_22 0.245564
359 | internal_length_at_least_23 -0.489612
360 | internal_length_at_least_24 0.262947
361 | internal_length_at_least_25 -0.433761
362 | internal_length_at_least_26 0.0245611
363 | internal_length_at_least_27 -0.128352
364 | internal_length_at_least_28 0.100132
365 | internal_length_at_least_29 -0.208747
366 | internal_length_at_least_30 0.827826
367 | internal_symmetric_length_at_least_1 0.0656625
368 | internal_symmetric_length_at_least_2 -0.087095
369 | internal_symmetric_length_at_least_3 -0.0711241
370 | internal_symmetric_length_at_least_4 0.0126792
371 | internal_symmetric_length_at_least_5 -0.233107
372 | internal_symmetric_length_at_least_6 -0.112285
373 | internal_symmetric_length_at_least_7 -0.120892
374 | internal_symmetric_length_at_least_8 0.0783225
375 | internal_symmetric_length_at_least_9 -0.120047
376 | internal_symmetric_length_at_least_10 -0.44724
377 | internal_symmetric_length_at_least_11 -0.0132272
378 | internal_symmetric_length_at_least_12 -0.118194
379 | internal_symmetric_length_at_least_13 0.0859623
380 | internal_symmetric_length_at_least_14 -0.178603
381 | internal_symmetric_length_at_least_15 -0.178603
382 | internal_asymmetry_at_least_1 -0.0748923
383 | internal_asymmetry_at_least_2 -0.382543
384 | internal_asymmetry_at_least_3 -0.251796
385 | internal_asymmetry_at_least_4 -0.421874
386 | internal_asymmetry_at_least_5 -0.34332
387 | internal_asymmetry_at_least_6 -0.115644
388 | internal_asymmetry_at_least_7 -0.165334
389 | internal_asymmetry_at_least_8 0.197739
390 | internal_asymmetry_at_least_9 -0.186715
391 | internal_asymmetry_at_least_10 0.076971
392 | internal_asymmetry_at_least_11 0.0362528
393 | internal_asymmetry_at_least_12 -0.220953
394 | internal_asymmetry_at_least_13 0.108824
395 | internal_asymmetry_at_least_14 -0.0164457
396 | internal_asymmetry_at_least_15 0.368713
397 | internal_asymmetry_at_least_16 -0.438663
398 | internal_asymmetry_at_least_17 0.16405
399 | internal_asymmetry_at_least_18 -0.0398533
400 | internal_asymmetry_at_least_19 0.1949
401 | internal_asymmetry_at_least_20 0.0771696
402 | internal_asymmetry_at_least_21 0.41823
403 | internal_asymmetry_at_least_22 -0.632993
404 | internal_asymmetry_at_least_23 -0.116177
405 | internal_asymmetry_at_least_24 -0.12073
406 | internal_asymmetry_at_least_25 0.0344756
407 | internal_asymmetry_at_least_26 -0.0637855
408 | internal_asymmetry_at_least_27 0.264182
409 | internal_asymmetry_at_least_28 0.393391
410 | bulge_0x1_nucleotides_A 0.0265834
411 | bulge_0x1_nucleotides_C 0.187646
412 | bulge_0x1_nucleotides_G 0.213565
413 | bulge_0x1_nucleotides_U 0.139233
414 | internal_1x1_nucleotides_AA 0.115743
415 | internal_1x1_nucleotides_AC 0.0287969
416 | internal_1x1_nucleotides_AG -0.142761
417 | internal_1x1_nucleotides_AU 0.780265
418 | internal_1x1_nucleotides_CC 0.0215604
419 | internal_1x1_nucleotides_CG 0.834524
420 | internal_1x1_nucleotides_CU 0.0301214
421 | internal_1x1_nucleotides_GG 0.220881
422 | internal_1x1_nucleotides_GU 0.608098
423 | internal_1x1_nucleotides_UU 0.161178
424 | helix_stacking_AAAA 0
425 | helix_stacking_AAAC 0
426 | helix_stacking_AAAG 0
427 | helix_stacking_AAAU 0
428 | helix_stacking_AACA 0
429 | helix_stacking_AACC 0
430 | helix_stacking_AACG 0
431 | helix_stacking_AACU 0
432 | helix_stacking_AAGA 0
433 | helix_stacking_AAGC 0
434 | helix_stacking_AAGG 0
435 | helix_stacking_AAGU 0
436 | helix_stacking_AAUA 0
437 | helix_stacking_AAUC 0
438 | helix_stacking_AAUG 0
439 | helix_stacking_AAUU 0
440 | helix_stacking_ACAC 0
441 | helix_stacking_ACAG 0
442 | helix_stacking_ACAU 0
443 | helix_stacking_ACCA 0
444 | helix_stacking_ACCC 0
445 | helix_stacking_ACCG 0
446 | helix_stacking_ACCU 0
447 | helix_stacking_ACGA 0
448 | helix_stacking_ACGC 0
449 | helix_stacking_ACGG 0
450 | helix_stacking_ACGU 0
451 | helix_stacking_ACUA 0
452 | helix_stacking_ACUC 0
453 | helix_stacking_ACUG 0
454 | helix_stacking_ACUU 0
455 | helix_stacking_AGAC 0
456 | helix_stacking_AGAG 0
457 | helix_stacking_AGAU 0
458 | helix_stacking_AGCC 0
459 | helix_stacking_AGCG 0
460 | helix_stacking_AGCU 0
461 | helix_stacking_AGGA 0
462 | helix_stacking_AGGC 0
463 | helix_stacking_AGGG 0
464 | helix_stacking_AGGU 0
465 | helix_stacking_AGUA 0
466 | helix_stacking_AGUC 0
467 | helix_stacking_AGUG 0
468 | helix_stacking_AGUU 0
469 | helix_stacking_AUAC 0
470 | helix_stacking_AUAG 0
471 | helix_stacking_AUAU 0.166949
472 | helix_stacking_AUCC 0
473 | helix_stacking_AUCG 0.457814
474 | helix_stacking_AUCU 0
475 | helix_stacking_AUGC 0.625282
476 | helix_stacking_AUGG 0
477 | helix_stacking_AUGU -0.0635901
478 | helix_stacking_AUUA 0.484831
479 | helix_stacking_AUUC 0
480 | helix_stacking_AUUG 0.229207
481 | helix_stacking_AUUU 0
482 | helix_stacking_CAAC 0
483 | helix_stacking_CAAG 0
484 | helix_stacking_CAAU 0
485 | helix_stacking_CACC 0
486 | helix_stacking_CACG 0
487 | helix_stacking_CACU 0
488 | helix_stacking_CAGC 0
489 | helix_stacking_CAGG 0
490 | helix_stacking_CAGU 0
491 | helix_stacking_CAUC 0
492 | helix_stacking_CAUG 0
493 | helix_stacking_CAUU 0
494 | helix_stacking_CCAG 0
495 | helix_stacking_CCAU 0
496 | helix_stacking_CCCC 0
497 | helix_stacking_CCCG 0
498 | helix_stacking_CCCU 0
499 | helix_stacking_CCGC 0
500 | helix_stacking_CCGG 0
501 | helix_stacking_CCGU 0
502 | helix_stacking_CCUC 0
503 | helix_stacking_CCUG 0
504 | helix_stacking_CCUU 0
505 | helix_stacking_CGAG 0
506 | helix_stacking_CGAU 0.60886
507 | helix_stacking_CGCG 0.927152
508 | helix_stacking_CGCU 0
509 | helix_stacking_CGGC 0.483599
510 | helix_stacking_CGGG 0
511 | helix_stacking_CGGU 0.00568172
512 | helix_stacking_CGUC 0
513 | helix_stacking_CGUG 0.370247
514 | helix_stacking_CGUU 0
515 | helix_stacking_CUAG 0
516 | helix_stacking_CUAU 0
517 | helix_stacking_CUCG 0
518 | helix_stacking_CUCU 0
519 | helix_stacking_CUGG 0
520 | helix_stacking_CUGU 0
521 | helix_stacking_CUUC 0
522 | helix_stacking_CUUG 0
523 | helix_stacking_CUUU 0
524 | helix_stacking_GAAG 0
525 | helix_stacking_GAAU 0
526 | helix_stacking_GACG 0
527 | helix_stacking_GACU 0
528 | helix_stacking_GAGG 0
529 | helix_stacking_GAGU 0
530 | helix_stacking_GAUG 0
531 | helix_stacking_GAUU 0
532 | helix_stacking_GCAU 0.342121
533 | helix_stacking_GCCG 0.77176
534 | helix_stacking_GCCU 0
535 | helix_stacking_GCGG 0
536 | helix_stacking_GCGU 0.313625
537 | helix_stacking_GCUG 0.474024
538 | helix_stacking_GCUU 0
539 | helix_stacking_GGAU 0
540 | helix_stacking_GGCU 0
541 | helix_stacking_GGGG 0
542 | helix_stacking_GGGU 0
543 | helix_stacking_GGUG 0
544 | helix_stacking_GGUU 0
545 | helix_stacking_GUAU -0.0905706
546 | helix_stacking_GUCU 0
547 | helix_stacking_GUGU 0.175914
548 | helix_stacking_GUUG -0.265254
549 | helix_stacking_GUUU 0
550 | helix_stacking_UAAU 0.285857
551 | helix_stacking_UACU 0
552 | helix_stacking_UAGU -0.0092986
553 | helix_stacking_UAUU 0
554 | helix_stacking_UCCU 0
555 | helix_stacking_UCGU 0
556 | helix_stacking_UCUU 0
557 | helix_stacking_UGGU 0.605438
558 | helix_stacking_UGUU 0
559 | helix_stacking_UUUU 0
560 | helix_closing_AA 0
561 | helix_closing_AC 0
562 | helix_closing_AG 0
563 | helix_closing_AU -0.904257
564 | helix_closing_CA 0
565 | helix_closing_CC 0
566 | helix_closing_CG -0.447655
567 | helix_closing_CU 0
568 | helix_closing_GA 0
569 | helix_closing_GC -0.664996
570 | helix_closing_GG 0
571 | helix_closing_GU -0.551376
572 | helix_closing_UA -0.469223
573 | helix_closing_UC 0
574 | helix_closing_UG -0.690579
575 | helix_closing_UU 0
576 | multi_base 0.392109
577 | multi_unpaired -0.0305723
578 | multi_paired -0.324548
579 | dangle_left_AAA 0
580 | dangle_left_AAC 0
581 | dangle_left_AAG 0
582 | dangle_left_AAU 0
583 | dangle_left_ACA 0
584 | dangle_left_ACC 0
585 | dangle_left_ACG 0
586 | dangle_left_ACU 0
587 | dangle_left_AGA 0
588 | dangle_left_AGC 0
589 | dangle_left_AGG 0
590 | dangle_left_AGU 0
591 | dangle_left_AUA -0.0096949
592 | dangle_left_AUC 0.296587
593 | dangle_left_AUG 0.264354
594 | dangle_left_AUU 0.467729
595 | dangle_left_CAA 0
596 | dangle_left_CAC 0
597 | dangle_left_CAG 0
598 | dangle_left_CAU 0
599 | dangle_left_CCA 0
600 | dangle_left_CCC 0
601 | dangle_left_CCG 0
602 | dangle_left_CCU 0
603 | dangle_left_CGA 0.196253
604 | dangle_left_CGC 0.440535
605 | dangle_left_CGG 0.390397
606 | dangle_left_CGU 0.139024
607 | dangle_left_CUA 0
608 | dangle_left_CUC 0
609 | dangle_left_CUG 0
610 | dangle_left_CUU 0
611 | dangle_left_GAA 0
612 | dangle_left_GAC 0
613 | dangle_left_GAG 0
614 | dangle_left_GAU 0
615 | dangle_left_GCA -0.320284
616 | dangle_left_GCC -0.181196
617 | dangle_left_GCG 0.0390977
618 | dangle_left_GCU 0.175603
619 | dangle_left_GGA 0
620 | dangle_left_GGC 0
621 | dangle_left_GGG 0
622 | dangle_left_GGU 0
623 | dangle_left_GUA -0.0839476
624 | dangle_left_GUC 0.148304
625 | dangle_left_GUG 0.0216176
626 | dangle_left_GUU 0.053797
627 | dangle_left_UAA -0.0866879
628 | dangle_left_UAC -0.250894
629 | dangle_left_UAG -0.322181
630 | dangle_left_UAU -0.0654954
631 | dangle_left_UCA 0
632 | dangle_left_UCC 0
633 | dangle_left_UCG 0
634 | dangle_left_UCU 0
635 | dangle_left_UGA -0.168554
636 | dangle_left_UGC 0.117638
637 | dangle_left_UGG 0.304698
638 | dangle_left_UGU 0.0870223
639 | dangle_left_UUA 0
640 | dangle_left_UUC 0
641 | dangle_left_UUG 0
642 | dangle_left_UUU 0
643 | dangle_right_AAA 0
644 | dangle_right_AAC 0
645 | dangle_right_AAG 0
646 | dangle_right_AAU 0
647 | dangle_right_ACA 0
648 | dangle_right_ACC 0
649 | dangle_right_ACG 0
650 | dangle_right_ACU 0
651 | dangle_right_AGA 0
652 | dangle_right_AGC 0
653 | dangle_right_AGG 0
654 | dangle_right_AGU 0
655 | dangle_right_AUA -0.927456
656 | dangle_right_AUC -1.10559
657 | dangle_right_AUG -0.981522
658 | dangle_right_AUU -0.995162
659 | dangle_right_CAA 0
660 | dangle_right_CAC 0
661 | dangle_right_CAG 0
662 | dangle_right_CAU 0
663 | dangle_right_CCA 0
664 | dangle_right_CCC 0
665 | dangle_right_CCG 0
666 | dangle_right_CCU 0
667 | dangle_right_CGA -0.82867
668 | dangle_right_CGC -1.11699
669 | dangle_right_CGG -1.23095
670 | dangle_right_CGU -1.23702
671 | dangle_right_CUA 0
672 | dangle_right_CUC 0
673 | dangle_right_CUG 0
674 | dangle_right_CUU 0
675 | dangle_right_GAA 0
676 | dangle_right_GAC 0
677 | dangle_right_GAG 0
678 | dangle_right_GAU 0
679 | dangle_right_GCA -0.532095
680 | dangle_right_GCC -0.54946
681 | dangle_right_GCG -0.398636
682 | dangle_right_GCU -0.868356
683 | dangle_right_GGA 0
684 | dangle_right_GGC 0
685 | dangle_right_GGG 0
686 | dangle_right_GGU 0
687 | dangle_right_GUA -0.908315
688 | dangle_right_GUC -0.876077
689 | dangle_right_GUG -0.991237
690 | dangle_right_GUU -1.08336
691 | dangle_right_UAA -1.04753
692 | dangle_right_UAC -0.918508
693 | dangle_right_UAG -1.1966
694 | dangle_right_UAU -1.07818
695 | dangle_right_UCA 0
696 | dangle_right_UCC 0
697 | dangle_right_UCG 0
698 | dangle_right_UCU 0
699 | dangle_right_UGA -0.463016
700 | dangle_right_UGC -0.463076
701 | dangle_right_UGG -0.779374
702 | dangle_right_UGU -0.559652
703 | dangle_right_UUA 0
704 | dangle_right_UUC 0
705 | dangle_right_UUG 0
706 | dangle_right_UUU 0
707 | external_unpaired -0.144898
708 | external_paired -1.54974
709 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | [project]
 3 | name = "arnie"
 4 | version = "0.2.7"
 5 | authors = [
 6 |   { name="Das Lab", email="thedaslab@stanford.edu" },
 7 | ]
 8 | description = "A Python utility library to estimate, compare, and reweight RNA energetics across many secondary structure algorithms."
 9 | readme = "README.md"
10 | requires-python = ">=3.7"
11 | classifiers = [
12 |     "Programming Language :: Python :: 3",
13 |     "License :: OSI Approved :: MIT License",
14 |     "Operating System :: OS Independent",
15 | ]
16 | keywords = ["RNA", "RNA structure prediction", "Bioinformatics"]
17 | dependencies = [
18 |   "numpy>=1.15",
19 |   "scipy>=1.5.0",
20 |   "matplotlib>=3.0.0"
21 | ]
22 | 
23 | [project.urls]
24 | "Homepage" = "https://github.com/DasLab/arnie"
25 | "Documentation" = "https://daslab.github.io/arnie"
26 | "Bug Tracker" = "https://github.com/DasLab/arnie/issues"
27 | 
28 | [build-system]
29 | requires = ["setuptools>=61.0"]
30 | build-backend = "setuptools.build_meta"
31 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | pythonpath = src


--------------------------------------------------------------------------------
/scripts/score_pseudoacc_mea.py:
--------------------------------------------------------------------------------
  1 | from arnie.mea.mea import *
  2 | import numpy as np
  3 | from glob import glob
  4 | import argparse
  5 | import sys, os
  6 | 
  7 | def predict_MEA_structures(matrix_list, gamma_min=-7, gamma_max=7, verbose=False, metric='mcc', output_dir='MEA_output'):
  8 |     '''Estimate maximum expected pseudoaccuracy structures per Hamada et al. BMC Bioinf 2010 11:586.
  9 |     
 10 |     Note: Files in matrix_dir and true_structs need to have the same names corresponding to their same constructs, but suffixes don't matter.
 11 |         
 12 |     Inputs:
 13 | 
 14 |     matrix_dir: list of NxN base pair probability matrices.
 15 |     gamma_min, gamma_max: min/max log_2(gamma) value used, defaults are -7 and 7.
 16 |     metric: keyword-based, which metric to use to select structure. Options are 'sen', 'ppv', 'mcc', 'fscore'.
 17 |     verbose: print output or not (for command line use)
 18 | 
 19 |     Outputs:
 20 |     List of predicted structures (in dbn format) at each gamma.
 21 | 
 22 |     '''
 23 | 
 24 |     metric_ind = ['sen', 'ppv', 'mcc', 'fscore'].index(metric)
 25 | 
 26 |     if len(matrix_list) == 0:
 27 |         raise ValueError('No matrix files found!')
 28 | 
 29 |     matrices = [np.loadtxt(x) for x in matrix_list]
 30 |     pdb_indices = [os.path.basename(x).split('.')[0] for x in matrix_list]
 31 | 
 32 |     n_constructs = len(matrices)
 33 | 
 34 |     gamma_vals = [x for x in range(gamma_min, gamma_max)]
 35 |     best_metric_values, best_gammas, best_structs,best_metrics = [],[],[],[]
 36 | 
 37 |     metrics_across_gammas = {k:[] for k in gamma_vals}
 38 | 
 39 |     if verbose: print('\nmetric\tpdb_ind\tbest_log2g\tbest_metric_value\tbest_struct')
 40 | 
 41 |     for i, matrix in enumerate(matrices):
 42 | 
 43 |         running_best_metrics = []
 44 |         running_best_value = 0
 45 |         running_best_gamma = -101
 46 |         running_best_struct = ''
 47 | 
 48 |         for g in gamma_vals:
 49 | 
 50 |             mea_cls = MEA(matrix, gamma=2**g)
 51 | 
 52 |             metrics = mea_cls.score_expected() #sen, ppv, mcc, fscore
 53 |             metrics_across_gammas[g].append(metrics)
 54 | 
 55 |             if metrics[metric_ind] > running_best_value:
 56 |                 running_best_value = metrics[metric_ind]
 57 |                 running_best_metrics = metrics
 58 |                 running_best_gamma = g
 59 |                 running_best_struct = mea_cls.structure
 60 | 
 61 |         best_metrics.append(running_best_metrics)
 62 |         best_metric_values.append(running_best_value)
 63 |         best_gammas.append(running_best_gamma)
 64 |         best_structs.append(running_best_struct)
 65 | 
 66 |         if verbose: print("%s\t%s\t%d\t%.3f\t%s" % (metric, pdb_indices[i], running_best_gamma, running_best_value, running_best_struct))
 67 | 
 68 |     # print('Avg metrics across gamma vals')
 69 | 
 70 |     print('\t\tlog2(g)\tsen\tppv\tmcc\tfscore')
 71 | 
 72 |     for g in gamma_vals:
 73 | 
 74 |         [sen, ppv, mcc, fscore] = np.mean(metrics_across_gammas[g], axis=0)
 75 |         print('gamma_avg\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (g, sen, ppv, mcc, fscore))
 76 | 
 77 |     # print('Best avg metrics using individual gammas')
 78 |     [sen, ppv, mcc, fscore] = np.mean(np.array(best_metrics), axis=0)
 79 | 
 80 |     print('gamma_best\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (np.mean(best_gammas), sen, ppv, mcc, fscore))
 81 | 
 82 |     if not os.path.exists(output_dir):
 83 |         os.makedirs(output_dir)
 84 | 
 85 |     for struct, ind in list(zip(best_structs, pdb_indices)):
 86 |         if os.path.exists('%s/%s.dbn' % (output_dir, ind)):
 87 |             print('NB: overwriting existing predicted structure')
 88 |         with open('%s/%s.dbn' % (output_dir, ind), 'w') as f:
 89 |             f.write(struct)
 90 | 
 91 |     return best_structs
 92 |     
 93 | def score_against_true_structs(pred_struct_list, true_struct_list, verbose=False, weight_by_n_bps=False):
 94 |     '''Score maximum expected pseudoaccuracy structures against provided 3D structures.  
 95 |     
 96 |     Note: Files in matrix_dir and true_structs need to have the same names corresponding
 97 |         to their same constructs, but suffixes don't matter.
 98 |         
 99 |     Inputs:
100 | 
101 |     pred_struct_list: list of predicted structures.
102 |     true_structs: list of NxN true structure base pair matrices. Can be
103 |         symmetric matrices or not; upper triangle is taken.
104 |     verbose: print output or not (for command line use)
105 | 
106 |     Outputs:
107 | 
108 |     SEN: TP/(TP+FN), library keyed by gamma values used.
109 |     PPV: TP/(TP+FP), "
110 |     MCC: Mathews correlation coefficient
111 |     Fscore: 2*TP/(2*TP + FP + FN)    
112 | 
113 |     '''
114 |     pred_structs, true_structs = [], []
115 | 
116 |     if len(pred_struct_list) == 0:
117 |         raise ValueError('No predicted structure files found!')
118 | 
119 |     if len(true_struct_list) == 0:
120 |         raise ValueError('No ground truth structure files found!')
121 | 
122 |     for x in pred_struct_list:
123 |         for s in true_struct_list:
124 |             if os.path.basename(x).split('.')[0] in s:
125 | 
126 |                 pstruct = load_matrix_or_dbn(x)
127 |                 pred_structs.append(pstruct)
128 | 
129 |                 struct = load_matrix_or_dbn(s)
130 |                 true_structs.append(struct)
131 | 
132 |     assert len(pred_structs) == len(true_structs)
133 | 
134 |     tally, ptl_sen, ptl_ppv, ptl_mcc, ptl_fscore = 0, 0, 0, 0, 0
135 | 
136 |     pdb_indices = [os.path.basename(x).split('.')[0] for x in pred_struct_list]
137 |     
138 |     for i in range(len(pred_structs)):
139 |         
140 |         sen, ppv, mcc, fscore, N = score_ground_truth(pred_structs[i], true_structs[i])
141 |         print('Score:\t%s\t%.3f\t%.3f\t%.3f\t%.3f' % (pdb_indices[i], sen, ppv, mcc, fscore))
142 | 
143 |         if weight_by_n_bps:
144 |             ptl_sen += sen*N
145 |             ptl_ppv += ppv*N
146 |             ptl_mcc += mcc*N
147 |             ptl_fscore += fscore*N
148 |             tally += N
149 | 
150 |         else:
151 |             ptl_sen += sen
152 |             ptl_ppv += ppv
153 |             ptl_mcc += mcc
154 |             ptl_fscore += fscore
155 |             tally += 1
156 | 
157 |     mean_sen = ptl_sen/tally
158 |     mean_ppv = ptl_ppv/tally
159 |     mean_mcc = ptl_mcc/tally
160 |     mean_fscore = ptl_fscore/tally
161 | 
162 |     print("Avg:\tsen\tppv\tmcc\tfscore\n\t%.3f\t%.3f\t%.3f\t%.3f" % (mean_sen, mean_ppv, mean_mcc, mean_fscore))
163 | 
164 |     return mean_sen, mean_ppv, mean_mcc, mean_fscore
165 | 
if __name__ == '__main__':
    # Command line entry point: predict MEA structures from base pair
    # probability matrices and, optionally, score them against ground truth.

    parser=argparse.ArgumentParser(
        description='''Estimate maximum expected pseudoaccuracy structures per Hamada et al. BMC Bioinf 2010 11:586 and\
        score against a ground truth dataset.\n

         Input format: Base pair probability matrices (specified in --bp_matrices) need to have same base names
          as structures (specified in --true_structs, and can be either dbn strings or NxN matrices),
           but the extensions for both types don't matter.''')

    parser.add_argument('--bp_matrices','-p', nargs='+', 
        help='path to NxN matrices of bp probabilities, i.e. `contrafold/*.bpps`.')

    parser.add_argument('--output_dir', '-o', 
        help="Path to output of predicted MEA structures. Default is `MEA_output`.", default = 'MEA_output')

    parser.add_argument('--true_structs','-s', nargs='+', 
        help='Optional: path to true structures, i.e. `rnaview/*.struct`. These can be dbn structures or NxN matrices.', default=None)

    parser.add_argument('--metric', default='mcc',
        help='Accuracy metric, options are `mcc`, `fscore`, `ppv`, or `sen`. Default is `mcc`.')

    parser.add_argument('--gamma_min',type=int, default=-7, help='Min value for log_2(gamma), default is -7')
    parser.add_argument('--gamma_max',type=int, default=7, help='Max value for log_2(gamma), default is 7')

    parser.add_argument('--weight_by_n_bps', dest='weight_by_n_bps', action='store_true', 
        help='For scoring to true structures, weight accuracy over dataset by number of bps.\
         If flag not included, equal weight across constructs.')

    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--score_truth_only', dest='score_truth_only', action='store_true',
        help='Use if MEA structures already generated and only scoring to ground truth dataset.')

    #print help and exit if no args
    if len(sys.argv)==1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # Prediction needs matrices; without this check a missing --bp_matrices
    # surfaces later as a TypeError on len(None).
    if not args.score_truth_only and not args.bp_matrices:
        parser.error('--bp_matrices is required unless --score_truth_only is given.')

    if args.verbose:
        print('\nRNA MEA STRUCTURE PREDICTION')
        # --bp_matrices may legitimately be absent with --score_truth_only.
        if args.bp_matrices:
            print('Number of structures: %d' % len(args.bp_matrices))
            print('Path to first base pair matrix: %s' % args.bp_matrices[0])
        if args.true_structs:
            print('Path to first true struct: %s' % args.true_structs[0])
        print('\nScanning gamma for MEA structure prediction:')

    if not args.score_truth_only:
        predict_MEA_structures(args.bp_matrices, gamma_min = args.gamma_min, gamma_max = args.gamma_max, verbose=args.verbose, metric = args.metric, output_dir = args.output_dir)

    if args.true_structs:
        if args.verbose: print('\nScoring provided true structures against maximum expected pseudoaccuracy structures:')
        # Predicted structures are picked up from whatever files sit in output_dir.
        score_against_true_structs(glob('%s/*' % args.output_dir), args.true_structs, verbose=args.verbose, weight_by_n_bps=args.weight_by_n_bps)
223 | 


--------------------------------------------------------------------------------
/scripts/write_bpp_matrices.py:
--------------------------------------------------------------------------------
 1 | import sys, os, argparse
 2 | import arnie.bpps as bpps
 3 | from arnie.utils import write_matrix_to_file
 4 | 
 5 | if __name__=='__main__':
 6 |     p = argparse.ArgumentParser(description=
 7 |         """
 8 |         Write base pairing probability matrices to files.
 9 |         """)
10 |     
11 |     p.add_argument("seq_dir", nargs='+',
12 |                    help="path to dir of *.seq files")
13 |     p.add_argument("-o", help="name of output dir")
14 |     p.add_argument("-p", "--package", default='vienna_2',
15 |                    help="Package to use")
16 | 
17 |     if len(sys.argv)==1:
18 |         p.print_help(sys.stderr)
19 |         sys.exit(1)
20 | 
21 |     args = p.parse_args()
22 | 
23 |     if not os.path.exists('./%s' % args.o):
24 |         os.makedirs('./%s' % args.o)
25 | 
26 |     for seqfile in args.seq_dir:
27 |         print(seqfile)
28 |         seq=open(seqfile,'r').readlines()[-1].rstrip()
29 |         seq_id = os.path.basename(seqfile).replace('.seq','')
30 |         bp_matrix = bpps.bpps(seq, package=args.package)
31 |         with open("%s/%s.bpps" % (args.o, seq_id),'w') as f:
32 |             write_matrix_to_file(bp_matrix, f)
33 | 


--------------------------------------------------------------------------------
/scripts/write_unpaired_vectors.py:
--------------------------------------------------------------------------------
 1 | import sys, os, argparse
 2 | import arnie.bpps as bpps
 3 | import numpy as np
 4 | from arnie.utils import write_vector_to_file
 5 | 
 6 | if __name__=='__main__':
 7 |     p = argparse.ArgumentParser(description=
 8 |         """Write unpaired posterior probabilities to files.
 9 |         """)
10 |     
11 |     p.add_argument("seq_dir", nargs='+',
12 |                    help="path to dir of *.seq files")
13 |     p.add_argument("-o", help="name of output dir")
14 |     p.add_argument("-p", "--package", default='vienna_2', help="Package to use")
15 | 
16 |     if len(sys.argv)==1:
17 |         p.print_help(sys.stderr)
18 |         sys.exit(1)
19 | 
20 |     args = p.parse_args()
21 | 
22 |     if not os.path.exists('./%s' % args.o):
23 |         os.makedirs('./%s' % args.o)
24 | 
25 |     for seqfile in args.seq_dir:
26 |         print(seqfile)
27 |         seq=open(seqfile,'r').readlines()[-1].rstrip()
28 |     	seq_id = os.path.basename(seqfile).replace('.seq','')
29 | 
30 |         unp_vector = 1-np.sum(bpps.bpps(seq, package=args.package),axis=0)
31 | 
32 |     	with open("%s/%s.unp" % (args.o, seq_id),'w') as f:
33 |     		write_vector_to_file(unp_vector, f)
34 | 


--------------------------------------------------------------------------------
/src/arnie/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/arnie/bpps.py:
--------------------------------------------------------------------------------
  1 | import os, re, sys
  2 | import subprocess as sp
  3 | import random, string
  4 | import numpy as np
  5 | from .utils import *
  6 | from .pfunc import pfunc
  7 | 
  8 | # load package locations from yaml file, watch! global dict
  9 | package_locs = load_package_locations()
 10 | 
 11 | def bpps(sequence, package='vienna', constraint=None, pseudo=False,
 12 |          T=37, coaxial=True, linear=False, dna=False,
 13 |         motif=None, dangles=True,param_file=None,reweight=None, beam_size=100, DEBUG=False, threshknot=False,
 14 |         probing_signal=None, probing_kws=None,DIRLOC=None):
 15 | 
 16 |     ''' Compute base pairing probability matrix for RNA sequence.
 17 | 
 18 |     Args:
 19 |     sequence (str): nucleic acid sequence
 20 |     T (float): temperature (Celsius)
 21 |     linear (bool): call LinearPartition to estimate Z in Vienna or Contrafold
 22 |     constraint (str): structure constraint (functional in vienna, contrafold, rnastructure)
 23 |     motif (str): argument to vienna motif
 24 |     pseudo (bool): (NUPACK only) include pseudoknot calculation
 25 |     dangles (bool): dangles or not, specifiable for vienna, nupack
 26 |     dna (bool): (NUPACK only) use SantaLucia 1998 parameters for DNA
 27 |     coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
 28 |     noncanonical(bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold))
 29 |     beam size (int): Beam size for LinearPartition base pair calculation.
 30 |     DEBUG (bool): Output command-line calls to packages.
 31 |     threshknot (bool): calls threshknot to predict pseudoknots (for contrafold with LinearPartition)
 32 | 
 33 |     Possible packages: 'vienna_2', 'vienna_1','contrafold_1','contrafold_2',
 34 |     'nupack_95','nupack_99','rnasoft_2007','rnasoft_1999','rnastructure','vfold_0','vfold_1'
 35 | 
 36 |     Returns
 37 |     array: NxN matrix of base pair probabilities
 38 |   '''
 39 |     package = package.lower()
 40 |     try:
 41 |         pkg, version = package.split('_')
 42 |     except:
 43 |         pkg, version = package, None
 44 | 
 45 |     if motif is not None and pkg != 'vienna':
 46 |         raise ValueError('motif option can only be used with Vienna.')
 47 | 
 48 |     if pseudo and pkg != 'nupack':
 49 |         raise ValueError('pseudoknot option only implemented with Nupack.')
 50 | 
 51 |     if not dangles and pkg not in ['vienna','nupack']:
 52 |         print('Warning: %s does not support dangles options' % pkg)
 53 |     if not coaxial and pkg not in ['rnastructure','vfold']:
 54 |         print('Warning: %s does not support coaxial options' % pkg)
 55 |     if linear and pkg not in ['vienna','contrafold','eternafold']:
 56 |         print('Warning: LinearPartition only implemented for vienna, contrafold, eternafold.')
 57 | 
 58 |     if pkg=='nupack':
 59 |         return bpps_nupack_(sequence, version = version, dangles = dangles, T = T, pseudo=pseudo, dna=dna)
 60 | 
 61 |     elif pkg=='vfold':
 62 |         return bpps_vfold_(sequence, version = version, T = T, coaxial = coaxial)
 63 |     else:
 64 | 
 65 |         _, tmp_file = pfunc(sequence, package=package, bpps=True, linear=linear,
 66 |             motif=motif, constraint=constraint, T=T, coaxial=coaxial, probing_signal=probing_signal, probing_kws=probing_kws, DIRLOC=package_locs[package],
 67 |              dangles=dangles, param_file=param_file,reweight=reweight, beam_size=beam_size, DEBUG=DEBUG, threshknot=threshknot)
 68 | 
 69 |         if linear:
 70 |             #parse linearpartition output
 71 |             return bpps_linearpartition_(sequence, tmp_file)
 72 |         else:
 73 | 
 74 |             if 'contrafold' in pkg:
 75 |                 return bpps_contrafold_(sequence, tmp_file)
 76 |             if package=='eternafold':
 77 |                 return bpps_contrafold_(sequence, tmp_file)
 78 |             elif 'vienna' in pkg:
 79 |                 return bpps_vienna_(sequence, tmp_file)
 80 |             elif 'rnasoft' in pkg:
 81 |                 return bpps_rnasoft_(sequence, tmp_file)
 82 |             elif 'rnastructure' in pkg:
 83 |                 return bpps_rnastructure_(sequence, tmp_file, coaxial=coaxial)
 84 | 
 85 |             else:
 86 |                 raise RuntimeError('package not yet implemented')
 87 | 
 88 | def bpps_vienna_(sequence, tmp_file):
 89 | 
 90 |     dot_fname = tmp_file
 91 | 
 92 |     probs=np.zeros([len(sequence), len(sequence)])
 93 |     with open(dot_fname,'r') as f:
 94 |         for line in f.readlines():
 95 |             if 'ubox' in line:
 96 |                 try:
 97 |                     i, j, p, _ = line.split()
 98 |                     i, j, p = int(i)-1, int(j)-1, float(p)**2
 99 |                     probs[i,j] = p
100 |                     probs[j,i] = p
101 |                 except:
102 |                     pass
103 |     os.remove(dot_fname)
104 |     return probs
105 | 
def bpps_contrafold_(sequence, tmp_file):
    '''Read base pair probabilities from a CONTRAfold-style posterior file.

    Only lines containing ':' are parsed. On such a line the first field is
    the 1-based position of a nucleotide, the second field is ignored, and the
    remaining fields are 'partner:probability' entries. Each probability is
    stored symmetrically.

    Args:
    sequence (str): sequence the file was computed for; sets the matrix size
    tmp_file (str): path to the file to parse; it is deleted after reading

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager so the handle is closed before the file is removed.
    with open(tmp_file) as f:
        for line in f:
            if ':' not in line:
                continue
            fields = line.split()
            first_ind = int(fields[0])-1
            for entry in fields[2:]:
                parts = entry.split(':')
                second_ind = int(parts[0])-1
                p = float(parts[1])
                probs[first_ind, second_ind] = p
                probs[second_ind, first_ind] = p

    os.remove(tmp_file)

    return probs
124 | 
def bpps_rnasoft_(sequence, tmp_file):
    '''Read base pair probabilities from an RNAsoft output file.

    Each non-blank line supplies three whitespace-separated fields: i, j and
    the probability, which is stored symmetrically at (i, j). The indices are
    used exactly as written (no 1-based shift is applied here).

    Args:
    sequence (str): sequence the file was computed for; sets the matrix size
    tmp_file (str): path to the file to parse; it is deleted after reading

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager so the handle is closed before the file is removed.
    with open(tmp_file) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            i, j, p = int(fields[0]), int(fields[1]), float(fields[2])
            probs[i,j] = p
            probs[j,i] = p

    os.remove(tmp_file)

    return probs
137 | 
def bpps_nupack_(sequence, version='95', T=37, dangles=True, pseudo=False,dna=False):
    '''Run NUPACK's `pairs` executable and read the resulting .ppairs file.

    Args:
    sequence (str): nucleic acid sequence
    version (str): '95' or '99', selecting the rna1995 / rna1999 material;
        a falsy value falls back to '95'. Ignored when dna is True.
    T (float): temperature passed to -T
    dangles (bool): True passes '-dangles some', False passes '-dangles none'
    pseudo (bool): append the pseudoknot flag to the command
    dna (bool): use the dna1998 material instead of an RNA one

    Returns
    array: NxN matrix of base pair probabilities

    Raises
    Exception: the `pairs` executable exited with a non-zero return code
    '''
    if not version:
        version = '95'

    # The RNA material is only looked up when it is actually needed.
    material = 'dna1998' if dna else {'95': 'rna1995', '99': 'rna1999'}[version]

    nupack_dir = package_locs['nupack']
    dangle_option = 'some' if dangles else 'none'

    seqfile = write([sequence])
    # `pairs` takes the input path without its '.in' suffix and writes
    # '<prefix>.ppairs' next to it.
    prefix = seqfile.replace('.in','')

    command = ['%s/pairs' % nupack_dir, '%s' % prefix,
               '-T', str(T),
               '-material', material,
               '-dangles', dangle_option,
               '-cutoff', '0.0000000001']
    if pseudo:
        command.append('--pseudo')

    proc = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
    stdout, stderr = proc.communicate()

    if proc.returncode:
        raise Exception('Nupack pfunc failed: on %s\n%s' % (sequence, stderr))

    ppairs_file = '%s.ppairs' % prefix
    os.remove(seqfile)

    n = len(sequence)
    probs = np.zeros([n, n])

    with open(ppairs_file, 'r') as f:
        for line in f.readlines():
            if line.startswith('%'):
                continue
            fields = line.split()
            if len(fields) <= 1:
                continue
            # Entries whose second index lies beyond the sequence are skipped.
            if int(fields[1]) > n:
                continue
            i, j, p = int(fields[0])-1, int(fields[1])-1, float(fields[2])
            probs[i,j] = p
            probs[j,i] = p
    os.remove(ppairs_file)

    return probs
187 | 
def bpps_rnastructure_(sequence, tmp_file, coaxial=True, DEBUG=False):
    '''Convert an RNAstructure partition function save file into a probability matrix.

    Runs `ProbabilityPlot <pfs> <out> -t -min 0.0000000001` from the
    RNAstructure directory in package_locs, then reads the text output. The
    first two lines of that output are skipped; on every following line the
    first two fields are 1-based positions and the third field x is turned
    into a probability as 10**(-x).

    Args:
    sequence (str): sequence the save file was computed for; sets the matrix size
    tmp_file (str): path to the .pfs file; it is deleted after reading
    coaxial (bool): accepted for interface symmetry; unused here
    DEBUG (bool): print the command and its captured stdout/stderr

    Returns
    array: NxN matrix of base pair probabilities

    Raises
    Exception: ProbabilityPlot exited with a non-zero return code
    '''

    DIR = package_locs['rnastructure']

    pfsfile = tmp_file #'%s/rnastructtmp.pfs' % package_locs['TMP']
    outfile = '%s.probs' % (tmp_file.replace('.pfs',''))
    command = ['%s/ProbabilityPlot' % DIR, pfsfile, outfile, '-t', '-min', '0.0000000001']

    probs=np.zeros([len(sequence), len(sequence)])

    if DEBUG: print(' '.join(command))
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)

    stdout, stderr = p.communicate()

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        # Report the sequence that failed; this used to reference an undefined
        # name and so raised NameError instead of this message.
        raise Exception('RNAstructure ProbabilityPlot failed: on %s\n%s' % (sequence, stderr))

    with open(outfile, 'r') as f:
        for line in f.readlines()[2:]:
            fields = line.split()
            i, j, prob = int(fields[0])-1, int(fields[1])-1, 10**(-1*float(fields[2]))
            probs[i,j] = prob
            probs[j,i] = prob

    os.remove(outfile)
    os.remove(pfsfile)
    return probs
222 | 
def bpps_vfold_(sequence, version='0',T=37, coaxial=True, DEBUG=False):
    '''Run the Vfold2d_npk binary and read the resulting pair probability file.

    The binary is started from inside the Vfold directory in package_locs and
    the caller's working directory is restored afterwards, also on failure.

    Args:
    sequence (str): nucleic acid sequence
    version (str): available versions: 0 for Turner 04 params, 1 for Mfold 2.3 params
    T (float): temperature; formatted with %d, so it is passed as an integer
    coaxial (bool): coaxial stacking or not, passed as 0/1
    DEBUG (bool): print the working directory, command and captured output

    Returns
    array: NxN matrix of base pair probabilities

    Raises
    RuntimeError: the current platform has no Vfold binary
    Exception: the binary exited with a non-zero return code
    '''

    DIR = package_locs["vfold"]

    cwd = os.getcwd()
    os.chdir(DIR) #vfold precompiled binaries don't work being called from elsewhere

    try:
        if DEBUG: print(os.getcwd())

        seqfile = write([sequence])

        outfile = filename()+'.pij'

        if sys.platform=="linux":
            platform='linux'
        elif sys.platform=="darwin":
            platform='mac'
        elif sys.platform=="win32":
            platform='win'
        else:
            raise RuntimeError('Vfold has binaries for linux, macOS, and win')

        command = ['./Vfold2d_npk_%s.o %d %d %s %s %d' % (platform, int(coaxial), T, seqfile, outfile, int(version))]

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)

        stdout, stderr = p.communicate()
    finally:
        # Never leave the process stranded in the Vfold directory, whatever
        # went wrong while preparing or running the command.
        os.chdir(cwd)

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)
    if p.returncode:
        raise Exception('Vfold2d_npk failed: on %s\n%s' % (sequence, stderr))

    os.remove(seqfile)
    probs = np.zeros([len(sequence),len(sequence)])
    # file columns 0 and 2 hold the two (1-based) positions, column 3 the probability
    p_ij_output = np.loadtxt(outfile,usecols=(0,2,3))

    for i,j,prob in p_ij_output:
        probs[int(i-1),int(j-1)] = prob
        probs[int(j-1),int(i-1)] = prob
    os.remove(outfile)

    return probs
    #output: take second field of last line for Z
274 | 
275 | 
def bpps_linearpartition_(sequence, tmp_file):
    '''Read base pair probabilities from a LinearPartition output file.

    Each non-blank line supplies three whitespace-separated fields: i, j
    (1-based positions) and the probability, which is stored symmetrically.

    Args:
    sequence (str): sequence the file was computed for; sets the matrix size
    tmp_file (str): path to the file to parse; it is deleted after reading

    Returns
    array: NxN matrix of base pair probabilities
    '''
    n = len(sequence)
    probs = np.zeros([n, n])

    # Context manager so the handle is closed before the file is removed.
    with open(tmp_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            # split() without an argument also copes with tabs or repeated spaces
            first_ind, second_ind, p = fields
            first_ind = int(first_ind)-1
            second_ind = int(second_ind)-1
            p = float(p)
            probs[first_ind, second_ind] = p
            probs[second_ind, first_ind] = p

    os.remove(tmp_file)

    return probs
294 | 


--------------------------------------------------------------------------------
/src/arnie/free_energy.py:
--------------------------------------------------------------------------------
 1 | import os, re, sys
 2 | import subprocess as sp
 3 | import random, string
 4 | import numpy as np
 5 | from .utils import *
 6 | from .pfunc import pfunc
 7 | 
 8 | DEBUG=False
 9 | 
10 | # load package locations from yaml file, watch! global dict
11 | package_locs = load_package_locations()
12 | 
def free_energy(seq, constraint=None, package='vienna_2', T=37, coaxial=True, dna=False, beam_size=100,
                pseudo=False, dangles=True, reweight=None, ensemble=True, param_file=None, linear=False,DEBUG=False):
    ''' Compute free energy of RNA sequence. If a constraint is given, computes the
            free energy under that constraint.

        This is a thin wrapper: all arguments are forwarded to `pfunc` with
        return_free_energy=True and its result is returned unchanged.

        Args:
        seq (str): nucleic acid sequence
        constraint (str, optional): possible structure to constrain to in dot bracket notation
        package (str): package to use, forwarded to pfunc
        T (float): temperature (Celsius), default 37

        ensemble (bool): to compute ensemble of constraint string or not.
            When False, '.' is converted to 'x' in the constraint string.
            If you want the free energy of just one structure,
            better practice is to use 'x' to denote unpaired.
            Has no effect when no constraint is given.

        beam_size (int): beam size for use in LinearPartition (Vienna, CONTRAfold, EternaFold only)
        dangles (bool): dangles or not, specifiable for vienna, nupack
        dna (bool): use SantaLucia model for DNA (NUPACK only)
        coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
        pseudo (bool): include pseudoknot (nupack only)
        reweight, param_file, linear, DEBUG: forwarded unchanged to pfunc

        Implemented packages:
        'vienna_1', 'vienna_2', 'contrafold'

        NB: doesn't multiply by kT for contrafold...

    Returns
        free energy (float)
    '''
    # Only rewrite the constraint when there is one; ensemble=False without a
    # constraint used to fail with AttributeError on None.
    if not ensemble and constraint is not None:
        constraint = constraint.replace('.','x')

    return pfunc(seq, package=package, T=T, dangles=dangles, coaxial=coaxial, pseudo=pseudo, dna=dna, beam_size = beam_size,
     constraint=constraint, reweight=reweight, param_file=param_file, return_free_energy=True, linear=linear, DEBUG=DEBUG)
49 | 
50 | 	# if package.lower().startswith('contrafold'):
51 | 	# 	Z_constrained = pfunc(seq, package=package, T=T, dangles=dangles, constraint=constraint,param_file=param_file)
52 | 
53 | 	# 	return -1* np.log(Z_constrained) # .00198 is k in kcal/mol #0.0019899*(273+T) * 
54 | 	# else:
55 | 	# 	raise RuntimeError("%s `free_energy` not implemented yet" % package)
56 | 


--------------------------------------------------------------------------------
/src/arnie/mea/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/src/arnie/mea/__init__.py


--------------------------------------------------------------------------------
/src/arnie/mea/mea.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import argparse, sys
  3 | from arnie.mea.mea_utils import *
  4 | from copy import copy
  5 | 
  6 | class MEA:
  7 |     def __init__(self, bpps, gamma = 1.0, debug=False, run_probknot_heuristic = False, theta=0, stochastic=False):
  8 |         self.debug = debug
  9 |         self.bpps = bpps
 10 |         self.N=self.bpps.shape[0]
 11 |         self.gamma = gamma
 12 |         self.theta = theta
 13 |         self.W = np.zeros([self.N,self.N])
 14 |         self.MEA_bp_list = []
 15 |         self.structure = ['.']*self.N
 16 |         self.MEA_bp_matrix = np.zeros([self.N, self.N])
 17 |         self.tb = np.zeros([self.N, self.N])
 18 |         self.min_hp_length = 3
 19 |         self.evaluated = False
 20 |         self.stochastic = stochastic
 21 | 
 22 |         if run_probknot_heuristic:
 23 |             self.run_ProbKnot()
 24 |         else:
 25 |             self.run_MEA()
 26 |         
 27 |     def fill_W(self, i, j):
 28 |         if self.stochastic:
 29 |             options = [self.W[i+1, j], self.W[i, j-1],\
 30 |              (self.gamma+1)*self.bpps[i,j] + self.W[i+1, j-1] - 1,\
 31 |             np.max([self.W[i,k] + self.W[k+1, j] for k in range(i+1,j)])]
 32 |             option_wts = options - np.min(options)
 33 |             option_wts /= np.sum(option_wts)
 34 |             selection = np.random.choice([0,1,2,3],p=option_wts)
 35 |             self.W[i,j] = options[selection]
 36 |             self.tb[i,j] = selection #0: 5' pass, 1: 3' pass, 2: bp, 3: multiloop
 37 | 
 38 |         else:
 39 |             options = [self.W[i+1, j], self.W[i, j-1],\
 40 |              (self.gamma+1)*self.bpps[i,j] + self.W[i+1, j-1] - 1,\
 41 |             np.max([self.W[i,k] + self.W[k+1, j] for k in range(i+1,j)])]
 42 |             self.W[i,j] = np.max(options) 
 43 |             self.tb[i,j] = np.argmax(options) #0: 5' pass, 1: 3' pass, 2: bp, 3: multiloop
 44 |             
 45 |     def run_MEA(self):
 46 |         # fill weight matrix
 47 |         for length in range(self.min_hp_length, self.N):
 48 |             for i in range(self.N-length):
 49 |                 j = i + length
 50 |                 self.fill_W(i,j)
 51 |                 
 52 |         self.traceback(0,self.N-1)
 53 |         
 54 |         for x in self.MEA_bp_list:
 55 |             self.MEA_bp_matrix[x[0],x[1]]=1
 56 |             self.structure[x[0]]='('
 57 |             self.structure[x[1]]=')'
 58 |         
 59 |         self.structure = ''.join(self.structure)
 60 |         if not self.evaluated: self.evaluated = True
 61 | 
 62 |     def run_ProbKnot(self):
 63 | 
 64 |         #Threshknot step: filter out bps below cutoff theta
 65 |         threshknot_filter = np.where(self.bpps <= self.theta)
 66 |         filtered_bpps = copy(self.bpps)
 67 |         filtered_bpps[threshknot_filter] = 0
 68 | 
 69 |         output = np.zeros([self.N, self.N])
 70 |         
 71 |         # ProbKnot heuristic part 1: get all base pairs where p(ij) == p_max(i)
 72 |         output[np.where(self.bpps == np.max(self.bpps, axis=0))] = 1
 73 |         
 74 |         # ProbKnot heuristic part 2: get all base pairs where p(ij) == p_max(j)
 75 |         self.MEA_bp_matrix = np.clip(output+np.transpose(output)-1,0,1)
 76 | 
 77 |         for [i, j] in np.array(np.where(self.MEA_bp_matrix == 1)).T:
 78 |             if np.abs(i - j) > 1:
 79 |                 if [j,i] not in self.MEA_bp_list:
 80 |                     self.MEA_bp_list.append([i,j])
 81 |                     #self.structure[i] = '('
 82 |                     #self.structure[j] = ')'
 83 |         #print('Warning: formatting pseudoknotted dot-bracket structures not yet supported. Any pseudoknotted stems will only appear as parentheses (not brackets).')
 84 |         #self.structure = ''.join(self.structure)
 85 |         self.structure = convert_bp_list_to_dotbracket(self.MEA_bp_list,len(self.bpps))
 86 | 
 87 |         if not self.evaluated: self.evaluated = True
 88 | 
 89 |     def traceback(self, i, j):
 90 |         if j <= i:
 91 |             return
 92 |         elif self.tb[i,j] == 0: #5' neighbor
 93 |             if self.debug: print(i,j, "5'")
 94 |             self.traceback(i+1,j)
 95 |         elif self.tb[i,j] == 1: #3' neighbor
 96 |             if self.debug: print(i,j, "3'")
 97 |             self.traceback(i,j-1)
 98 |         elif self.tb[i,j] == 2: # base pair
 99 |             if self.debug: print(i,j,'bp')
100 |             self.MEA_bp_list.append((i,j))
101 |             self.traceback(i+1,j-1)
102 |         else: #multiloop
103 |             for k in range(i+1,j):
104 |                 if self.W[i,j] == self.W[i, k] + self.W[k+1,j]:
105 |                     if self.debug: print(i,j,"multiloop, k=",k)
106 |                     self.traceback(i,k)
107 |                     self.traceback(k+1,j)
108 |                     break
109 | 
110 |     def score_expected(self):
111 |         '''Compute expected values of TP, FP, etc from predicted MEA structure.
112 | 
113 |          Returns: 
114 |          pseudoexpected SEN, PPV, MCC, F-score'''
115 | 
116 |         if not self.evaluated: 
117 |             if run_probknot_heuristic:
118 |                 self.run_ProbKnot()
119 |             else:
120 |                 self.run_MEA()
121 | 
122 |         pred_m = self.MEA_bp_matrix[np.triu_indices(self.N)]
123 |         probs = self.bpps[np.triu_indices(self.N)]
124 | 
125 |         TP = np.sum(np.multiply(pred_m, probs)) + 1e-6
126 |         TN = 0.5*self.N*self.N-1 - np.sum(pred_m) - np.sum(probs) + TP + 1e-6
127 |         FP = np.sum(np.multiply(pred_m, 1-probs)) + 1e-6
128 |         FN = np.sum(np.multiply(1-pred_m, probs)) + 1e-6
129 | 
130 |         a,b = np.triu_indices(self.N)
131 |         cFP = 1e-6
132 |         # for i in range(len(pred_m)):
133 |         #     if np.sum(self.MEA_bp_matrix,axis=0)[a[i]] + np.sum(self.MEA_bp_matrix,axis=0)[b[i]]==0:
134 |         #        cFP += np.multiply(pred_m[i], 1-probs[i])
135 | 
136 |         sen = TP/(TP + FN)
137 |         ppv = TP/(TP + FP - cFP)
138 |         mcc = (TP*TN - (FP - cFP)*FN)/np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN))
139 |         fscore = 2*TP/(2*TP + FP - cFP + FN)
140 | 
141 |         return [sen, ppv, mcc, fscore]
142 | 
143 |     def score_ground_truth(self, ground_truth_struct, allow_pseudoknots=False):
144 |         if len(ground_truth_struct[0])==1:
145 |             gt_matrix = convert_dotbracket_to_matrix(ground_truth_struct)
146 |         else:
147 |             gt_matrix = ground_truth_struct
148 | 
149 |         if not self.evaluated: self.run_MEA()
150 |         sen, ppv, mcc, fscore, _ = score_ground_truth(self.MEA_bp_matrix, gt_matrix)
151 |         return [sen, ppv, mcc, fscore]
152 | 


--------------------------------------------------------------------------------
/src/arnie/mea/mea_utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import argparse, sys
  3 | 
  4 | def convert_dotbracket_to_matrix(s):
  5 |     m = np.zeros([len(s),len(s)])
  6 |     for char_set in [['(',')'], ['[',']'],['{','}'],['<','>']]:
  7 |         bp1=[]
  8 |         bp2=[]
  9 |         for i, char in enumerate(s):
 10 |             if char==char_set[0]:
 11 |                 bp1.append(i)
 12 |             if char==char_set[1]:
 13 |                 bp2.append(i)
 14 |         for i in list(reversed(bp1)):
 15 |             for j in bp2:
 16 |                 if j > i:
 17 |                     m[i,j]=1.0
 18 |                     bp2.remove(j)
 19 |                     break
 20 |     return m
 21 | 
 22 | 
 23 | 
 24 | def convert_matrix_to_dotbracket(m):
 25 |     bp_list = convert_matrix_to_bp_list(m)
 26 |     return convert_bp_list_to_dotbracket(bp_list,len(m))
 27 | 
 28 | def convert_matrix_to_bp_list(m):
 29 |     bp_list = [] # convert adjacency matrix to adjacency list
 30 |     for i,row in enumerate(m):
 31 |         for j,is_bp in enumerate(row[i+1:]):
 32 |             if is_bp:
 33 |                 bp_list.append([i,i+1+j])
 34 |     return bp_list
 35 | 
 36 | 
 37 | def convert_bp_list_to_dotbracket(bp_list,seq_len):
 38 |     dotbracket = "."*seq_len
 39 |     # group into bps that are not intertwined and can use same brackets!
 40 |     groups = group_into_non_conflicting_bp_(bp_list)
 41 | 
 42 |     # all bp that are not intertwined get (), but all others are
 43 |     # groups to be nonconflicting and then asigned (), [], {}, <> by group
 44 |     chars_set = [("(",")"),("(",")"),("[","]"),("{","}"),("<",">")]
 45 |     if len(groups) > len(chars_set):
 46 |         print("WARNING: PK too complex, not enough brackets to represent it.")
 47 | 
 48 |     for group,chars in zip(groups,chars_set):
 49 |         for bp in group:
 50 |             dotbracket = dotbracket[:bp[0]] + chars[0] + dotbracket[bp[0]+1:bp[1]] + chars[1] + dotbracket[bp[1]+1:]
 51 |     return dotbracket
 52 | 
 53 | 
 54 | def load_matrix_or_dbn(s):
 55 |     num_lines = sum(1 for line in open(s))
 56 | 
 57 |     if num_lines > 2: #heuristic here
 58 |         struct = np.loadtxt(s) # load as base pair matrix
 59 |         assert struct.shape[0] == struct.shape[1]
 60 |     else:
 61 |         try: # load as dot-bracket string
 62 | 
 63 |             dbn_struct = open(s,'r').read().rstrip()
 64 | 
 65 |             struct = convert_dotbracket_to_matrix(dbn_struct)
 66 |         except:
 67 |             raise ValueError('Unable to parse structure %s' % s)
 68 |     return struct
 69 | 
 70 | def score_ground_truth(pred_matrix, true_matrix):
 71 |     '''Score a predicted structure against a true structure,
 72 |      input as NxN base pair matrix (takes top triangle).'''
 73 | 
 74 |     N = pred_matrix.shape[0]
 75 |     #print('pred',pred_matrix.shape, 'true', true_matrix.shape)
 76 |     assert pred_matrix.shape[1] == N
 77 |     assert true_matrix.shape[0] == N
 78 |     assert true_matrix.shape[1] == N
 79 | 
 80 |     true = true_matrix[np.triu_indices(N)]
 81 |     pred = pred_matrix[np.triu_indices(N)]
 82 | 
 83 |     TP, FP, cFP, TN, FN = 0, 0, 0, 0, 0
 84 | 
 85 |     for i in range(len(true)):
 86 |         if true[i] == 1:
 87 |             if pred[i] == 1:
 88 |                 TP += 1
 89 |             else:
 90 |                 FN += 1
 91 |         elif true[i] == 0:
 92 |             if pred[i] == 0:
 93 |                 TN += 1
 94 |             else:
 95 |                 FP += 1
 96 |                 #check for compatible false positive
 97 |                 a,b = np.triu_indices(N)
 98 |                 if np.sum(true_matrix,axis=0)[a[i]]+ np.sum(true_matrix,axis=0)[b[i]]==0:
 99 |                    cFP +=1
100 | 
101 |     # cFP = 0 #for debugging
102 | 
103 |     #print('TP', TP, 'TN', TN, 'FP', FP, 'FN', FN, 'cFP', cFP)
104 | 
105 |     if TP + FN == 0:
106 |         sen = 1
107 |     else:
108 |         sen = TP/(TP + FN)
109 | 
110 |     if TP + FP - cFP == 0:
111 |         ppv = 1
112 |     else:
113 |         ppv = TP/(TP + FP - cFP)
114 | 
115 |     mcc_num = (TP*TN - (FP - cFP)*FN)
116 |     mcc_denom = np.sqrt((TP + FP - cFP)*(TP + FN)*(TN + FP - cFP)*(TN + FN))
117 | 
118 |     if  mcc_denom == 0:
119 |         mcc = mcc_num
120 |     else:
121 |         mcc = mcc_num/mcc_denom
122 | 
123 |     if ppv + sen == 0:
124 |         fscore = 0
125 |     else:
126 |         fscore = 2*ppv*sen/(ppv+sen)
127 | 
128 |     return sen, ppv, mcc, fscore, N
129 | 
130 | 
def group_into_non_conflicting_bp_(bp_list):
    ''' split a list of base pairs into groups, using the conflicts reported by get_list_bp_conflicts_

    Args
        bp_list: list of base pairs

    Returns:
        list of groups of base pairs. The first group holds every base pair
        that takes part in no conflict. Each further group is built around
        the first still-unassigned base pair and holds all unassigned base
        pairs that do not conflict with it.

    NOTE(review): members of a group are only checked against the base pair
    the group is built around, not against each other.
    '''
    open_conflicts = get_list_bp_conflicts_(bp_list)
    unassigned = get_non_redudant_bp_list_(open_conflicts)

    groups = [[bp for bp in bp_list if bp not in unassigned]]
    while unassigned != []:
        pivot = unassigned[0]

        # every base pair still in conflict with the pivot
        clashing = []
        for conflict in open_conflicts:
            first, second = conflict[0], conflict[1]
            if pivot == first:
                clashing.append(second)
            elif pivot == second:
                clashing.append(first)

        group = [bp for bp in unassigned if bp not in clashing]
        groups.append(group)

        # conflicts touching an assigned base pair are settled; the rest carry over
        open_conflicts = [conflict for conflict in open_conflicts
                          if not (conflict[0] in group or conflict[1] in group)]
        unassigned = clashing
    return groups
158 | 
159 | 
160 | def get_list_bp_conflicts_(bp_list):
161 |     '''given a bp_list gives the list of conflicts bp-s which indicate PK structure
162 |     Args:
163 |         bp_list: of list of base pairs where the base pairs are list of indeces of the bp in increasing order (bp[0]<bp[1])
164 |     returns:
165 |         List of conflicting basepairs, where conflicting is pairs of base pairs that are intertwined.
166 |     '''
167 |     if len(bp_list) <= 1:
168 |         return []
169 |     else:
170 |         current_bp = bp_list[0]
171 |         conflicts = []
172 |         for bp in bp_list[1:]:
173 |             if (bp[0] < current_bp[1] and current_bp[1] < bp[1]):
174 |                 conflicts.append([current_bp,bp])
175 |         return conflicts + get_list_bp_conflicts_(bp_list[1:])
176 | 
def get_non_redudant_bp_list_(conflict_list):
    ''' collect every base pair that appears in a conflict list, once each

    Args:
        conflict_list: list of pairs of base_pairs that are intertwined basepairs
    returns:
        list of basepairs in conflict list without repeats, in order of first appearance
    '''
    unique_bps = []
    for conflict in conflict_list:
        for bp in (conflict[0], conflict[1]):
            if bp not in unique_bps:
                unique_bps.append(bp)
    return unique_bps
192 | 


--------------------------------------------------------------------------------
/src/arnie/mea/threshknot.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from arnie.bpps import bpps
 3 | 
 4 | def threshknot_util(sequence, package='vienna_2', theta=0):
 5 |     '''
 6 |     Inputs:
 7 |     sequence: RNA sequence
 8 |     package: folding package to use
 9 |     
10 |     Set theta = 0 to not filter base pairs as in ThreshKnot.
11 |     
12 |     Returns: N x N matrix of base pair probabilities. Nonzero entries represent base pairs
13 |     predicted in final (possibly pseudoknotted) structure.
14 |     Probabilities are their associated probability (obvs).
15 |     '''
16 |     
17 |     bp_matrix = bpps(sequence, package=package)
18 |     
19 |     # if desired, filter base pair probabilities below a cutoff
20 |     bp_matrix[np.where(bp_matrix <= theta)] = 0
21 |     output = np.zeros([len(sequence),len(sequence)])
22 |     
23 |     # ProbKnot heuristic part 1: get all base pairs where p(ij) == p_max(i)
24 |     output[np.where(bp_matrix == np.max(bp_matrix,axis=0))] = 1
25 |     
26 |     # ProbKnot heuristic part 2: get all base pairs where p(ij) == p_max(j)
27 |     array_of_bps = np.clip(output+np.transpose(output)-1,0,1)
28 |     
29 |     # setting all bp probabilities not corresponding to a final selected base pair to zero
30 |     bp_matrix[np.where(array_of_bps == 0)] = 0
31 |     
32 |     return bp_matrix
33 | 


--------------------------------------------------------------------------------
/src/arnie/mfe.py:
--------------------------------------------------------------------------------
  1 | import os, re, sys, shutil
  2 | import subprocess as sp
  3 | import random, string
  4 | import numpy as np
  5 | from .utils import *
  6 | 
  7 | DEBUG=False
  8 | 
  9 | # load package locations from yaml file, watch! global dict
 10 | package_locs = load_package_locations()
 11 | 
 12 | def mfe(seq, package='vienna_2', T=37,
 13 |     constraint=None, motif=None,
 14 |     linear=False, return_dG_MFE = False,
 15 |     dangles=True, noncanonical=False, beam_size=100,
 16 |     bpps=False, param_file=None, coaxial=True, reweight=None,viterbi = False,
 17 |     probing_signal=None,probing_kws=None, pseudo=False,
 18 |     shape_signal=None, dms_signal=None, shape_file=None, dms_file=None, **kwargs):
 19 | 
 20 |     ''' Compute MFE structure (within package) for RNA sequence.
 21 |     Note: this is distinct from the arnie MEA codebase, which takes any base pair probability matrix and computes the maximum expected accuracy structure.
 22 |     That said, Contrafold's default structure prediction is an MEA structure, not MFE.  In this module, calling Contrafold returns the default MEA structure unless the 
 23 |     --viterbi flag is used, which will do the viterbi (MFE) algorithm in contrafold.
 24 | 
 25 |         Args:
 26 |         seq (str): nucleic acid sequence
 27 |         T (float): temperature (Celsius)
 28 |         constraint (str): structure constraints
 29 |         linear (bool): call LinearFold to estimate MFE in Vienna or Contrafold
 30 |         motif (str): argument to vienna motif 
 31 |         return_dG_MFE (bool): also return dG(MFE) (specific to linearfold)
 32 |         dangles (bool): dangles or not, specifiable for vienna, nupack
 33 |         coaxial (bool): coaxial stacking or not, specifiable for rnastructure, vfold
 34 |         noncanonical(bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold))
 35 |         shape_signal(list): list of normalized SHAPE reactivities, with negative values indicating no signal
 36 |         dms_signal(list): list of normalized DMS reactivities, with negative values indicating no signal
 37 |         pseudo: if True, will predict pseudoknots
 38 | 
 39 |         Possible packages: 
 40 |         'vienna_2', 'vienna_1','contrafold_1','contrafold_2', 'rnastructure'
 41 |         
 42 |     Returns
 43 |         string: MFE structure
 44 |     '''
 45 | 
 46 |     # TODO: update to be just probing_signal
 47 |     # if shape_signal:
 48 |     #     print('Warning: shape_signal is deprecated, use probing_signal')
 49 |     #     probing_signal = copy(shape_signal)
 50 | 
 51 |     # if dms_signal:
 52 |     #     print('Warning: dms_signal is deprecated, use probing_signal')
 53 |     #     probing_signal = copy(dms_signal)
 54 | 
 55 |     try:
 56 |         pkg, version = package.lower().split('_')
 57 |     except:
 58 |         pkg, version = package.lower(), None
 59 | 
 60 |     if not bpps: # if bpps, already printed these warnings
 61 |         if not dangles and pkg not in ['vienna', 'nupack']:
 62 |             print('Warning: %s does not support dangles options' % pkg)
 63 |         if not coaxial and pkg not in ['rnastructure', 'vfold']:
 64 |             print('Warning: %s does not support coaxial options' % pkg)
 65 | 
 66 |     if linear and pkg not in ['vienna','contrafold','eternafold']:
 67 |         print('Warning: LinearFold only implemented for vienna, contrafold, eternafold.')
 68 | 
 69 |     if pseudo and not pkg in ['rnastructure', 'nupack']:
 70 |         print('Warning: %s and pseudoknots not supported in Arnie yet' % pkg)
 71 | 
 72 |     if pkg=='vienna':
 73 |         if linear:
 74 |             if return_dG_MFE:
 75 |                 struct, dG_MFE = mfe_linearfold_(seq, package='vienna', return_dG_MFE=return_dG_MFE, beam_size=beam_size)
 76 |             else:
 77 |                 struct = mfe_linearfold_(seq, package='vienna', return_dG_MFE=return_dG_MFE)
 78 |         else:
 79 |             struct = mfe_vienna_(seq, version=version, T=T, dangles=dangles, constraint=constraint, motif=motif, param_file=param_file,
 80 |                 reweight=reweight, probing_signal=probing_signal, **kwargs)
 81 |  
 82 |     elif pkg=='contrafold':
 83 |         if linear:
 84 |             if return_dG_MFE:
 85 |                 struct, dG_MFE = mfe_linearfold_(seq, package='contrafold', return_dG_MFE=return_dG_MFE, beam_size=beam_size)
 86 |             else:
 87 |                 struct = mfe_linearfold_(seq, package='contrafold', return_dG_MFE=return_dG_MFE)
 88 |         else:
 89 |             struct = mfe_contrafold_(seq, version=version, T=T, constraint=constraint, param_file=param_file,viterbi=viterbi)
 90 | 
 91 |     elif pkg=='eternafold':
 92 |         if linear:
 93 |             if return_dG_MFE:
 94 |                 struct, dG_MFE = mfe_linearfold_(seq, package='eternafold', return_dG_MFE=return_dG_MFE, beam_size=beam_size)
 95 |             else:
 96 |                 struct = mfe_linearfold_(seq, package='eternafold', return_dG_MFE=return_dG_MFE)
 97 | 
 98 |         else:
 99 | 
100 |             if 'eternafoldparams' in package_locs.keys() and 'eternafold' not in package_locs.keys():
101 |                 struct = mfe_contrafold_(seq, version=version, T=T, constraint=constraint, param_file=package_locs['eternafoldparams'],viterbi=viterbi)
102 | 
103 |             elif 'eternafold' in package_locs.keys():
104 |                 
105 |                 # Using eternafold code and params in eternafold codebase
106 |                 efold_param_file = os.environ['ETERNAFOLD_PARAMETERS'] if os.environ.get('ETERNAFOLD_PARAMETERS') else package_locs['eternafold']+'/../parameters/EternaFoldParams.v1'
107 |                 if not os.path.exists(efold_param_file):
108 |                     raise RuntimeError('Error: Parameters not found at %s' % efold_param_file)
109 |                 else:
110 |                     struct = mfe_contrafold_(seq, version=version, T=T, constraint=constraint, DIRLOC=package_locs['eternafold'],
111 |                         param_file=efold_param_file,viterbi=viterbi, probing_signal=probing_signal, probing_kws=probing_kws)
112 | 
113 | 
114 |     elif pkg=='rnastructure':
115 |         if linear:
116 |             raise ValueError('package %s is not supported with linearfold.' % package)
117 |         else:
118 |             struct = mfe_rnastructure_(seq, version=version, T=T, constraint=constraint, 
119 |                 probing_signal=probing_signal,
120 |                 param_file=param_file, shape_signal=shape_signal, dms_signal=dms_signal, 
121 |                 shape_file=shape_file, dms_file=dms_file, pseudo = pseudo)
122 |     else:
123 |         raise ValueError('package %s not understood.' % package)
124 | 
125 |     if return_dG_MFE:
126 |         return struct, dG_MFE
127 |     else:
128 |         return struct
129 | 
def mfe_vienna_(seq, T=37, version='2', constraint=None, motif=None, param_file=None, dangles=True, reweight=None,
    probing_signal=None, shapeMethod='W', probing_kws=None, **kwargs):
    """get minimum free energy structure with Vienna

    Builds an RNAfold command line, feeds the sequence (and the constraint,
    if given) through a temporary input file on stdin, and parses the
    structure from the second line of RNAfold's output. The temporary input
    file and RNAfold's rna.ps are removed whether or not the run succeeds.

    Args:
        seq (str): nucleic acid sequence
        T (float): temperature
        version (str): Vienna major version, '2' (default) or '1'
        constraint (str): structure constraints
        motif (str): argument to vienna motif
        param_file (str): passed as --paramFile
        dangles (bool): if False, pass --dangles=0
        reweight: passed as --commands
        probing_signal: reactivity data; handled according to shapeMethod
        shapeMethod (str): 'W', 'D' or 'Z'
        probing_kws (dict): extra keyword arguments for run_RNAPVmin (shapeMethod 'W')
    Returns
        str: secondary structure representation for MFE
    Raises
        RuntimeError: unknown Vienna version
        ValueError: unknown shapeMethod, or the constraint produced an impossible structure
        Exception: RNAfold exited with a non-zero return code
    """

    if not version:
        version='2'

    if version.startswith('2'):
        LOC=package_locs['vienna_2']
    elif version.startswith('1'):
        LOC=package_locs['vienna_1']
    else:
        raise RuntimeError('Error, vienna version %s not present' % version)

    # the command has to exist before any option is appended to it
    command = ['%s/RNAfold' % LOC, '-T', str(T), '-p0'] #p0 doesn't predict bpps, saves time

    if constraint is not None:
        command.append('-C')
        command.append('--enforceConstraint')

    if motif is not None:
        command.append('--motif=%s' % motif)

    if probing_signal is not None:
        if probing_kws is None:
            probing_kws={}

        if shapeMethod=='W':
            probing_file = run_RNAPVmin(probing_signal, seq, LOC, DEBUG, **probing_kws)

        elif shapeMethod=='D' or shapeMethod=='Z':
            probing_file = write_reactivity_file_RNAstructure(probing_signal)
            command.append('--shapeConversion=O')

        else:
            raise ValueError('shapeMethod %s not understood, expected W, D or Z' % shapeMethod)

        command.append('--shape=%s' % probing_file)
        command.append('--shapeMethod=%s' % shapeMethod)

    if not dangles:
        command.append('--dangles=0')

    if reweight is not None:
        command.append('--commands=%s' % reweight)

    if param_file:
        command.append('--paramFile=%s' % param_file)

    if constraint is not None:
        fname = write([seq, constraint])
    else:
        fname = write([seq])

    try:
        with open(fname) as f:
            if DEBUG: print(fname)
            if DEBUG: print(' '.join(command))
            p = sp.Popen(command, stdin=f, stdout=sp.PIPE, stderr=sp.PIPE)
            stdout, stderr = p.communicate()
    finally:
        # clean up the input file and RNAfold's postscript output in every case
        for leftover in (fname, 'rna.ps'):
            try:
                os.remove(leftover)
            except OSError:
                pass

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        raise Exception('RNAfold failed: on %s\n%s' % (seq, stderr))

    if 'omitting constraint' in stderr.decode('utf-8'):
        raise ValueError('Constraint caused impossible structure')

    return stdout.decode('utf-8').split('\n')[1].split(' ')[0]
213 | 
def mfe_rnastructure_(seq, T=24, version=None, constraint=None, param_file=None, probing_signal=None,probing_kws=None,
    shape_signal=None, dms_signal=None, shape_file=None, dms_file=None, pseudo=False):
    """get minimum free energy structure
        with SHAPE or DMS data, uses the default slope and intercept in RNAStructure

    Runs Fold (or ShapeKnots when pseudo is True) to write a CT file, then
    ct2dot to convert the first structure to dot-bracket. Every temporary
    file created here is removed when the function exits, also when one of
    the programs fails. Reactivity files supplied by the caller
    (shape_file, dms_file) are never removed.

    Args:
        seq (str): nucleic acid sequence
        T (float): temperature; passed to Fold as T + 273.15 (not passed to ShapeKnots)
        version: must be None
        constraint (str): structure constraints in dot-bracket notation (not with pseudo)
        param_file: must be None
        probing_signal (list): if given, used in place of shape_signal
        probing_kws: accepted for interface compatibility; not used here
        shape_signal (list): SHAPE reactivities, one per nucleotide
        dms_signal (list): DMS reactivities, one per nucleotide
        shape_file (str): existing SHAPE reactivity file
        dms_file (str): existing DMS reactivity file
        pseudo (bool): predict pseudoknots with ShapeKnots
    Returns
        str: MFE structure in dot-bracket notation
    Raises
        ValueError: unsupported or contradictory arguments
        RuntimeError: reactivity list length differs from the sequence length
        Exception: Fold/ShapeKnots or ct2dot exited with a non-zero return code
    """

    if probing_signal is not None:
        shape_signal = probing_signal

    if param_file is not None:
        raise ValueError('Cannot run RNAstructure with non-default RNA parameters as specified in: %s' % param_file)
    if version is not None:
        raise ValueError('Cannot run RNAstructure with non-default version: %s' % version)
    if (shape_signal is not None) and (shape_file is not None):
        raise ValueError('Please specify SHAPE reactivities either as a list or in a SHAPE reactivity file')
    if (dms_signal is not None) and (dms_file is not None):
        raise ValueError('Please specify DMS reactivities either as a list or in a DMS reactivity file')
    if pseudo and constraint is not None:
        # checked before any temporary file is written
        raise ValueError('Cannot run RNAstructure with constraints and pseudoknots.')

    LOC=package_locs['rnastructure']

    temp_files = [] # everything listed here is deleted on exit
    try:
        seq_file = write(['>sequence', seq])
        temp_files.append(seq_file)
        ct_fname = '%s.ct' % filename()
        temp_files.append(ct_fname)

        if not pseudo:
            command = ['%s/Fold' % LOC, seq_file, ct_fname, '-T', str(T + 273.15)]
        else:
            command = ['%s/ShapeKnots' % LOC, seq_file, ct_fname]

        if constraint is not None:
            con_fname = '%s.CON' % filename()
            temp_files.append(con_fname)
            convert_dbn_to_RNAstructure_input(seq, constraint, con_fname)
            command.extend(['--constraint', con_fname])

        if dms_signal is not None:
            if len(dms_signal) != len(seq):
                raise RuntimeError('DMS signal used with RNAstructure must have same length as the sequence.')
            dms_fname = write_reactivity_file_RNAstructure(dms_signal)
            temp_files.append(dms_fname)
            command.extend(['--DMS', dms_fname])

        if dms_file is not None:
            command.extend(['--DMS', dms_file])

        if shape_signal is not None:
            if len(shape_signal) != len(seq):
                raise RuntimeError('SHAPE signal used with RNAstructure must have same length as the sequence.')
            shape_fname = write_reactivity_file_RNAstructure(shape_signal)
            temp_files.append(shape_fname)
            command.extend(['--SHAPE', shape_fname])

        if shape_file is not None:
            command.extend(['--SHAPE', shape_file])

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        stdout, stderr = p.communicate()

        if DEBUG:
            print('stdout')
            print(stdout)
            print('stderr')
            print(stderr)
        if p.returncode:
            raise Exception('RNAstructure failed: on %s\n%s' % (seq, stderr))

        # convert structure number 1 of the CT file to dot-bracket
        dot_fname = '%s.dbn' % filename()
        temp_files.append(dot_fname)
        command = ['%s/ct2dot' % LOC, ct_fname, "1", dot_fname]

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        stdout, stderr = p.communicate()

        if DEBUG:
            print('stdout')
            print(stdout)
            print('stderr')
            print(stderr)
        if p.returncode:
            raise Exception('RNAstructure ct2dot failed: on %s\n%s' % (seq, stderr))

        with open(dot_fname) as f:
            dot_lines = f.readlines()

        # the structure is the last line of the dot-bracket file
        return dot_lines[-1].strip('\n')
    finally:
        for path in temp_files:
            try:
                os.remove(path)
            except OSError:
                pass
330 | 
def mfe_contrafold_(seq, T=37, version='2', constraint=None, param_file=None,DIRLOC=None,
    viterbi=False, probing_signal=None, probing_kws=None):
    """get MFE structure for Contrafold

    Runs ``contrafold predict`` on a temporary input file and returns the
    second-to-last line of its output. The input file is removed whether or
    not the run succeeds.

    Args:
        seq (str): nucleic acid sequence
        T (float): temperature (accepted for interface compatibility; not passed to contrafold here)
        version (str): Contrafold major version, '2' (default) or '1'; ignored when DIRLOC is given
        constraint (str): structure constraints
        param_file (str): parameter file, used only when no probing_signal is given
        DIRLOC (str): directory holding the contrafold executable, overrides the version lookup
        viterbi (bool): pass --viterbi
        probing_signal: reactivity data; switches to --evidence mode with the
            EternaFoldParams_PLUS_POTENTIALS.v1 parameters
        probing_kws (dict): optional; 'kappa' is forwarded as --kappa
    Returns
        secondary structure dot-bracket string for MFE
    Raises
        RuntimeError: unknown Contrafold version
        Exception: contrafold exited with a non-zero return code
    """
    if not version: version='2'

    # resolve the executable location first so that nothing is written for a bad version
    if DIRLOC is not None:
        LOC=DIRLOC
    elif version.startswith('2'):
        LOC=package_locs['contrafold_2']
    elif version.startswith('1'):
        LOC=package_locs['contrafold_1']
    else:
        raise RuntimeError('Error, Contrafold version %s not present' % version)

    if probing_signal is not None:
        fname = write_reactivity_file_contrafold(probing_signal, seq)
    else:
        fname = '%s.in' % filename()

    try:
        command = ['%s/contrafold' % LOC, 'predict', fname]

        if probing_signal is not None:
            command = command + ['--evidence', '--params', package_locs['eternafold']+'/../parameters/EternaFoldParams_PLUS_POTENTIALS.v1', '--numdatasources','1', ]
            if probing_kws is not None:
                if 'kappa' in probing_kws.keys():
                    command = command + ['--kappa', str(probing_kws['kappa']) ]
        else:
            if param_file is not None:
                command = command + ['--params', param_file]

        if viterbi:
            command.append('--viterbi')

        if constraint is not None:
            convert_dbn_to_contrafold_input(seq, constraint, fname)
            command.append('--constraints')
        else:
            if probing_signal is None:
                # no constraint: write an all-unpaired structure as input
                convert_dbn_to_contrafold_input(seq, ''.join(['.' for x in range(len(seq))]), fname)

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)

        stdout, stderr = p.communicate()

        if DEBUG:
            print('stdout')
            print(stdout)
            print('stderr')
            print(stderr)
        if p.returncode:
            raise Exception('Contrafold failed: on %s\n%s' % (seq, stderr))

        return stdout.decode('utf-8').split('\n')[-2]
    finally:
        try:
            os.remove(fname)
        except OSError:
            pass
397 | 
def mfe_linearfold_(seq, bpps=False, package='contrafold', beam_size=100, return_dG_MFE=False):
    """get the MFE structure from LinearFold

    The sequence is piped to ``linearfold_<first letter of package>`` through
    the shell. The structure and the bracketed score are parsed from the
    second line of the output. No temporary file is written: the sequence
    only ever reaches LinearFold through the pipe.

    Args:
        seq (str): nucleic acid sequence
        bpps: accepted for interface compatibility; not used here
        package (str): 'vienna', 'contrafold' or 'eternafold'; its first
            letter selects the executable
        beam_size (int): beam size
        return_dG_MFE (bool): also return the score reported by LinearFold;
            its sign is flipped for every package other than 'vienna'
    Returns
        str, or (str, float) when return_dG_MFE is set
    Raises
        Exception: the command exited with a non-zero return code
    """

    LOC = package_locs['linearfold']

    # args:  beamsize, is_sharpturn, is_verbose, is_eval, is_constraints]
    #Todo: implement constraint input
    # NOTE(review): with shell=True and a list, subprocess hands every item
    # after the first to the shell itself, not to the piped program, so
    # beam_size and the flags may not reach linearfold -- confirm before
    # changing; the command is kept exactly as it was.
    command=['echo %s | %s/linearfold_%s' % (seq, LOC, package[0]), str(beam_size), '0', '0', '0']
    if DEBUG: print(' '.join(command))
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, shell=True)

    stdout, stderr = p.communicate()

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        raise Exception('LinearFold failed: on %s\n%s' % (seq, stderr))

    # linearfold returns two different things depending on which package
    result_fields = stdout.decode('utf-8').split('\n')[1].split(' ')
    struct = result_fields[0]

    if return_dG_MFE:
        # second field looks like "(<value>)": strip the surrounding brackets
        dG_mfe = float(result_fields[1][1:-1])

        if package.lower() != 'vienna':
            dG_mfe *= -1

        return struct, dG_mfe

    else:
        return struct
444 | 
445 | 
446 | 


--------------------------------------------------------------------------------
/src/arnie/mfe_bootstrap.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | from .mfe import mfe
 4 | from .utils import get_bpp_from_dbn
 5 | from .utils import filename
 6 | from .utils import load_package_locations
 7 | from os import remove
 8 | 
 9 | # load package locations from yaml file, watch! global dict
10 | package_locs = load_package_locations()
11 | 
def get_bootstrap_reac_file(reactivity):
    """Write a bootstrap resample of a reactivity profile to a new .SHAPE file.

    Positions are drawn with replacement (as many draws as there are
    positions) using np.random.choice. Each draw whose reactivity is greater
    than zero is written as one "<1-based position> <reactivity>" line;
    other draws are left out.

    Args:
        reactivity (list): reactivity value per nucleotide
    Returns
        str: path of the file that was written; the caller removes it
    """
    reac_file = '%s.SHAPE' % filename()
    reac_arr = np.array(reactivity)
    sample_idx = np.random.choice(len(reactivity), len(reactivity))

    # the handle is closed by the with-block even if a write fails
    with open(reac_file, 'w') as f:
        for idx in sample_idx:
            value = reac_arr[idx]
            if value > 0:
                f.write('%d %f\n' % (idx + 1, value))

    return reac_file
28 | 
def mfe_bootstrap(seq, num_bootstrap, 
    package='rnastructure', T=37,
    constraint=None, shape_signal=None, dms_signal=None, pseudo=False):
    """
    Compute MFE structure (within package) for RNA sequence with bootstrapping on the SHAPE/DMS data.

    The structure is first predicted once with the full reactivity data.
    Then, num_bootstrap times, the reactivities are resampled into temporary
    files, the structure is predicted again from those files, and its base
    pair matrix is accumulated. The temporary files of a round are removed
    at the end of that round, also when the prediction fails.

        Args:
        seq (str): nucleic acid sequence
        num_bootstrap (int): number of bootstrap rounds
        T (float): temperature (Celsius)
        constraint (str): structure constraints
        shape_signal(list): list of normalized SHAPE reactivities, with negative values indicating no signal
        dms_signal(list): list of normalized DMS reactivities, with negative values indicating no signal
        pseudo: if True, will predict pseudoknots, but only with RNAstructure

        Possible packages: 
        'rnastructure'
        
    Returns
        string: MFE structure
        np array: Base-pair probability matrix from bootstrapping
            (accumulated matrices divided by num_bootstrap)

    Raises
        ValueError: no reactivity data given, or package is not 'rnastructure'
    """
    if (shape_signal is None) and (dms_signal is None):
        raise ValueError("Bootstrapping only applies if you have reactivity data.")
    if package != 'rnastructure':
        raise ValueError("Bootstrapping only runs for now with RNAstructure")

    bpp_matrix = np.zeros((len(seq), len(seq)))

    mfe_struct = mfe(seq, package=package, T=T, constraint=constraint, 
        shape_signal=shape_signal, dms_signal=dms_signal, pseudo=pseudo)

    for bootstrap in range(num_bootstrap):
        shape_file = None
        dms_file = None

        try:
            if shape_signal is not None:
                shape_file = get_bootstrap_reac_file(shape_signal)
            if dms_signal is not None:
                dms_file = get_bootstrap_reac_file(dms_signal)

            cur_mfe_struct = mfe(seq, package=package, T=T, constraint=constraint, 
                shape_file=shape_file, dms_file=dms_file, pseudo=pseudo)
            bpp_matrix += get_bpp_from_dbn(cur_mfe_struct)
        finally:
            # drop this round's resampled files whether or not the fold worked
            for reac_file in (shape_file, dms_file):
                if reac_file is not None:
                    try:
                        remove(reac_file)
                    except OSError:
                        pass

    return [mfe_struct, bpp_matrix/num_bootstrap]
79 | 


--------------------------------------------------------------------------------
/src/arnie/pk_predictors.py:
--------------------------------------------------------------------------------
  1 | import subprocess as sp
  2 | from arnie.utils import *
  3 | import glob
  4 | from os import getcwd, chdir, remove, mkdir, rmdir, path
  5 | from scipy.optimize import linear_sum_assignment
  6 | 
  7 | 
# TODO script all previous investigations
# TODO Debug modes to print output and err to help with install issues
# TODO pk_predict options +

# Module-level lookup, loaded once at import time; the predictor wrappers in
# this file index it by package name (e.g. "hotknots", "ipknot", "knotty").
package_locs = load_package_locations()
 13 | 
 14 | 
 15 | def pk_predict(seq, predictor,
 16 |                model="default", param="parameters_DP03.txt",
 17 |                refinement=1, t1="auto", t2='auto',
 18 |                cpu=32):
 19 |     '''
 20 | 
 21 |     ipknot options:
 22 |         model: one of ["LinearPartition-C","LinearPartition-V","Boltzmann","ViennaRNA","CONTRAfold","NUPACK"]
 23 |         t1: probability threshold level 1 
 24 |         t2: probability threshold level 2
 25 |         refinement: number of times for refinment
 26 | 
 27 |     hotknots options:
 28 |         model: one of ["CC","RE","DP"]
 29 |         param: one of ["parameters_CC06.txt","parameters_CC09.txt","parameters_DP03.txt","parameters_DP09.txt"]
 30 | 
 31 |     spotrna options:
 32 |         cpu: number cpu threads
 33 | 
 34 |     e2efold options:
 35 |         ???
 36 | 
 37 |     nupack options:
 38 |         ????
 39 | 
 40 |     '''
 41 |     if predictor not in ["hotknots", "ipknot", "knotty", "spotrna", "e2efold", "pknots","spotrna2","nupack"]:
 42 |         raise ValueError('Only hotknots,ipknot,knotty,spotrna,spotrna2,e2efold,pknots,nupack implemented.')
 43 |     if predictor == "spotrna":
 44 |         return _run_spotrna(seq, cpu=cpu)[0]
 45 |     elif predictor == "spotrna2":
 46 |         return _run_spotrna2(seq)[0]
 47 |     elif predictor == "e2efold":
 48 |         return _e2efold(seq)
 49 |     elif predictor == "pknots":
 50 |         return _pknots(seq)
 51 |     elif predictor == "knotty":
 52 |         return _knotty_mfe(seq)
 53 |     elif predictor == "hotknots":
 54 |         if model == "default":
 55 |             model = "DP"
 56 |         if model not in ["CC", "RE", "DP"]:
 57 |             raise ValueError('Only CC, RE, DP model implemented for hotknots.')
 58 |         if param not in ["parameters_CC06.txt", "parameters_CC09.txt", "parameters_DP03.txt", "parameters_DP09.txt"]:
 59 |             raise ValueError('Only parameters_CC06.txt, parameters_CC09.txt, parameters_DP03.txt, parameters_DP09.txt parameters implemented for hotknots.')
 60 |         return _run_hotknots(seq, model=model, param=param)[0][0]
 61 |     elif predictor == "ipknot":
 62 |         if model == "default":
 63 |             model = "LinearPartition-C"
 64 |         if model not in ["LinearPartition-C", "LinearPartition-V", "Boltzmann", "ViennaRNA", "CONTRAfold", "NUPACK"]:
 65 |             raise ValueError('Only LinearPartition-C, LinearPartition-V, Boltzmann, ViennaRNA, CONTRAfold, NUPACK model implemented for ipknot.')
 66 |         return _ipknot_mfe(seq, model=model, refinement=refinement, t1=t1, t2=t2)
 67 |     elif predictor == "nupack":
 68 |         return _nupack_mfe_pk(seq)
 69 | 
 70 | 
 71 | def pk_predict_from_bpp(bpp, heuristic="hungarian", theta=None, allowed_buldge_len=0, min_len_helix=2,
 72 |                         exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0, prob_to_1_threshold_prior=1, ln=False, add_p_unpaired=True,
 73 |                         max_iter=1):
 74 |     '''
 75 |     threshknot options:
 76 |         theta
 77 |         max_iter
 78 |         allowed_buldge_len
 79 |         min_len_helix
 80 | 
 81 |     hungarian options:
 82 |         add_p_unpaired
 83 |         theta (aka prob_to_0_threshold_post)
 84 |         prob_to_0_threshold_prior
 85 |         prob_to_1_threshold_prior
 86 |         exp
 87 |         sigmoid_slope_factor
 88 |         ln
 89 |         allowed_buldge_len
 90 |         min_len_helix
 91 |     '''
 92 | 
 93 |     if heuristic not in ["threshknot", "hungarian"]:
 94 |         raise ValueError('Only threshknot and hunagrian heuristics implemented.')
 95 | 
 96 |     if heuristic == "threshknot":
 97 |         if theta is None:
 98 |             theta = 0.3
 99 |         return _threshknot(bpp, theta=theta, max_iter=max_iter, allowed_buldge_len=allowed_buldge_len, min_len_helix=min_len_helix)[0]
100 |     elif heuristic == "hungarian":
101 |         if theta is None:
102 |             theta = 0.0
103 |         return _hungarian(bpp, exp=1, sigmoid_slope_factor=sigmoid_slope_factor, prob_to_0_threshold_prior=prob_to_0_threshold_prior,
104 |                           prob_to_1_threshold_prior=prob_to_1_threshold_prior, theta=theta, ln=ln, add_p_unpaired=add_p_unpaired,
105 |                           allowed_buldge_len=allowed_buldge_len, min_len_helix=min_len_helix)[0]
106 | 
107 | 
108 | def _hungarian(bpp, exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0,
109 |                prob_to_1_threshold_prior=1, theta=0, ln=False, add_p_unpaired=True,
110 |                allowed_buldge_len=0, min_len_helix=2):
111 | 
112 |     bpp_orig = bpp.copy()
113 | 
114 |     if add_p_unpaired:
115 |         p_unpaired = 1 - np.clip(np.sum(bpp, axis=0), 0, 1)
116 |         for i, punp in enumerate(p_unpaired):
117 |             bpp[i, i] = punp
118 | 
119 |     # apply prob_to_0 threshold and prob_to_1 threshold
120 |     bpp = np.where(bpp < prob_to_0_threshold_prior, 0, bpp)
121 |     bpp = np.where(bpp > prob_to_1_threshold_prior, 1, bpp)
122 | 
123 |     # aply exponential. On second thought this is likely not as helpful as sigmoid since
124 |     # * for 0 < exp < 1 lower probs will increase more than higher ones (seems undesirable)
125 |     # * for exp > 1 all probs will decrease, which seems undesirable (but at least lower probs decrease more than higher ones)
126 |     bpp = np.power(bpp, exp)
127 | 
128 |     # apply log which follows botlzamann where -ln(P) porportional to Energy
129 |     if ln:
130 |         bpp = np.log(bpp)
131 | 
132 |     bpp = np.where(np.isneginf(bpp), -1e10, bpp)
133 |     bpp = np.where(np.isposinf(bpp), 1e10, bpp)
134 | 
135 |     # apply sigmoid modified by slope factor
136 |     if sigmoid_slope_factor is not None and np.any(bpp):
137 |         bpp = _sigmoid(bpp, slope_factor=sigmoid_slope_factor)
138 | 
139 |         # should think about order of above functions and possibly normalize again here
140 | 
141 |         # run hungarian algorithm to find base pairs
142 |     _, row_pairs = linear_sum_assignment(-bpp)
143 |     # Hungarian/linear sum assignment operates on a bipartite graph such that each row is assigned to
144 |     # exactly one column and each column is assigned to exactly one row, however our case is not
145 |     # bipartite. That means some chosen assignments could conflict with others, either creating
146 |     # a "chain" (eg [(0,5), (5,10)]) or cycle (eg [(0,5), (5,10), (10, 0)]). We resolve these
147 |     # conflicts by solving for the maximum weight independent set. (Note that if we have
148 |     # two assignments like [(0,5) and (5,0)] we only need to deduplicate, hence the usage of set).
149 |     bp_assignments = set(
150 |         tuple(sorted((col, row)))
151 |         for col, row in enumerate(row_pairs)
152 |         if bpp_orig[col, row] > theta and col != row
153 |     )
154 |     bp_list = []
155 |     while len(bp_assignments):
156 |         bps = [bp_assignments.pop()]
157 |         
158 |         # # Start building a chain to the "left"
159 |         check_nt = bps[0][0]
160 |         while conflict := next((bp for bp in bp_assignments if check_nt in bp), None):
161 |             bps.insert(0, conflict)
162 |             bp_assignments.remove(conflict)
163 |             check_nt = next((nt for nt in conflict if nt != check_nt), None)
164 |         # And to the "right"
165 |         check_nt = bps[-1][1]
166 |         while conflict := next((bp for bp in bp_assignments if check_nt in bp), None):
167 |             bps.append(conflict)
168 |             bp_assignments.remove(conflict)
169 |             check_nt = next((nt for nt in conflict if nt != check_nt), None)
170 |         
171 |         if len(bps) == 1:
172 |             bp_list.extend(bps)
173 |         elif len(bps) > 2 and (bps[0][0] in bps[-1] or bps[0][1] in bps[-1]):
174 |             # We have a cycle. We need to try both excluding the first element and excluding
175 |             # the last element (only one or the other, or neither, can be present since they conflict)
176 |             (bp_list_a,prob_a) = _max_weight_independent_set(bps[1:], bpp_orig)
177 |             (bp_list_b,prob_b) = _max_weight_independent_set(bps[:-1], bpp_orig)
178 |             if prob_a > prob_b:
179 |                 bp_list.extend(bp_list_a)
180 |             else:
181 |                 bp_list.extend(bp_list_b)
182 |         else:
183 |             (bp_list_,_) = _max_weight_independent_set(bps, bpp_orig)
184 |             bp_list.extend(bp_list_)
185 | 
186 |     bp_list = [list(bp) for bp in bp_list]
187 |     bp_list = _check_bp_list(bp_list)
188 |     structure = convert_bp_list_to_dotbracket(bp_list, bpp.shape[0])
189 |     structure = post_process_struct(structure, allowed_buldge_len, min_len_helix)
190 |     bp_list = convert_dotbracket_to_bp_list(structure, allow_pseudoknots=True)
191 | 
192 |     return structure, bp_list
193 | 
194 | def _max_weight_independent_set(pairs, probs):
195 |     max_sets = []
196 |     for bp in pairs:
197 |         bp_prob = probs[bp[0], bp[1]]
198 | 
199 |         if len(max_sets) == 0:
200 |             max_sets.append({'prob': bp_prob, 'bps': [bp]})
201 |         elif len(max_sets) == 1:
202 |             if max_sets[0]['prob'] > bp_prob:
203 |                 max_sets.append(max_sets[0])
204 |             elif bp_prob > max_sets[0]['prob']:
205 |                 max_sets.append({'prob': bp_prob, 'bps': [bp]})
206 |             elif abs(max_sets[0]['bps'][0][0] - max_sets[0]['bps'][0][1]) <= abs(bp[0] - bp[1]):
207 |                 max_sets.append(max_sets[0])
208 |             else:
209 |                 max_sets.append({'prob': bp_prob, 'bps': [bp]})
210 |         else:
211 |             if max_sets[-1]['prob'] > max_sets[-2]['prob'] + bp_prob:
212 |                 max_sets.append(max_sets[-1])
213 |             elif max_sets[-2]['prob'] + bp_prob > max_sets[-1]['prob']:
214 |                 max_sets.append({'prob': max_sets[-2]['prob'] + bp_prob, 'bps': [*max_sets[-2]['bps'], bp]})
215 |             elif abs(max_sets[-1]['bps'][0][0] - max_sets[-1]['bps'][0][1]) <= abs(bp[0] - bp[1]):
216 |                 max_sets.append(max_sets[-1])
217 |             else:
218 |                 max_sets.append({'prob': max_sets[-2]['prob'] + bp_prob, 'bps': [*max_sets[-2]['bps'], bp]})
219 | 
220 |     return (max_sets[-1]['bps'], max_sets[-1]['prob'])
221 | 
222 | def _sigmoid(x, slope_factor=0.5):
223 |     # normalize to [-1, 1]
224 |     numerator = (x - x.min()) * 2.0
225 |     denominator = x.max() - x.min()
226 |     #print(numerator, denominator)
227 |     x = numerator / (denominator + 1e-6) - 1.0
228 |     return 1 / (1 + np.exp(-x / slope_factor))
229 | 
230 | 
231 | def _threshknot(bpp, theta=0.3, max_iter=1, allowed_buldge_len=0, min_len_helix=2):
232 |     iteration = 0
233 |     length = bpp.shape[0]
234 |     bp_list = []
235 |     new_bp = 1
236 |     while new_bp != 0 and iteration <= max_iter:
237 |         current_bp_list = []
238 |         bp_list_flat = np.array(bp_list).flatten()
239 |         if np.any(bp_list_flat):
240 |             bpp_update = np.delete(bpp, bp_list_flat, axis=1)
241 |             if np.any(bpp_update):
242 |                 Pmax = np.amax(bpp_update, axis=1)
243 |         else:
244 |             Pmax = np.amax(bpp, axis=1)
245 |         for i in range(length):
246 |             for j in range(i + 1, length):
247 |                 if i not in bp_list_flat and j not in bp_list_flat:
248 |                     prob = bpp[i, j]
249 |                     if prob == Pmax[i] and prob == Pmax[j] and prob > theta:
250 |                         current_bp_list.append([i, j])
251 |         new_bp = len(current_bp_list)
252 |         iteration += 1
253 |         if new_bp != 0 and iteration > max_iter:
254 |             print("Reached max iteration, stopping before converged.")
255 |         else:
256 |             bp_list.extend(current_bp_list)
257 | 
258 |     bp_list = _check_bp_list(bp_list)
259 |     structure = convert_bp_list_to_dotbracket(bp_list, length)
260 |     structure = post_process_struct(structure, allowed_buldge_len, min_len_helix)
261 |     bp_list = convert_dotbracket_to_bp_list(structure, allow_pseudoknots=True)
262 |     return structure, bp_list
263 | 
264 | 
265 | def _check_bp_list(bp_list):
266 |     for bp in bp_list:
267 |         bp.sort()
268 |     bp_list.sort(key=lambda x: x[0])
269 |     nts = [nt for bp in bp_list for nt in bp]
270 |     if len(nts) > len(set(nts)):
271 |         print("WARNING some nucletotides found in more than 1 bp")
272 |         for i, bpA in enumerate(bp_list):
273 |             for bpB in bp_list[i + 1:]:
274 |                 if bpA[0] == bpB[0] and bpA[1] == bpB[1]:
275 |                     print("removing repeat bp", bpA)
276 |                     bp_list = bp_list[:i] + bp_list[i + 1:]
277 |                 elif bpA[0] in bpB:
278 |                     if abs(bpA[0] - bpA[1]) <= abs(bpB[0] - bpB[1]):
279 |                         to_remove = bpB
280 |                     else:
281 |                         to_remove = bpA
282 |                     print("WARNING base", bpA[0], "is in 2 basepairs", bpA, bpB, "THIS SHOULD BE FIXED. Removing", to_remove)
283 |                     bp_list.remove(to_remove)
284 |                 elif bpA[1] in bpB:
285 |                     if abs(bpA[0] - bpA[1]) <= abs(bpB[0] - bpB[1]):
286 |                         to_remove = bpB
287 |                     else:
288 |                         to_remove = bpA
289 |                     print("WARNING base", bpA[1], "is in 2 basepairs", bpA, bpB, "THIS SHOULD BE FIXED. Removing", to_remove)
290 |                     bp_list.remove(to_remove)
291 |     return bp_list
292 | 
293 | 
def _run_hotknots(seq, model="DP", param="parameters_DP03.txt"):
    """Run HotKnots on ``seq`` and parse the structures it reports.

    The executable is started with the HotKnots directory as its working
    directory; the working directory of the calling process is left untouched.

    Args:
        seq (str): sequence passed via ``-s``.
        model (str): value passed via ``-m``.
        param (str): parameter file name inside ``<hotknots>/params``.

    Returns:
        list of ``[structure, second_field]`` entries, one per output line
        after the two header lines; ``second_field`` is the text following the
        tab on that line. If HotKnots exits with an error, the error is
        printed and ``[["x" * len(seq), None]]`` is returned so that the result
        has the same nesting as a successful run.
    """
    hotknot_location = package_locs["hotknots"]
    command = [f"{hotknot_location}/HotKnots", "-noPS", "-s", seq, "-m", model, "-p", f"{hotknot_location}/params/{param}"]
    # cwd= replaces the former chdir()/chdir() pair, which left the process
    # stranded in the HotKnots directory whenever the failure branch returned.
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, cwd=hotknot_location)
    out, err = p.communicate()
    if p.returncode:
        print('ERROR: hotknots failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
        return [["x"*len(seq), None]]

    # First two lines are header; the last element is the empty string after the final newline.
    structs = []
    for line in out.decode().split("\n")[2:-1]:
        fields = line.split('\t')
        structs.append([fields[0].split(" ")[-1], fields[1]])
    return structs
312 | 
313 | 
def _ipknot_mfe(seq, model="LinearPartition-C", refinement=1, t1="auto", t2="auto"):
    """
    Run ipknot on ``seq`` and return the structure line of its output.

    The sequence is written to a FASTA file in a fresh temporary folder, which
    is removed again whether or not ipknot succeeds.

    Args:
        seq (str): sequence to fold.
        model (str): value for ``--model``.
        refinement: value for ``-r``.
        t1, t2: values for the two ``-t`` options, in that order.

    Returns:
        str: third line of ipknot's stdout, or ``"x" * len(seq)`` if ipknot
        exits with an error (the error is printed).

    TODO
      -g, --gamma G             The weight for true base-pairs equivalent to
                                '-t 1/(gamma+1)'
      -i, --allow-isolated      Allow isolated base-pairs
      -P, --param FILE          Read the energy parameter file for Vienna RNA
                                package
      -x, --aux                 Import an auxiliary file for base-pairing
                                probabilities
      -u, --no-levelwise        Do not perform the levelwise prediction
      -E, --energy              Output with the free energy
    """
    ipknot_location = package_locs["ipknot"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    fasta_file = f"{out_folder}/temp.fasta"
    try:
        with open(fasta_file, "w") as f:
            f.write(">seq \n")
            f.write(seq)
        command = [f"{ipknot_location}/ipknot", fasta_file, "--model", model, "-r", str(refinement), "-t", str(t1), "-t", str(t2)]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
    finally:
        # Clean up even when the executable is missing or communicate() fails.
        if path.exists(fasta_file):
            remove(fasta_file)
        rmdir(out_folder)

    if p.returncode:
        print('ERROR: ipknot failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
        return "x"*len(seq)
    output = out.decode().split("\n")
    return output[2]
347 | 
348 | 
def _knotty_mfe(seq):
    """Run knotty on ``seq`` and return the predicted structure.

    The structure is read from the second whitespace-separated token of the
    second stdout line, then round-tripped through the bp-list converters.

    Returns:
        str: structure, or ``"x" * len(seq)`` if communication with the
        process fails or knotty exits with an error (a message is printed).
    """
    knotty_location = package_locs["knotty"]
    command = [f"{knotty_location}/knotty", seq]
    p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE,universal_newlines=True)
    try:
        out, err = p.communicate()
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("ERROR knotty, could not communicate")
        return "x"*len(seq)
    if p.returncode:
        print('ERROR: knotty failed: on %s\n%s\n%s' % (seq, out, err))
        return "x"*len(seq)
    output = out.split("\n")
    struct = output[1].split(" ")[1]
    # Round trip through a bp list, presumably to normalize the bracket notation.
    bp_list = convert_dotbracket_to_bp_list(struct, allow_pseudoknots=True)
    struct = convert_bp_list_to_dotbracket(bp_list, seq_len=len(struct))
    return struct
366 | 
367 | 
def _run_spotrna(seq, cpu=32):
    '''
    SPOT-RNA

    Writes seq to a FASTA file (record name "seq") in a fresh temporary folder,
    runs SPOT-RNA.py with the configured python interpreter, and reads the
    resulting seq.bpseq / seq.prob files. The temporary folder is always
    removed afterwards.

    Args:
        seq (str): sequence to fold.
        cpu (int): value passed to --cpu.

    Returns:
        (structure, bpp). If SPOT-RNA exits with an error, the error is
        printed and ("x" * len(seq), None) is returned so callers that index
        the result still get a full-length placeholder structure.
    '''
    import shutil

    spotrna_location = package_locs["spotrna"]
    spotrna_conda_env = package_locs["spotrna_conda_env"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        input_id = local_rand_filename()
        fasta_file = f"{out_folder}/{input_id}.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq\n")
            f.write(seq)
        command = [f"{spotrna_conda_env}/python3", f"{spotrna_location}/SPOT-RNA.py", "--inputs", fasta_file, "--outputs", out_folder, "--cpu", str(cpu)]
        bpseq_file = out_folder + "/seq.bpseq"
        # keep running until output file exists
        # NOTE(review): this retries forever if SPOT-RNA exits 0 without writing output.
        while not path.exists(bpseq_file):
            p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
            out, err = p.communicate()
            if p.returncode:
                print('ERROR: spotrna failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
                return "x"*len(seq), None
        bp_list = bpseq_to_bp_list(bpseq_file)
        struct = convert_bp_list_to_dotbracket(bp_list, len(seq))
        bpp = prob_to_bpp(out_folder + "/seq.prob")
        return struct, bpp
    finally:
        # The folder was created above just for this call, so it is safe to
        # delete it wholesale, including on the failure path.
        shutil.rmtree(out_folder, ignore_errors=True)
400 | 
def _run_spotrna2(seq):
    """Run SPOT-RNA2 on ``seq`` via its ``run_spotrna2.sh`` wrapper script.

    The sequence is written to ``temp.fasta`` in a fresh temporary folder and
    results are read from ``temp_outputs/temp.bpseq`` and
    ``temp_outputs/temp.prob`` inside that folder. The folder, including the
    ``temp_outputs`` and ``temp_features`` sub-folders, is always removed.

    Returns:
        (structure, bpp). If the script exits with an error, the error is
        printed and ("x" * len(seq), None) is returned.
    """
    # TODO
    import shutil

    spotrna2_location = package_locs["spotrna2"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        fasta_file = f"{out_folder}/temp.fasta"
        with open(fasta_file, "w") as f:
            f.write(">seq\n")
            f.write(seq)
        command = [f"{spotrna2_location}/run_spotrna2.sh", fasta_file]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            print('ERROR: spotrna2 failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
            return "x"*len(seq), None
        bp_list = bpseq_to_bp_list(f"{out_folder}/temp_outputs/temp.bpseq")
        struct = convert_bp_list_to_dotbracket(bp_list, len(seq))
        bpp = prob_to_bpp(f"{out_folder}/temp_outputs/temp.prob")
        return struct, bpp
    finally:
        # Recursive removal replaces the former per-file loop, which passed
        # bare listdir() names to remove() and so looked in the wrong directory.
        shutil.rmtree(out_folder, ignore_errors=True)
429 | 
def _e2efold(seq):
    """Run e2efold's short-sequence script on ``seq`` and return the structure.

    A config file plus ``short_seqs`` / ``short_cts`` sub-folders are created
    in a fresh temporary folder; the sequence is written to
    ``short_seqs/temp.seq`` and the prediction is read from
    ``short_cts/temp.seq.ct``. The temporary folder is always removed.

    Returns:
        str: structure built from the predicted base-pair list.
    """
    # only if <600
    # TODO probably plenty of options
    import shutil

    e2efold_location = package_locs["e2efold"]
    e2efold_conda_env = package_locs["e2efold_conda_env"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    try:
        config_file = f'{out_folder}/config.json'
        with open(config_file, 'w') as f:
            f.write('\n'.join(['{',
                               '        "exp_name": "performance on short sequences (50-600)",',
                               f'        "test_folder": "{out_folder}/short_seqs",',
                               f'        "save_folder": "{out_folder}/short_cts",',
                               '        "gpu": "0",',
                               '        "u_net_d": 10,',
                               '        "BATCH_SIZE": 8,',
                               '        "batch_size_stage_1": 20,',
                               '        "batch_size_stage_2": 16,',
                               '        "OUT_STEP": 100,',
                               '        "LOAD_MODEL": true,',
                               '        "pp_steps": 20,',
                               '        "pp_loss": "f1",',
                               '        "pp_model": "mixed",',
                               '        "rho_per_position": "matrix",',
                               '        "data_type": "rnastralign_all_600",',
                               '        "model_type": "att_simple_fix",',
                               '        "epoches_first": 50,',
                               '        "epoches_second": 10,',
                               '        "epoches_third": 10,',
                               '        "evaluate_epi": 1,',
                               '        "evaluate_epi_stage_1": 5,',
                               '        "step_gamma": 1,',
                               '        "k": 1,',
                               '        "test": {',
                               '                "f1": true,',
                               '                "accuracy": false,',
                               '                "energy": false',
                               '        }',
                               '}']))
        mkdir(f'{out_folder}/short_seqs')
        mkdir(f'{out_folder}/short_cts')
        command = [f"{e2efold_conda_env}/python", f"{e2efold_location}/e2efold_productive_short.py", "-c", config_file]
        with open(f"{out_folder}/short_seqs/temp.seq", "w") as f:
            f.write(seq)
        ct_file = f"{out_folder}/short_cts/temp.seq.ct"
        # keep running until output file exists
        # NOTE(review): this retries forever if e2efold never writes the ct file.
        while not path.exists(ct_file):
            sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE).communicate()
        bp_list = ct_to_bp_list(ct_file, 1)
        return convert_bp_list_to_dotbracket(bp_list, len(seq))
    finally:
        # The folder is private to this call; remove it and everything in it,
        # also when parsing fails or the tool leaves extra files behind.
        shutil.rmtree(out_folder, ignore_errors=True)
487 | 
488 | 
def _pknots(seq):
    ''' Run pknots (with -k -g) on seq and return the predicted structure.

    The sequence is written to a FASTA file in a fresh temporary folder and
    pknots writes its result to out.out in the same folder; both files and the
    folder are removed afterwards, whether or not pknots succeeds.

    Returns:
        str: structure, or "x" * len(seq) if pknots exits with an error
        (the error is printed).

    TODO
      -a          : pseudoknot approx, exclude V7-V10 and WB9-WB1
      -c          : add L^5 coaxials (V6)
      -s          : shuffle sequences
    '''
    pknots_location = package_locs["pknots"]
    out_folder = get_random_folder()
    mkdir(out_folder)
    fasta_file = f"{out_folder}/temp.fasta"
    outfile = f"{out_folder}/out.out"
    try:
        with open(fasta_file, "w") as f:
            f.write(">seq \n")
            f.write(seq)
        command = [pknots_location + "/pknots", "-k", "-g", fasta_file, outfile]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            print('ERROR: PKNOTS failed: on %s\n%s\n%s' % (seq, out.decode(), err.decode()))
            return "x"*len(seq)
        bp_list = ct_to_bp_list(outfile, 4)
    finally:
        # Previously the failure branch returned without removing the folder.
        for tmp_file in (fasta_file, outfile):
            if path.exists(tmp_file):
                remove(tmp_file)
        rmdir(out_folder)
    struct = convert_bp_list_to_dotbracket(bp_list, len(seq))
    return struct
516 | 
517 | 
def _nupack_mfe_pk(seq):
    """Run NUPACK ``mfe -pseudo`` on ``seq`` and return the structure it reports.

    The sequence is written to ``temp.in`` in a fresh temporary folder and the
    structure is taken from line 17 of the resulting ``temp.mfe`` file (with
    its trailing character stripped). Temporary files and the folder are
    removed whether or not NUPACK succeeds.

    Returns:
        str: structure, or ``'x' * len(seq)`` if NUPACK exits with an error
        (the error, including captured stdout/stderr, is printed).
    """
    # TODO many nupack options... also why is this not implemented in mfe?
    nupack_location = package_locs['nupack']
    out_folder = get_random_folder()
    mkdir(out_folder)
    # NUPACK takes the file prefix and appends .in / .mfe itself.
    fasta_file = f"{out_folder}/temp"
    try:
        with open(f'{fasta_file}.in', 'w') as f:
            f.write(seq)
        command = [nupack_location+'/mfe', "-pseudo", fasta_file]
        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = p.communicate()
        if p.returncode:
            # decode() must be called; formatting the bound method printed its repr.
            print(f'ERROR: nupack mfe pk failed on {seq} {fasta_file} {out.decode()} {err.decode()}')
            return 'x'*len(seq)
        with open(f'{fasta_file}.mfe') as f:
            struct = f.readlines()[16][:-1]
    finally:
        for tmp_file in (f'{fasta_file}.in', f'{fasta_file}.mfe'):
            if path.exists(tmp_file):
                remove(tmp_file)
        rmdir(out_folder)
    return struct
541 | 


--------------------------------------------------------------------------------
/src/arnie/sample_structures.py:
--------------------------------------------------------------------------------
  1 | import os, re, sys, shutil
  2 | import subprocess as sp
  3 | import random, string
  4 | import numpy as np
  5 | from .utils import *
  6 | 
# When True, the sampling functions in this file print the external command
# line and the raw stdout/stderr of the process they run.
DEBUG=False

# load package locations from yaml file, watch! global dict
package_locs = load_package_locations()
 11 | 
 12 | def sample_structures(seq, n_samples = 10, package='vienna_2', T=37, constraint=None, param_file=None,
 13 | 	dangles=True, reweight=None, nonredundant=False):
 14 |     ''' Draw stochastic sampled structures for RNA sequence. Possible packages: 'eternafold', 'vienna_2'
 15 | 
 16 |         Args:
 17 |         seq (str): nucleic acid sequence
 18 |         T (float): temperature (Celsius)
 19 |         constraint (str): structure constraints
 20 |         motif (str): argument to vienna motif 
 21 |         dangles (bool): dangles or not, specifiable for vienna, nupack
 22 |         noncanonical(bool): include noncanonical pairs or not (for contrafold, RNAstructure (Cyclefold))
 23 |         
 24 |     Returns
 25 |         list of structures
 26 |         list of energies
 27 |         list of probabilities 
 28 |     '''
 29 | 
 30 |     try:
 31 |         pkg, version = package.lower().split('_')
 32 |     except:
 33 |         pkg, version = package.lower(), None
 34 | 
 35 |     if not dangles and pkg not in ['vienna','nupack']:
 36 |         print('Warning: %s does not support dangles options' % pkg)
 37 | 
 38 |     if pkg=='vienna':
 39 |         struct_list = sample_vienna_(seq, n_samples=n_samples, version=version, T=T, 
 40 |         	dangles=dangles, constraint=constraint, reweight=reweight, nonredundant = nonredundant)
 41 | 
 42 |     elif pkg=='eternafold':
 43 |         struct_list = sample_eternafold_(seq, n_samples=n_samples, param_file=param_file, constraint=constraint, nonredundant = nonredundant)
 44 | 
 45 |     else:
 46 |         raise ValueError('package %s either not understood or not supported at this moment.' % package)
 47 | 
 48 |     return struct_list
 49 | 
 50 | def sample_vienna_(seq, n_samples=10, T=37, version='2', constraint=None, 
 51 | 	dangles=True, reweight=None, nonredundant=False):
 52 |     """Stochastically sample structures from Vienna RNAsubopt.
 53 | 
 54 |     Inputs:
 55 |         seq (str): nucleic acid sequence
 56 |         n_samples (int): number of structures to sample.
 57 |         T (float): temperature
 58 |         constraint (str): structure constraints
 59 |         motif (str): argument to vienna motif  
 60 |     Outputs:
 61 |         struct_list (list): list of stochastically-sampled structures.
 62 |     """
 63 | 
 64 |     if not version:
 65 |         version='2'
 66 | 
 67 |     if version.startswith('2'):
 68 |         LOC=package_locs['vienna_2']
 69 |     elif version.startswith('1'):
 70 |         LOC=package_locs['vienna_1']
 71 |     else:
 72 |         raise RuntimeError('Error, vienna version %s not present' % version)
 73 | 
 74 |     command = ['%s/RNAsubopt' % LOC, '-T', str(T), '--stochBT_en=%d' % n_samples]#, '-N']
 75 | 
 76 |     if constraint is not None:
 77 |         fname = write([seq, constraint])
 78 |         command.append('-C')
 79 |         #command.append('--enforceConstraint')
 80 |     else:
 81 |         fname = write([seq])
 82 | 
 83 |     if not dangles:
 84 |         command.append('--dangles=0')
 85 | 
 86 |     if nonredundant:
 87 |     	command.append('-N')
 88 | 
 89 |     if reweight is not None:
 90 |         command.append('--commands=%s' % reweight)
 91 | 
 92 |     with open(fname) as f:
 93 |         if DEBUG: print(fname)
 94 |         if DEBUG: print(' '.join(command))
 95 |         p = sp.Popen(command, stdin=f, stdout=sp.PIPE, stderr=sp.PIPE)
 96 |     stdout, stderr = p.communicate()
 97 | 
 98 |     if DEBUG:
 99 |         print('stdout')
100 |         print(stdout)
101 |         print('stderr')
102 |         print(stderr)
103 | 
104 |     if p.returncode:
105 |         raise Exception('RNAsubopt failed: on %s\n%s' % (seq, stderr))
106 |     os.remove(fname)
107 | 
108 |     if 'omitting constraint' in stderr.decode('utf-8'):
109 |         raise RuntimeError("Constraint omitted, Impossible structure")
110 | 
111 |     else:
112 |         struct_list, prob_list, energy_list = [],[],[]
113 |         output_lines = stdout.decode('utf-8').split('\n')[1:-1] # first line is just repeating sequence, last is empty space
114 |         for line in output_lines:
115 |             struct_list.append(line.split(' ')[0])
116 |             # prob_list.append(float(line.split(' ')[-2]))
117 |             # energy_list.append(float(line.split(' ')[-1]))
118 | 
119 |     return struct_list
120 | 
def sample_eternafold_(seq, n_samples=10, param_file=None, constraint=None, nonredundant=False):
    """Stochastically sample structures from EternaFold.

    Inputs:
        seq (str): nucleic acid sequence
        n_samples (int): number of structures to sample.
            NOTE(review): this value is not forwarded to the contrafold
            command line below -- confirm whether a flag should be added.
        param_file (str): path to a parameter file. When None,
            package_locs['eternafoldparams'] is used instead.
        constraint (str): structure constraints. When given, it is written to
            the input file via convert_dbn_to_contrafold_input and contrafold
            is invoked with --constraints.
        nonredundant (bool): accepted for signature compatibility; currently unused.
    Outputs:
        struct_list (list): list of stochastically-sampled structures.
    Raises:
        Exception: if the contrafold process exits with a non-zero return code.
    """

    fname = '%s.in' % filename()
    LOC = package_locs['eternafold']

    command = ['%s/contrafold' % LOC, 'sample', fname]

    if param_file is not None:
        command += ['--params', param_file]
    else:
        command += ['--params', package_locs['eternafoldparams']]

    try:
        if constraint is not None:
            convert_dbn_to_contrafold_input(seq, constraint, fname)
            command.append('--constraints')
        else:
            # No constraint: write an all-unpaired placeholder structure.
            convert_dbn_to_contrafold_input(seq, '.' * len(seq), fname)

        if DEBUG: print(' '.join(command))

        p = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
        stdout, stderr = p.communicate()
    finally:
        # Always clean up the temporary input file, including when the
        # conversion or the subprocess fails (previously it was leaked on error).
        if os.path.exists(fname):
            os.remove(fname)

    if DEBUG:
        print('stdout')
        print(stdout)
        print('stderr')
        print(stderr)

    if p.returncode:
        raise Exception('Eternafold sample failed: on %s\n%s' % (seq, stderr))

    # One structure per output line; the trailing element after the final
    # newline is empty and is dropped.
    struct_list = stdout.decode('utf-8').split('\n')[:-1]
    return struct_list
169 | 


--------------------------------------------------------------------------------
/src/arnie/viz.py:
--------------------------------------------------------------------------------
  1 | ###
  2 | # File is generally for functions that relate to various ways to visualize RNA sequences and experimental data.
  3 | ###
  4 | 
  5 | import matplotlib.pyplot as plt
  6 | import matplotlib.patches as patches
  7 | 
  8 | def plot_structure_heatmap(data):
  9 |   # Setup constants
 10 |   dpi = 100.0
 11 |   font_size = 12.0
 12 |   padded_size = 18.0
 13 |   scaling_ratio = padded_size/dpi
 14 | 
 15 |   # Set default text to monospace to align sequence and structures
 16 |   plt.rcParams["font.family"] = "monospace"
 17 |   
 18 |   # Figure Height = sequence + data (experimental, predicted) + predicted structures + (opt) ROI structure
 19 |   rows = (1+len(data["reactivity"])+len(data["predictions"]))
 20 |   if "control_structure" in data.keys():
 21 |     rows += 1
 22 |   fig_height = rows * scaling_ratio
 23 | 
 24 |   # Figure Width determined by sequence length
 25 |   fig_width = len(data["sequence"]) * scaling_ratio
 26 | 
 27 |   # Create figure
 28 |   fig, axs = plt.subplots(
 29 |     nrows=rows,
 30 |     sharex=True,
 31 |     figsize=(fig_width, fig_height),
 32 |   )
 33 | 
 34 |   # SETUP COMMON TO ALL AXES
 35 |   # Normalize for data range from -2 to 2
 36 |   norm=plt.Normalize(-2,2)
 37 |   # Turn off the axis bounding box
 38 |   [ax.spines[:].set_visible(False) for ax in axs]
 39 |   # Set all axes x_limits from 0 to the length of the sequence
 40 |   [ax.set_xlim([0, len(data["sequence"])]) for ax in axs]
 41 |   # Hide the tick marks on the x axis for all axes
 42 |   [ax.tick_params(axis='x',length=0) for ax in axs]
 43 |   # Hide the tick marks on the y axis for all axes
 44 |   [ax.set_yticks([]) for ax in axs]
 45 |   [ax.tick_params(axis='y',length=0,pad=6) for ax in axs]
 46 |   # Plot blank image data for all axes (simplifies alignment of text in sequence and prediction plotting)
 47 |   [ax.imshow([[0]*(len(data["sequence"])+1)], cmap="bwr",norm=norm) for ax in axs]
 48 |   # Collapse sub-plot spacing
 49 |   fig.subplots_adjust(hspace=0)
 50 |   
 51 |   # Set the plot title
 52 |   axs[0].set_title(data["title"], fontweight='bold')
 53 | 
 54 |   ################################################
 55 |   # PLOTTING
 56 |   ################################################
 57 |   ax_index = 0
 58 |   
 59 |   # Plot sequence text
 60 |   axs[0].set_yticks([0],labels=["sequence"])
 61 |   for (i, char) in enumerate(data["sequence"]):    
 62 |     if (i>=0):
 63 |       axs[0].text(i,0,char,fontfamily='monospace', ha="center", va="center")
 64 | 
 65 |   # Create the reactivity data heatmap
 66 |   for (j, data_label) in enumerate(data["reactivity"]):
 67 |     ax_index += 1 
 68 |     axs[ax_index].set_yticks([0],labels=[data_label])
 69 |     reactivity = data["reactivity"][data_label]["data"]
 70 |     BLANK_OUT5 = data["reactivity"][data_label]["BLANK_OUT5"]
 71 |     BLANK_OUT3 = data["reactivity"][data_label]["BLANK_OUT3"]
 72 | 
 73 |     # Reactivity needs to be a list of numbers
 74 |     if type(reactivity) != list or type(reactivity[0]) != float:
 75 |       print("WARNING: reactivity data in unexpected format")
 76 |         
 77 |     # If the data has blank out regions, add them.
 78 |     display_data = [-1.0] * BLANK_OUT5 + reactivity + [-1.0] * BLANK_OUT3
 79 | 
 80 |     # Plot the heatmap and the blank out regions
 81 |     pos = axs[ax_index].imshow([display_data], cmap="bwr", norm=norm)
 82 |     blank5 = patches.Rectangle((-0.5,-0.5),BLANK_OUT5,1,color="gray")
 83 |     axs[ax_index].add_patch(blank5)
 84 |     blank3 = patches.Rectangle((len(display_data)-BLANK_OUT3-0.5,-0.5),BLANK_OUT3,1,color="gray")
 85 |     axs[ax_index].add_patch(blank3)
 86 |       
 87 |   plt.colorbar(pos, ax=axs)
 88 | 
 89 |   # Plot control structure (if provided)
 90 |   if "control_structure" in data.keys():
 91 |     if type(data["control_structure"]["start_index"]) != float and type(data["control_structure"]["structure"]) != str:
 92 |       return
 93 |     ax_index += 1
 94 |     axs[ax_index].set_yticks([0],labels=["Control Structure"])
 95 |     start_index = data["control_structure"]["start_index"]
 96 |     for (i, char) in enumerate(data["control_structure"]["structure"]):    
 97 |       if (i>=0):
 98 |           axs[ax_index].text(i+start_index,0,char,fontfamily='monospace', ha="center", va="center")
 99 |             
100 |   # Plot predictions text
101 |   for (j, predictor_name) in enumerate(data["predictions"]):
102 |     ax_index += 1
103 |     axs[ax_index].set_yticks([0],labels=[predictor_name])
104 |     for (i, char) in enumerate(data["predictions"][predictor_name]):    
105 |       if (i>=0):
106 |         axs[ax_index].text(i,0,char,fontfamily='monospace', ha="center", va="center")
107 |     if ax_index == rows-1:
108 |       axs[ax_index].tick_params(axis='x',length=4,direction='out')
109 |           
110 |   return fig


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DasLab/arnie/660de8139bd2198bbe115adadd5bc5f12183f9f4/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_bpps.py:
--------------------------------------------------------------------------------
 1 | from arnie.bpps import bpps
 2 | from arnie.utils import load_package_locations
 3 | 
 4 | sample_seq = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG'
 5 | 
 6 | 
 7 | def test_bpps(pkg):
 8 |     p = bpps(sample_seq, package=pkg)
 9 |     print('test bpps %s' % pkg)
10 |     print(p[0])
11 | 
12 | 
if __name__ == '__main__':
    # Script entry point: run test_bpps for every configured package except
    # the TMP entry, linear* packages and the pseudoknot predictors.
    print("Test: printing first row of bpp matrices")
    package_locs = load_package_locations()
    PK_packages = [
        'hotknots',
        'ipknot',
        'knotty',
        'pknots',
        'spotrna',
        'spotrna_conda_env',
        'e2efold',
        'e2efold_conda_env',
        'spotrna2',
    ]
    for pkg in sorted(package_locs.keys()):
        runnable = (pkg != 'TMP'
                    and not pkg.startswith('linear')
                    and pkg not in PK_packages)
        if runnable:
            test_bpps(pkg)
24 | 
25 | 


--------------------------------------------------------------------------------
/tests/test_evaluation_metrics.py:
--------------------------------------------------------------------------------
 1 | from arnie.utils import *
 2 | 
 3 | samiv_struct = "((((....(.((((((....((.[[[[[)).)))))))(((..((((((..{{{{)).)))).)))]]]]]....))))..((((.(((((.......))))).))))....}}}}..."
 4 | 
 5 | hotknots = "((((((....(((((((......[[[[[..))))))).....(((((....{{{{{..)))))...]]]]]..))))))..((((.(((((((...))))))).))))...}}}}}..."
 6 | ipknot = "[[[[[[....[[[..........(((((((((((]]].((((.(((.((......)).))).)))).......]]]]]].......(((((((...)))))))..))))))).)))).."
 7 | knotty = "(((..[[[[[))).]].]]]...[[[[[[[[[[[[...[[[[.[[[.[[......]].]]].]]]]((.(((....))).))....(((((((...))))))).]]]]]]]].]]]].."
 8 | spotrna = ".(((((((....(((((...((.....[[.)))))))]](((.((((((((....)))).)))))))......))))))).......((((((((.))))))))(.........)...."
 9 | e2efold = "......(............((...(.......(.............)..........)).)........(.(......(.(.(.([...).]).)...)....).).....)......."
10 | pknots = ".(((.......))).((.....))(((((((((((((((((((.((((((......)).))))))))...(((....))).)))...((((((.....)))))).)))))))).))))."
11 | empty = "." * len(samiv_struct)
12 | 
13 | 
def test_is_pk():
    """is_PK is truthy for the SAM-IV reference structure and falsy for the pknots prediction."""
    expectations = ((samiv_struct, True), (pknots, False))
    for structure, should_be_pk in expectations:
        verdict = is_PK(structure)
        assert bool(verdict) is should_be_pk
17 | 
18 | 
def test_compare_struct():
    """Check single-structure scores of predictions against the SAM-IV reference."""
    # Scalar metrics requested one at a time for the hotknots prediction.
    scalar_cases = (
        ("PPV", 0.8205128205128205),
        ("sensitivity", 0.8),
        ("F1_score", 0.810126582278481),
    )
    for metric_name, expected in scalar_cases:
        score = compare_structure_to_native(hotknots, samiv_struct, metric=metric_name)
        assert score == expected

    # metric="all" returns a mapping; only its F1_score entry is checked.
    f1_cases = (
        (hotknots, {}, 0.810126582278481),
        (empty, {}, 0),
        (hotknots, {"PK_involved": True}, 0.7796610169491526),
        (hotknots, {"PK_involved": False}, 0.9),
    )
    for predicted, extra_kwargs, expected in f1_cases:
        scores = compare_structure_to_native(predicted, samiv_struct, metric="all", **extra_kwargs)
        assert scores["F1_score"] == expected
27 | 
28 | 
def test_compare_structs():
    """Check aggregate scores of several predictions, each compared to the SAM-IV reference."""
    two_preds = (hotknots, spotrna)
    four_preds = (hotknots, spotrna, pknots, empty)

    def score(predictions, **options):
        # Fresh lists on every call; each prediction is paired with samiv_struct.
        natives = [samiv_struct for _ in predictions]
        return compare_structures_to_natives(list(predictions), natives, **options)

    assert 0.4266666666666667 == score(two_preds, comparison="basepairs")['PPV']
    assert 1.0 == score(two_preds, comparison="is_PK")["F1_score"]

    is_pk_expectations = (
        ("PPV", 1.0),
        ("sensitivity", 0.5),
        ("F1_score", 0.6666666666666666),
    )
    for metric_name, expected in is_pk_expectations:
        assert expected == score(four_preds, comparison="is_PK", metric=metric_name)

    assert 0.25 == score(four_preds, comparison="non_PK_basepairs")["sensitivity"]
    assert score(four_preds, comparison="PK_basepairs")["F1_score"] == 0.28571428571428575
37 | 
38 | 
if __name__ == '__main__':
    # Run every check in this module when executed as a script.
    for check in (test_is_pk, test_compare_struct, test_compare_structs):
        check()
43 | 


--------------------------------------------------------------------------------
/tests/test_file_readers.py:
--------------------------------------------------------------------------------
 1 | from arnie.utils import *
 2 | 
 3 | bp_list = [[1, 53], [2, 52], [3, 51], [4, 50]]
 4 | bpseq_file = "test_files/seq.bpseq"
 5 | ct_file = "test_files/seq.ct"
 6 | prob = [[0.000000000000000000e+00, 0.000000000000000000e+00, 1.763635280966738210e-08, 3.609733185304499478e-10, 1.899559890269620151e-08, 2.357239330957941875e-08, 4.468964148624955998e-08, 1.829342375496644303e-07, 4.127057055650768230e-06, 6.457873937490533129e-06, 5.384244594784908830e-06, 2.036780995975695890e-06, 8.030412357417303880e-05, 9.388004334808641715e-01],
 7 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 7.471370797708187664e-11, 1.984224746020934790e-09, 1.454165406835343696e-07, 7.498508237767878982e-07, 3.865106868873978322e-06, 3.536102881585034987e-05, 2.394675941956625451e-05, 1.153705070502445834e-05, 2.612278010790368161e-04, 9.977645040109642816e-01, 3.237492435080424613e-02],
 8 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.888956084283119840e-07, 5.413485565351087927e-07, 1.541276902667509429e-06, 1.126288121100931219e-05, 1.173869736654523994e-04, 7.325411664786217837e-06, 4.003005106708449522e-05, 9.983489958892994842e-01, 6.129986080705812287e-03, 1.054258712935700630e-04],
 9 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.665249372829090312e-07, 9.497187209883291010e-06, 1.414039620915108573e-04, 3.243636730206535555e-05, 1.567102777497324565e-05, 9.975833150208464062e-01, 5.034934175522413902e-03, 4.125712961733680345e-04, 2.274785721191600433e-07],
10 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.738593674907211730e-05, 8.545859784264292502e-05, 3.527927141337322309e-04, 9.980848491909390940e-01, 5.202587741165264415e-03, 1.251151206947209791e-04, 8.513900403421354130e-05, 2.669391452879514996e-07],
11 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 4.533978023581152796e-03, 6.727278019311770663e-02, 6.385230293170572440e-04, 1.043096878283355517e-05, 5.352860417885025332e-05, 2.197980140741698389e-06, 7.290393039203351602e-07],
12 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 2.247207617456368913e-06, 1.250804117783563030e-06, 1.540204172157810605e-05, 6.227562675838443224e-05, 4.136123245159868602e-06, 1.480250050762932428e-06],
13 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 5.317539514120537016e-08, 1.466432823612201115e-06, 4.843728467860043972e-05, 9.024276054982824927e-06, 5.084605728081093467e-07],
14 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.142360160134971489e-07, 2.745012688262551338e-05, 2.086567406564575274e-05, 2.143480176383162954e-06],
15 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.219164078076087741e-06, 1.118464018225010500e-05, 7.100719490463872321e-06],
16 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 4.536954583383731369e-06, 9.361786362639244079e-06],
17 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 1.650802739207468036e-06],
18 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00],
19 |         [0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00, 0.000000000000000000e+00]]
20 | prob_file = "test_files/seq.prob"
21 | 
22 | 
def test_file_converters():
    """Check the bpseq/ct/prob readers against the fixtures, including wrong header lengths."""
    # Correct header lengths reproduce the expected base-pair list.
    from_bpseq = bpseq_to_bp_list(bpseq_file, header_length=1)
    assert from_bpseq == bp_list
    from_ct = ct_to_bp_list(ct_file, header_length=2)
    assert from_ct == bp_list

    # The probability file round-trips to the literal matrix.
    loaded_matrix = prob_to_bpp(prob_file).tolist()
    assert loaded_matrix == prob

    # Skipping too many header lines must change the parsed result.
    bpseq_overskipped = bpseq_to_bp_list(bpseq_file, header_length=3)
    assert bpseq_overskipped != bp_list
    ct_overskipped = ct_to_bp_list(ct_file, header_length=4)
    assert ct_overskipped != bp_list


if __name__ == '__main__':
    # Allow running this module directly as a script.
    test_file_converters()
32 | 


--------------------------------------------------------------------------------
/tests/test_files/seq.bpseq:
--------------------------------------------------------------------------------
 1 | #seq
 2 | 1 A 0
 3 | 2 C 54
 4 | 3 A 53
 5 | 4 G 52
 6 | 5 C 51
 7 | 6 U 0
 8 | 7 A 0
 9 | 8 C 0
10 | 9 G 0
11 | 10 U 0
12 | 11 C 0
13 | 12 A 0
14 | 13 G 0
15 | 14 U 0
16 | 15 G 0
17 | 16 C 0
18 | 17 A 0
19 | 18 G 0
20 | 19 U 0
21 | 20 A 0
22 | 21 C 0
23 | 22 G 0
24 | 23 G 0
25 | 24 G 0
26 | 25 C 0
27 | 26 C 0
28 | 27 C 0
29 | 28 C 0
30 | 29 C 0
31 | 30 C 0
32 | 31 C 0
33 | 32 C 0
34 | 33 C 0
35 | 34 C 0
36 | 35 C 0
37 | 36 U 0
38 | 37 U 0
39 | 38 U 0
40 | 39 U 0
41 | 40 U 0
42 | 41 U 0
43 | 42 U 0
44 | 43 A 0
45 | 44 C 0
46 | 45 G 0
47 | 46 U 0
48 | 47 C 0
49 | 48 G 0
50 | 49 A 0
51 | 50 U 0
52 | 51 G 5
53 | 52 C 4
54 | 53 U 3
55 | 54 G 2
56 | 


--------------------------------------------------------------------------------
/tests/test_files/seq.ct:
--------------------------------------------------------------------------------
 1 | 54		seq		SPOT-RNA output
 2 | 
 3 | 1		A		0		2		0		1
 4 | 2		C		1		3		54		2
 5 | 3		A		2		4		53		3
 6 | 4		G		3		5		52		4
 7 | 5		C		4		6		51		5
 8 | 6		U		5		7		0		6
 9 | 7		A		6		8		0		7
10 | 8		C		7		9		0		8
11 | 9		G		8		10		0		9
12 | 10		U		9		11		0		10
13 | 11		C		10		12		0		11
14 | 12		A		11		13		0		12
15 | 13		G		12		14		0		13
16 | 14		U		13		15		0		14
17 | 15		G		14		16		0		15
18 | 16		C		15		17		0		16
19 | 17		A		16		18		0		17
20 | 18		G		17		19		0		18
21 | 19		U		18		20		0		19
22 | 20		A		19		21		0		20
23 | 21		C		20		22		0		21
24 | 22		G		21		23		0		22
25 | 23		G		22		24		0		23
26 | 24		G		23		25		0		24
27 | 25		C		24		26		0		25
28 | 26		C		25		27		0		26
29 | 27		C		26		28		0		27
30 | 28		C		27		29		0		28
31 | 29		C		28		30		0		29
32 | 30		C		29		31		0		30
33 | 31		C		30		32		0		31
34 | 32		C		31		33		0		32
35 | 33		C		32		34		0		33
36 | 34		C		33		35		0		34
37 | 35		C		34		36		0		35
38 | 36		U		35		37		0		36
39 | 37		U		36		38		0		37
40 | 38		U		37		39		0		38
41 | 39		U		38		40		0		39
42 | 40		U		39		41		0		40
43 | 41		U		40		42		0		41
44 | 42		U		41		43		0		42
45 | 43		A		42		44		0		43
46 | 44		C		43		45		0		44
47 | 45		G		44		46		0		45
48 | 46		U		45		47		0		46
49 | 47		C		46		48		0		47
50 | 48		G		47		49		0		48
51 | 49		A		48		50		0		49
52 | 50		U		49		51		0		50
53 | 51		G		50		52		5		51
54 | 52		C		51		53		4		52
55 | 53		U		52		54		3		53
56 | 54		G		53		0		2		54
57 | 


--------------------------------------------------------------------------------
/tests/test_files/seq.prob:
--------------------------------------------------------------------------------
 1 | 0.000000000000000000e+00	0.000000000000000000e+00	1.763635280966738210e-08	3.609733185304499478e-10	1.899559890269620151e-08	2.357239330957941875e-08	4.468964148624955998e-08	1.829342375496644303e-07	4.127057055650768230e-06	6.457873937490533129e-06	5.384244594784908830e-06	2.036780995975695890e-06	8.030412357417303880e-05	9.388004334808641715e-01
 2 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	7.471370797708187664e-11	1.984224746020934790e-09	1.454165406835343696e-07	7.498508237767878982e-07	3.865106868873978322e-06	3.536102881585034987e-05	2.394675941956625451e-05	1.153705070502445834e-05	2.612278010790368161e-04	9.977645040109642816e-01	3.237492435080424613e-02
 3 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.888956084283119840e-07	5.413485565351087927e-07	1.541276902667509429e-06	1.126288121100931219e-05	1.173869736654523994e-04	7.325411664786217837e-06	4.003005106708449522e-05	9.983489958892994842e-01	6.129986080705812287e-03	1.054258712935700630e-04
 4 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.665249372829090312e-07	9.497187209883291010e-06	1.414039620915108573e-04	3.243636730206535555e-05	1.567102777497324565e-05	9.975833150208464062e-01	5.034934175522413902e-03	4.125712961733680345e-04	2.274785721191600433e-07
 5 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.738593674907211730e-05	8.545859784264292502e-05	3.527927141337322309e-04	9.980848491909390940e-01	5.202587741165264415e-03	1.251151206947209791e-04	8.513900403421354130e-05	2.669391452879514996e-07
 6 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	4.533978023581152796e-03	6.727278019311770663e-02	6.385230293170572440e-04	1.043096878283355517e-05	5.352860417885025332e-05	2.197980140741698389e-06	7.290393039203351602e-07
 7 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	2.247207617456368913e-06	1.250804117783563030e-06	1.540204172157810605e-05	6.227562675838443224e-05	4.136123245159868602e-06	1.480250050762932428e-06
 8 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	5.317539514120537016e-08	1.466432823612201115e-06	4.843728467860043972e-05	9.024276054982824927e-06	5.084605728081093467e-07
 9 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.142360160134971489e-07	2.745012688262551338e-05	2.086567406564575274e-05	2.143480176383162954e-06
10 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.219164078076087741e-06	1.118464018225010500e-05	7.100719490463872321e-06
11 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	4.536954583383731369e-06	9.361786362639244079e-06
12 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	1.650802739207468036e-06
13 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00
14 | 0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00	0.000000000000000000e+00
15 | 


--------------------------------------------------------------------------------
/tests/test_helix_getting_and_removing.py:
--------------------------------------------------------------------------------
 1 | from arnie.utils import *
 2 | 
 3 | 
 4 | s = "(((....)).)...(..)....(((..(((....))))))"
 5 | s_0_2 = ".((....)).............(((..(((....))))))"
 6 | s_0_3 = "......................(((..(((....))))))"
 7 | s_1_3 = "(((....)).)...........(((..(((....))))))"
 8 | s_2_3 = "(((....)).)...........(((..(((....))))))"
 9 | s_0_4 = "........................................"
10 | s_1_4 = "........................................"
11 | s_2_4 = "......................(((..(((....))))))"
12 | s_1_2 = "(((....)).)...........(((..(((....))))))"
13 | 
14 | s_all_helices = [[[0, 10]],
15 |                  [[1, 8], [2, 7]],
16 |                  [[14, 17]],
17 |                  [[22, 39], [23, 38], [24, 37]],
18 |                  [[27, 36], [28, 35], [29, 34]]]
19 | s_1_helices = [[[0, 10], [1, 8], [2, 7]],
20 |                [[14, 17]],
21 |                [[22, 39], [23, 38], [24, 37]],
22 |                [[27, 36], [28, 35], [29, 34]]]
23 | s_2_helices = [[[0, 10], [1, 8], [2, 7]],
24 |                [[14, 17]],
25 |                [[22, 39], [23, 38], [24, 37], [27, 36], [28, 35], [29, 34]]]
26 | 
27 | 
28 | pk = "(((.((([..[[..))))((...)){...]]]...)})"
29 | pk_0_2 = "....(((...[[..))).((...))....]]......."
30 | pk_0_3 = "....(((.......)))....................."
31 | pk_1_3 = "..(.(((.......))))...................."
32 | pk_2_3 = "..(.((([..[[..))))...........]]]......"
33 | pk_0_4 = "......................................"
34 | pk_1_4 = "..(.(((.......))))...................."
35 | pk_2_4 = "..(.(((.......))))...................."
36 | pk_1_2 = "(((.(((...[[..))))((...))....]]....).)"
37 | 
38 | pk_all_helices = [[[0, 37]],
39 |                   [[1, 35]],
40 |                   [[2, 17]],
41 |                   [[4, 16], [5, 15], [6, 14]],
42 |                   [[7, 31]],
43 |                   [[10, 30], [11, 29]],
44 |                   [[18, 24], [19, 23]],
45 |                   [[25, 36]]]
46 | pk_1_helices = [[[0, 37], [1, 35]],
47 |                 [[2, 17], [4, 16], [5, 15], [6, 14]],
48 |                 [[7, 31]],
49 |                 [[10, 30], [11, 29]],
50 |                 [[18, 24], [19, 23]],
51 |                 [[25, 36]]]
52 | pk_2_helices = [[[0, 37], [1, 35]],
53 |                 [[2, 17], [4, 16], [5, 15], [6, 14]],
54 |                 [[7, 31], [10, 30], [11, 29]],
55 |                 [[18, 24], [19, 23]],
56 |                 [[25, 36]]]
57 | 
58 | 
def test_getting_helix():
    """get_helices returns the expected helix grouping for bulge allowances 0, 1 and 2."""
    expected_by_bulge = (
        (0, s_all_helices, pk_all_helices),
        (1, s_1_helices, pk_1_helices),
        (2, s_2_helices, pk_2_helices),
    )
    for bulge, nested_expected, pk_expected in expected_by_bulge:
        assert get_helices(s, allowed_buldge_len=bulge) == nested_expected
        assert get_helices(pk, allowed_buldge_len=bulge) == pk_expected
66 | 
67 | 
def test_removing_helix():
    """post_process_struct drops helices shorter than min_len_helix for each bulge allowance."""
    # With min_len_helix=1 nothing is removed.
    assert post_process_struct(s, allowed_buldge_len=0, min_len_helix=1) == s
    # note PKs may swap around their bracket types so fairest to compare bp_list always!
    unchanged_pk = post_process_struct(pk, allowed_buldge_len=0, min_len_helix=1)
    assert convert_dotbracket_to_bp_list(unchanged_pk, len(pk)) == convert_dotbracket_to_bp_list(pk, len(pk))

    # (input structure, allowed bulge length, minimum helix length, expected output)
    cases = (
        (s, 0, 2, s_0_2),
        (pk, 0, 2, pk_0_2),
        (s, 0, 3, s_0_3),
        (pk, 0, 3, pk_0_3),
        (s, 1, 3, s_1_3),
        (pk, 1, 3, pk_1_3),
        (s, 2, 3, s_2_3),
        (pk, 2, 3, pk_2_3),
        (s, 0, 4, s_0_4),
        (pk, 0, 4, pk_0_4),
        (s, 1, 4, s_1_4),
        (pk, 1, 4, pk_1_4),
        (s, 2, 4, s_2_4),
        (pk, 2, 4, pk_2_4),
        (pk, 1, 2, pk_1_2),
        (s, 1, 2, s_1_2),
    )
    for structure, bulge, min_len, expected in cases:
        processed = post_process_struct(structure, allowed_buldge_len=bulge, min_len_helix=min_len)
        assert processed == expected
88 | 
if __name__ == '__main__':
    # Run every check in this module when executed as a script.
    for check in (test_getting_helix, test_removing_helix):
        check()
92 | 


--------------------------------------------------------------------------------
/tests/test_linearpartition.py:
--------------------------------------------------------------------------------
 1 | from arnie.free_energy import free_energy
 2 | from arnie.mfe import mfe
 3 | 
 4 | seq = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG'
 5 | dG = free_energy(seq, linear=True, DEBUG=True)
 6 | print(dG)
 7 | 
 8 | dG = free_energy(seq, linear=True, package='contrafold', DEBUG=True)
 9 | print(dG)
10 | 
11 | dG = free_energy(seq, linear=True, package='eternafold', DEBUG=True)
12 | print(dG)
13 | 
14 | struct = mfe(seq, linear=True)
15 | print(struct)
16 | struct = mfe(seq, linear=True, package='contrafold')
17 | print(struct)
18 | struct = mfe(seq, linear=True, package='eternafold')
19 | print(struct)
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/test_pfunc.py:
--------------------------------------------------------------------------------
 1 | from arnie.pfunc import pfunc
 2 | from arnie.utils import load_package_locations
 3 | 
 4 | sample_seq = 'CGCUGUCUGUACUUGUAUCAGUACACUGACGAGUCCCUAAAGGACGAAACAGCG'
 5 | 
 6 | 
 7 | def test_pfunc(package):
 8 | 
 9 |     Z = pfunc(sample_seq, package=package)
10 |     print('test %s' % package, Z)
11 |     return
12 | 
13 | 
if __name__ == '__main__':
    # Script entry point: run test_pfunc for every configured package except
    # the TMP entry, linear* packages and the packages listed below.
    package_locs = load_package_locations()
    untested = ['hotknots', 'ipknot', 'knotty', 'pknots', 'spotrna',
                'spotrna_conda_env', 'e2efold', 'e2efold_conda_env',
                'spotrna2']
    for pkg in sorted(package_locs.keys()):
        excluded = pkg == 'TMP' or pkg.startswith('linear') or pkg in untested
        if not excluded:
            print(pkg)
            test_pfunc(pkg.lower())
        else:
            print(f'{pkg} not tested.')
27 | 
28 | 


--------------------------------------------------------------------------------
/tests/test_pk.py:
--------------------------------------------------------------------------------
 1 | from arnie.pk_predictors import pk_predict, pk_predict_from_bpp
 2 | from arnie.utils import prob_to_bpp, load_package_locations
 3 | import numpy as np
 4 | 
 5 | # TODO e2efold is stochastic?
 6 | # TODO spotrna2 add in?
 7 | 
 8 | samiv_seq = "GGUCAUGAGUGCCAGCGUCAAGCCCCGGCUUGCUGGCCGGCAACCCUCCAACCGCGGUGGGGUGCCCCGGGUGAUGACCAGGUUGAGUAGCCGUGACGGCUACGCGGCAAGCGCGGGUC"
 9 | samiv_struct = "((((....(.((((((....((.[[[[[)).)))))))(((..((((((..{{{{)).)))).)))]]]]]....))))..((((.(((((.......))))).))))....}}}}..."
10 | 
11 | pk_res = {"hotknots": "((((((....(((((((......[[[[[..))))))).....(((((....{{{{{..)))))...]]]]]..))))))..((((.(((((((...))))))).))))...}}}}}...",
12 |           "ipknot": "[[[[[[....[[[..........(((((((((((]]].((((.(((.((......)).))).)))).......]]]]]].......(((((((...)))))))..))))))).))))..",
13 |           "knotty": "(((..[[[[[))).]].]]]...((((((((((((...((((.(((.((......)).))).))))((.(((....))).))....(((((((...))))))).)))))))).))))..",
14 |           "spotrna": "(((((((....(((((...((.....[[.)))))))]](((.((((((((....)))).)))))))......))))))).......((((((((.))))))))(.........).....",
15 |           "e2efold": ".....(............((...(.......(.............)..........)).)........(.(......(.(.(.([...).]).)...)....).).....)........",
16 |           "pknots": "(((.......))).((.....))(((((((((((((((((((.((((((......)).))))))))...(((....))).)))...((((((.....)))))).)))))))).)))).."}
17 | 
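18 | # bpp_heuristics key format: threshknot_<theta>_<max_iter>_<allowed_buldge_len>_<min_len_helix>
19 | # and hungarian_<theta>_<allowed_buldge_len>_<min_len_helix>_<exp>_<sigmoid_slope_factor>_<prob_to_0_threshold_prior>_<prob_to_1_threshold_prior>_<ln>_<add_p_unpaired>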
20 | bpp_heuristics = {"threshknot_0.1_1_0_1": "...((......(...........[[[[[[[[[[[[)..((((.(((.((....())).))).))))[[.[[[..))]]].]]....(((((((...))))))).]]]]]]]].]]]]..",
21 |                   "threshknot_0.4_1_0_1": "...........................(((((((....(((......(........)......)))....................(((((((...)))))))..))))))).......",
22 |                   "threshknot_0.9_1_0_1": ".......................................................................................................................",
23 |                   "threshknot_0.1_1_0_3": ".......................((((((((((((...((((.(((............))).))))...(((....))).......(((((((...))))))).)))))))).))))..",
24 |                   "threshknot_0.1_5_0_1": "(..[[.....[[)........(.((((((((((((]].((((.(((.((....())).))).))))((.(((..]]))).))....(((((((...))))))).)))))))).)))).)",
25 |                   "hungarian_0.3_0_1_1_None_0.1_0.9_False_True": ".......................(((((((((((....(((..(((.((......)).)))..)))....................(((((((...)))))))..))))))).))))..",
26 |                   "hungarian_0.3_0_2_4_None_0.1_0.9_False_True": ".......................(((((((((((....((((.(((.((......)).))).))))....................(((((((...)))))))..))))))).))))..",
27 |                   "hungarian_0.3_0_1_1_None_0_1_False_False": ".......................(((((((((((....((((.(((.((......)).))).))))....................(((((((...)))))))..))))))).))))..",
28 |                   "hungarian_0.3_0_1_1_3_0.1_0.9_False_True": ".......................(((((((((((....(((..(((.((......)).)))..)))....................(((((((...)))))))..))))))).))))..",
29 |                   "hungarian_0.8_0_1_1_None_0.1_0.9_False_True": "......................................................................................((((((.....))))))................"}
30 | 
31 | 
32 | def test_pk(pkg):
33 |     print("Testing", pkg)
34 |     pred = pk_predict(samiv_seq, pkg)
35 | 
36 |     assert(pred == pk_res[pkg])
37 | 
38 | 
39 | # def bpps and output expected
40 | bpp_file = "test_files/samiv_eternafold.prob"
41 | bpp = prob_to_bpp(bpp_file)
42 | 
43 | 
44 | def test_pk_from_bpp():
45 |     print("Testing threshknot")
46 |     assert(bpp_heuristics["threshknot_0.1_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=1, allowed_buldge_len=0, min_len_helix=1))
47 |     assert(bpp_heuristics["threshknot_0.4_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.4, max_iter=1, allowed_buldge_len=0, min_len_helix=1))
48 |     assert(bpp_heuristics["threshknot_0.9_1_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.9, max_iter=1, allowed_buldge_len=0, min_len_helix=1))
49 |     assert(bpp_heuristics["threshknot_0.1_1_0_3"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=1, allowed_buldge_len=0, min_len_helix=3))
50 |     assert(bpp_heuristics["threshknot_0.1_5_0_1"] == pk_predict_from_bpp(bpp, heuristic="threshknot", theta=0.1, max_iter=5, allowed_buldge_len=0, min_len_helix=1))
51 |     print("Testing hungarian")
52 |     assert(bpp_heuristics["hungarian_0.3_0_1_1_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1,
53 |                                                                                                 exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True))
54 |     assert(bpp_heuristics["hungarian_0.3_0_2_4_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=2, min_len_helix=4,
55 |                                                                                                 exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True))
56 |     assert(bpp_heuristics["hungarian_0.3_0_1_1_None_0_1_False_False"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1,
57 |                                                                                              exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0, prob_to_1_threshold_prior=1, ln=False, add_p_unpaired=False))
58 |     assert(bpp_heuristics["hungarian_0.3_0_1_1_3_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.3, allowed_buldge_len=0, min_len_helix=1,
59 |                                                                                              exp=1, sigmoid_slope_factor=3, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True))
60 |     assert(bpp_heuristics["hungarian_0.8_0_1_1_None_0.1_0.9_False_True"] == pk_predict_from_bpp(bpp, heuristic="hungarian", theta=0.8, allowed_buldge_len=0, min_len_helix=1,
61 |                                                                                                 exp=1, sigmoid_slope_factor=None, prob_to_0_threshold_prior=0.1, prob_to_1_threshold_prior=0.9, ln=False, add_p_unpaired=True))
62 | 
63 | 
64 | if __name__ == '__main__':
65 |     package_locs = load_package_locations()
66 |     pk_predictors = ["spotrna", "e2efold", "hotknots", "ipknot", "knotty", "pknots"]
67 |     for pkg in pk_predictors:
68 |         if pkg not in package_locs:
69 |             print("Warning:", pkg, "is not found in the ARNIEFILE, not testing.")
70 |         else:
71 |             test_pk(pkg)
72 |     test_pk_from_bpp()
73 | 


--------------------------------------------------------------------------------
/tests/test_sample_struct.py:
--------------------------------------------------------------------------------
 1 | from arnie.sample_structures import sample_structures
 2 | 
 3 | sample_seq = 'GGGGAAAACCCC'
 4 | 
 5 | 
 6 | def test_sample_seq():
 7 | 
 8 |     struct_list = sample_structures(
 9 |         sample_seq, n_samples=10, package='vienna_2')
10 |     # sample structures no longer returns energy or prob?
11 |     # print(ener_list) # , ener_list, prob_list
12 |     # print(prob_list)
13 |     return
14 | 
15 | 
16 | if __name__ == '__main__':  # script entry point: run the smoke test directly
17 |     test_sample_seq()
18 |     # test_pkg_w_bpps(pkg.lower())  -- disabled; neither name is defined in this file
19 | 


--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from arnie.utils import load_package_locations
 3 | 
 4 | 
 5 | def test_settings():
 6 |     package_locs = load_package_locations()
 7 |     for k in package_locs.keys():
 8 |         print(k)
 9 |         assert os.path.isdir(package_locs[k])
10 |     return
11 | 
12 | 
13 | if __name__ == '__main__':  # script entry point: run the check directly
14 |     test_settings()
15 | 


--------------------------------------------------------------------------------
/tests/test_structure_handling.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from arnie import utils
  3 | 
  4 | success_cases = [
  5 |   ["......", False],
  6 |   ["......", True],
  7 |   ["(((((......)))))", False],
  8 |   ["(((((......)))))", True],
  9 |   ["[[[[[......]]]]]", False],
 10 |   ["{{{{{......}}}}}", False],
 11 |   ["<<<<<......>>>>>", False],
 12 |   ["((((((((...........)).))))))", False],
 13 |   ["((((((((...........)).))))))", True],
 14 |   ["(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))", False],
 15 |   ["(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))", True],
 16 |   [".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", False],
 17 |   [".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..", True],
 18 |   [".....((((((.....))))))....(((((([[[[[[[[[[[........))))))]]]]]]]]]]]........(((((((....))))))).....................", True],
 19 |   ["(((.[[[.(((...))).]]].)))", True],
 20 |   ["(((..[[[.(((...))))))]]]", True],
 21 |   ["([{<a.aaa....)]}>AAA.A", True],
 22 |   [".....[[[[[[.....]]]]]]....[[[[[[[[[[[....]]]]]]]]]]].......(((((((((((.[[[[[)))))))))))[[[[[[[[[[[[....]]]]].]]]]]]]....]]]]].[[[[[[[[[[[[[....]]]]]]]]]]]]].....................", True],
 23 |   [".....((((((.....))))))....(((((((((((....)))))))))))........((((((((((.<<<<<))))))))))(((((((((((((....))))).))))))))...>>>>>.(((((((((((((....))))))))))))).....................", True],
 24 |   ["((.(.(.((.(.....).)).)....(((((((((((....)))))))))))......)((((((((((...{{{{.)))))))))).(((((((((((....))))).))))))...)[}}}}..[[[[[[[[[[[[[)...]]]]]]]]]]]]]...].................", True],
 25 |   ["(((((((([.{)][..((.{]).)(.}).})))))))", True],
 26 |   ["-(((((..(((((....)))))((((((((.((...-----((((((..(((((((..)))))))(((((({..[[[[[[[)))))))))))..).))).))))))))))))}.]]]]]]]...", True],
 27 |   ["((((({<A[[[....))))).......}>]a]]", True],
 28 |   [".....(.((.(.....).)).)....(.((..({(<.{.a.a{........(..(((((.(.[....).)))).)...)).).}).((.(}...}...)>)).A.A...).)..............(((((((((((.(....).)))))))))))............]........", True],
 29 |   ["..((...((.........))......(((((((....))))))).(((((((....))))))).(((((((....))))))).(((((((((....)))))))))..(((((((....)))))))..(((((((....)))))))...[[[[[[[......[[[[[[[[......[[.....)).....((..........]]......]]]]]]]]......................))..............]]]]]]].....(((((((....)))))))......................", True],
 30 |   ["((((((((((((((((....)))))))((((.(((((((((((......)))))[[[[[[.)))))).))))(((((((((((((.((.(.....(......(((.(((((((((((.((((.(((((..]]]]]].))))).)))).))(((((((....)))))))(((((((((....)))))))(((((((....)))))))(((.(.((((((((((......)))))[[[[[[.))))).).)))))))))))))).)))..............)..).)).)))))))))))(((((((.(((((..]]]]]].))))).)))))))(((((((....))))))))))))))))))", True],
 31 |   ["(((....))) (((....)))", False],
 32 |   ["(((....))) (((....)))", True],
 33 | ]
 34 | 
 35 | success_expected_output = [
 36 |   "......",
 37 |   "......",
 38 |   "(((((......)))))",
 39 |   "(((((......)))))",
 40 |   "(((((......)))))",
 41 |   "(((((......)))))",
 42 |   "(((((......)))))",
 43 |   "((((((((...........)).))))))",
 44 |   "((((((((...........)).))))))",
 45 |   "(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))",
 46 |   "(((.((((((((..((((.(((((....)).)))..))))..))))((((...))))))))...)))",
 47 |   ".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..",
 48 |   ".(((((((((.((((....))))....((((....))))..))))..(((((......)))))...(((.((.(((((((((((((...((((..((((.....))))...)))).))))))))))))))).)))..)))))..",
 49 |   ".....((((((.....))))))....(((((([[[[[[[[[[[........))))))]]]]]]]]]]]........(((((((....))))))).....................",
 50 |   "(((.(((.(((...))).))).)))",
 51 |   "(((..[[[.(((...))))))]]]",
 52 |   "([{<a.aaa....)]}>AAA.A",
 53 |   ".....((((((.....))))))....(((((((((((....))))))))))).......(((((((((((.[[[[[)))))))))))((((((((((((....))))).)))))))....]]]]].(((((((((((((....))))))))))))).....................",
 54 |   ".....((((((.....))))))....(((((((((((....)))))))))))........((((((((((.[[[[[))))))))))(((((((((((((....))))).))))))))...]]]]].(((((((((((((....))))))))))))).....................",
 55 |   "((.(.(.((.(.....).)).)....(((((((((((....)))))))))))......)((((((((((...{{{{.)))))))))).(((((((((((....))))).))))))...)[}}}}..[[[[[[[[[[[[[)...]]]]]]]]]]]]]...].................",
 56 |   "(((((((([.{)](..[[.{)].](.}).})))))))",
 57 |   ".(((((..(((((....)))))((((((((.((........((((((..(((((((..)))))))(((((([..{{{{{{{)))))))))))..).))).))))))))))))].}}}}}}}...",
 58 |   "((((([{<aa<....))))).......]}>>AA",
 59 |   ".....(.((.(.....).)).)....(.((..({(<.{.a.a{........(..(((((.(.[....).)))).)...)).).}).((.(}...}...)>)).A.A...).)..............(((((((((((.(....).)))))))))))............]........",
 60 |   "..((...((.........))......(((((((....))))))).(((((((....))))))).(((((((....))))))).(((((((((....)))))))))..(((((((....)))))))..(((((((....)))))))...[[[[[[[......[[[[[[[[......[[.....)).....((..........]]......]]]]]]]]......................))..............]]]]]]].....(((((((....)))))))......................",
 61 |   "((((((((((((((((....)))))))((((.(((((((((((......)))))[[[[[[.)))))).))))(((((((((((((.((.(.....(......(((.(((((((((((.((((.(((((..]]]]]].))))).)))).))(((((((....)))))))(((((((((....)))))))(((((((....)))))))(((.(.((((((((((......)))))[[[[[[.))))).).)))))))))))))).)))..............)..).)).)))))))))))(((((((.(((((..]]]]]].))))).)))))))(((((((....))))))))))))))))))",
 62 |   "(((....))).(((....)))",
 63 |   "(((....))).(((....)))",
 64 | ]
 65 | 
 66 | def test_structure_sanitization_success():
 67 | 
 68 |   for (i, case) in enumerate(success_cases):
 69 |     bp_list = utils.convert_dotbracket_to_bp_list(case[0], allow_pseudoknots=case[1])
 70 |     dbn = utils.convert_bp_list_to_dotbracket(bp_list, seq_len=len(case[0]))
 71 |     assert(dbn == success_expected_output[i])
 72 | 
 73 | failure_cases = [  # [dot-bracket input, allow_pseudoknots] pairs that must raise
 74 |   ["(((...))))", False],
 75 |   ["(((...))))", True],
 76 |   ["(((", False],
 77 |   ["(((", True],
 78 |   ["...)))", False],
 79 |   ["...)))", True],
 80 |   ["xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", False],
 81 |   ["xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", True],
 82 |   ["(((.[[[.(((...))).]]].)))", False],
 83 |   ["aaa.....AAA", False]
 84 | ]
 85 | failure_expected_output = [  # exception message expected for failure_cases[i]
 86 |   "Unbalanced parenthesis notation: found closing character ')'",
 87 |   "Unbalanced parenthesis notation: found closing character ')'",
 88 |   "Unbalanced parenthesis notation: found unclosed pair for character '('",
 89 |   "Unbalanced parenthesis notation: found unclosed pair for character '('",
 90 |   "Unbalanced parenthesis notation: found closing character ')'",
 91 |   "Unbalanced parenthesis notation: found closing character ')'",
 92 |   "Unexpected character 'x'; did you mean to pass allow_pseudoknots=True?",
 93 |   "Unbalanced parenthesis notation: found unclosed pair for character 'x'",
 94 |   "Mixed pair delimiters found: '[' and '('; did you mean to pass allow_pseudoknots=True?",
 95 |   "Unexpected character 'a'; did you mean to pass allow_pseudoknots=True?"
 96 | ]
 97 | 
 98 | def test_structure_sanitization_failure():
 99 |   for (i, case) in enumerate(failure_cases):
100 |     with pytest.raises(Exception) as exc_info:
101 |       bp_list = utils.convert_dotbracket_to_bp_list(case[0], allow_pseudoknots=case[1])
102 |     assert(str(exc_info.value) == failure_expected_output[i])


--------------------------------------------------------------------------------
/tests/test_vfold_versions.py:
--------------------------------------------------------------------------------
 1 | from arnie import pfunc
 2 | 
 3 | # 3MXH c-di-GMP riboswitch, has coaxial stacking
 4 | sample_seq = 'GGUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG'
 5 | 
 6 | 
 7 | def test_pkg(package, coaxial=True):
 8 | 
 9 |     Z = pfunc.pfunc(sample_seq, package=package, bpps=False, coaxial=coaxial)
10 |     print('test %s' % package, Z)
11 |     return None
12 | 
13 | 
14 | if __name__ == '__main__':
15 |     for pkg in ['vfold_0', 'vfold_1']:
16 |         for coaxial in [True, False]:
17 |             print(pkg, "coaxial %d" % coaxial)
18 |             test_pkg(pkg, coaxial=coaxial)
19 | 


--------------------------------------------------------------------------------